├── Baseline_random.py ├── Environment.py ├── MR_vs_NV.png ├── README.md ├── S_V2V_link_VS_NV.png ├── agent.py ├── base.py ├── ddqn.png ├── dqn.png ├── main.py ├── replay_memory.py ├── requirement.txt └── utils.py /Baseline_random.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | from Environment import * 4 | import matplotlib.pyplot as plt 5 | 6 | # This py file using the random algorithm. 7 | 8 | def main(): 9 | up_lanes = [3.5/2,3.5/2 + 3.5,250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2] 10 | down_lanes = [250-3.5-3.5/2,250-3.5/2,500-3.5-3.5/2,500-3.5/2,750-3.5-3.5/2,750-3.5/2] 11 | left_lanes = [3.5/2,3.5/2 + 3.5,433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2] 12 | right_lanes = [433-3.5-3.5/2,433-3.5/2,866-3.5-3.5/2,866-3.5/2,1299-3.5-3.5/2,1299-3.5/2] 13 | width = 750 14 | height = 1299 15 | n = 40 16 | Env = Environ(down_lanes,up_lanes,left_lanes,right_lanes, width, height) 17 | number_of_game = 50 18 | n_step = 100 19 | V2I_Rate_List = np.zeros([number_of_game, n_step]) 20 | Fail_Percent = np.zeros([number_of_game, n_step]) 21 | for game_idx in range(number_of_game): 22 | print (game_idx) 23 | Env.new_random_game(n) 24 | for i in range(n_step): 25 | #print(i) 26 | actions = np.random.randint(0,20,[n,3]) 27 | power_selection = np.zeros(actions.shape, dtype = 'int') 28 | actions = np.concatenate((actions[..., np.newaxis],power_selection[...,np.newaxis]), axis = 2) 29 | reward, percent = Env.act(actions) 30 | V2I_Rate_List[game_idx, i] = np.sum(reward) 31 | Fail_Percent[game_idx, i] = percent 32 | print(np.sum(reward)) 33 | print ('percentage here is ', percent) 34 | print ('The number of vehicles is ', n) 35 | print ('mean of V2I rate is that ', np.mean(V2I_Rate_List)) 36 | print ('mean of percent is ', np.mean(Fail_Percent[:,-1])) 37 | 38 | main() 39 | -------------------------------------------------------------------------------- /Environment.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import time 4 | import random 5 | import math 6 | # This file is revised for more precise and concise expression. 
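Both channel classes below evolve shadow fading with an exponentially correlated (first-order) update driven by how far each vehicle has moved since the last step. A standalone sketch of that update rule, separate from `Environment.py`, with illustrative names (`d_corr` is the decorrelation distance, `sigma` the shadowing standard deviation; the defaults match the V2V values used here):

```python
import numpy as np

def update_shadowing(shadow_prev, delta_distance, d_corr=10.0, sigma=3.0):
    # Correlation decays exponentially with the distance travelled since the last update.
    rho = np.exp(-delta_distance / d_corr)
    noise = np.random.normal(0.0, sigma, size=np.shape(shadow_prev))
    # sqrt(1 - rho**2) keeps the stationary variance of the shadowing at sigma**2.
    return rho * shadow_prev + np.sqrt(1.0 - rho ** 2) * noise
```

Note that `V2Vchannels.update_shadow` applies the same rule with `delta_distance` taken as the sum of the distances moved by the two endpoints of a link.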
7 | class V2Vchannels: 8 | # Simulator of the V2V Channels 9 | def __init__(self, n_Veh, n_RB): 10 | self.t = 0 11 | self.h_bs = 1.5 12 | self.h_ms = 1.5 13 | self.fc = 2 14 | self.decorrelation_distance = 10 15 | self.shadow_std = 3 16 | self.n_Veh = n_Veh 17 | self.n_RB = n_RB 18 | self.update_shadow([]) 19 | def update_positions(self, positions): 20 | self.positions = positions 21 | def update_pathloss(self): 22 | self.PathLoss = np.zeros(shape=(len(self.positions),len(self.positions))) 23 | for i in range(len(self.positions)): 24 | for j in range(len(self.positions)): 25 | self.PathLoss[i][j] = self.get_path_loss(self.positions[i], self.positions[j]) 26 | def update_shadow(self, delta_distance_list): 27 | delta_distance = np.zeros((len(delta_distance_list), len(delta_distance_list))) 28 | for i in range(len(delta_distance)): 29 | for j in range(len(delta_distance)): 30 | delta_distance[i][j] = delta_distance_list[i] + delta_distance_list[j] 31 | if len(delta_distance_list) == 0: 32 | self.Shadow = np.random.normal(0,self.shadow_std, size=(self.n_Veh, self.n_Veh)) 33 | else: 34 | self.Shadow = np.exp(-1*(delta_distance/self.decorrelation_distance)) * self.Shadow +\ 35 | np.sqrt(1 - np.exp(-2*(delta_distance/self.decorrelation_distance))) * np.random.normal(0, self.shadow_std, size = (self.n_Veh, self.n_Veh)) 36 | def update_fast_fading(self): 37 | h = 1/np.sqrt(2) * (np.random.normal(size=(self.n_Veh, self.n_Veh, self.n_RB) ) + 1j * np.random.normal(size=(self.n_Veh, self.n_Veh, self.n_RB))) 38 | self.FastFading = 20 * np.log10(np.abs(h)) 39 | def get_path_loss(self, position_A, position_B): 40 | d1 = abs(position_A[0] - position_B[0]) 41 | d2 = abs(position_A[1] - position_B[1]) 42 | d = math.hypot(d1,d2)+0.001 43 | d_bp = 4 * (self.h_bs - 1) * (self.h_ms - 1) * self.fc * (10**9)/(3*10**8) 44 | def PL_Los(d): 45 | if d <= 3: 46 | return 22.7 * np.log10(3) + 41 + 20*np.log10(self.fc/5) 47 | else: 48 | if d < d_bp: 49 | return 22.7 * np.log10(d) + 41 + 20 * np.log10(self.fc/5) 50 | else: 51 | return 40.0 * np.log10(d) + 9.45 - 17.3 * np.log10(self.h_bs) - 17.3 * np.log10(self.h_ms) + 2.7 * np.log10(self.fc/5) 52 | def PL_NLos(d_a,d_b): 53 | n_j = max(2.8 - 0.0024*d_b, 1.84) 54 | return PL_Los(d_a) + 20 - 12.5*n_j + 10 * n_j * np.log10(d_b) + 3*np.log10(self.fc/5) 55 | if min(d1,d2) < 7: 56 | PL = PL_Los(d) 57 | self.ifLOS = True 58 | self.shadow_std = 3 59 | else: 60 | PL = min(PL_NLos(d1,d2), PL_NLos(d2,d1)) 61 | self.ifLOS = False 62 | self.shadow_std = 4 # if Non line of sight, the std is 4 63 | return PL 64 | 65 | class V2Ichannels: 66 | # Simulator of the V2I channels 67 | def __init__(self, n_Veh, n_RB): 68 | self.h_bs = 25 69 | self.h_ms = 1.5 70 | self.Decorrelation_distance = 50 71 | self.BS_position = [750/2, 1299/2] # Suppose the BS is in the center 72 | self.shadow_std = 8 73 | self.n_Veh = n_Veh 74 | self.n_RB = n_RB 75 | self.update_shadow([]) 76 | def update_positions(self, positions): 77 | self.positions = positions 78 | 79 | def update_pathloss(self): 80 | self.PathLoss = np.zeros(len(self.positions)) 81 | for i in range(len(self.positions)): 82 | d1 = abs(self.positions[i][0] - self.BS_position[0]) 83 | d2 = abs(self.positions[i][1] - self.BS_position[1]) 84 | distance = math.hypot(d1,d2) # change from meters to kilometers 85 | self.PathLoss[i] = 128.1 + 37.6*np.log10(math.sqrt(distance**2 + (self.h_bs-self.h_ms)**2)/1000) 86 | def update_shadow(self, delta_distance_list): 87 | if len(delta_distance_list) == 0: # initialization 88 | self.Shadow = np.random.normal(0, 
self.shadow_std, self.n_Veh) 89 | else: 90 | delta_distance = np.asarray(delta_distance_list) 91 | self.Shadow = np.exp(-1*(delta_distance/self.Decorrelation_distance))* self.Shadow +\ 92 | np.sqrt(1-np.exp(-2*(delta_distance/self.Decorrelation_distance)))*np.random.normal(0,self.shadow_std, self.n_Veh) 93 | def update_fast_fading(self): 94 | h = 1/np.sqrt(2) * (np.random.normal(size = (self.n_Veh, self.n_RB)) + 1j* np.random.normal(size = (self.n_Veh, self.n_RB))) 95 | self.FastFading = 20 * np.log10(np.abs(h)) 96 | 97 | class Vehicle: 98 | # Vehicle simulator: include all the information for a vehicle 99 | def __init__(self, start_position, start_direction, velocity): 100 | self.position = start_position 101 | self.direction = start_direction 102 | self.velocity = velocity 103 | self.neighbors = [] 104 | self.destinations = [] 105 | class Environ: 106 | # Enviroment Simulator: Provide states and rewards to agents. 107 | # Evolve to new state based on the actions taken by the vehicles. 108 | def __init__ (self, down_lane, up_lane, left_lane, right_lane, width, height): 109 | self.timestep = 0.01 110 | self.down_lanes = down_lane 111 | self.up_lanes = up_lane 112 | self.left_lanes = left_lane 113 | self.right_lanes = right_lane 114 | self.width = width 115 | self.height = height 116 | self.vehicles = [] 117 | self.demands = [] 118 | self.V2V_power_dB = 23 # dBm 119 | self.V2I_power_dB = 23 # dBm 120 | self.V2V_power_dB_List = [23, 10, 5] # the power levels 121 | #self.V2V_power = 10**(self.V2V_power_dB) 122 | #self.V2I_power = 10**(self.V2I_power_dB) 123 | self.sig2_dB = -114 124 | self.bsAntGain = 8 125 | self.bsNoiseFigure = 5 126 | self.vehAntGain = 3 127 | self.vehNoiseFigure = 9 128 | self.sig2 = 10**(self.sig2_dB/10) 129 | self.V2V_Shadowing = [] 130 | self.V2I_Shadowing = [] 131 | self.delta_distance = [] 132 | self.n_RB = 20 133 | self.n_Veh = 60 134 | self.V2Vchannels = V2Vchannels(self.n_Veh, self.n_RB) # number of vehicles 135 | self.V2Ichannels = V2Ichannels(self.n_Veh, self.n_RB) 136 | 137 | self.V2V_Interference_all = np.zeros((self.n_Veh, 3, self.n_RB)) + self.sig2 138 | self.n_step = 0 139 | def add_new_vehicles(self, start_position, start_direction, start_velocity): 140 | self.vehicles.append(Vehicle(start_position, start_direction, start_velocity)) 141 | 142 | def add_new_vehicles_by_number(self, n): 143 | for i in range(n): 144 | ind = np.random.randint(0,len(self.down_lanes)) 145 | start_position = [self.down_lanes[ind], random.randint(0,self.height)] 146 | start_direction = 'd' 147 | self.add_new_vehicles(start_position,start_direction,random.randint(10,15)) 148 | start_position = [self.up_lanes[ind], random.randint(0,self.height)] 149 | start_direction = 'u' 150 | self.add_new_vehicles(start_position,start_direction,random.randint(10,15)) 151 | start_position = [random.randint(0,self.width), self.left_lanes[ind]] 152 | start_direction = 'l' 153 | self.add_new_vehicles(start_position,start_direction,random.randint(10,15)) 154 | start_position = [random.randint(0,self.width), self.right_lanes[ind]] 155 | start_direction = 'r' 156 | self.add_new_vehicles(start_position,start_direction,random.randint(10,15)) 157 | self.V2V_Shadowing = np.random.normal(0, 3, [len(self.vehicles), len(self.vehicles)]) 158 | self.V2I_Shadowing = np.random.normal(0, 8, len(self.vehicles)) 159 | self.delta_distance = np.asarray([c.velocity for c in self.vehicles]) 160 | #self.renew_channel() 161 | def renew_positions(self): 162 | # ======================================================== 163 | 
# This function update the position of each vehicle 164 | # =========================================================== 165 | i = 0 166 | #for i in range(len(self.position)): 167 | while(i < len(self.vehicles)): 168 | #print ('start iteration ', i) 169 | #print(self.position, len(self.position), self.direction) 170 | delta_distance = self.vehicles[i].velocity * self.timestep 171 | change_direction = False 172 | if self.vehicles[i].direction == 'u': 173 | #print ('len of position', len(self.position), i) 174 | for j in range(len(self.left_lanes)): 175 | 176 | if (self.vehicles[i].position[1] <=self.left_lanes[j]) and ((self.vehicles[i].position[1] + delta_distance) >= self.left_lanes[j]): # came to an cross 177 | if (random.uniform(0,1) < 0.4): 178 | self.vehicles[i].position = [self.vehicles[i].position[0] - (delta_distance - (self.left_lanes[j] - self.vehicles[i].position[1])),self.left_lanes[j] ] 179 | self.vehicles[i].direction = 'l' 180 | change_direction = True 181 | break 182 | if change_direction == False : 183 | for j in range(len(self.right_lanes)): 184 | if (self.vehicles[i].position[1] <=self.right_lanes[j]) and ((self.vehicles[i].position[1] + delta_distance) >= self.right_lanes[j]): 185 | if (random.uniform(0,1) < 0.4): 186 | self.vehicles[i].position = [self.vehicles[i].position[0] + (delta_distance + (self.right_lanes[j] - self.vehicles[i].position[1])), self.right_lanes[j] ] 187 | self.vehicles[i].direction = 'r' 188 | change_direction = True 189 | break 190 | if change_direction == False: 191 | self.vehicles[i].position[1] += delta_distance 192 | if (self.vehicles[i].direction == 'd') and (change_direction == False): 193 | #print ('len of position', len(self.position), i) 194 | for j in range(len(self.left_lanes)): 195 | if (self.vehicles[i].position[1] >=self.left_lanes[j]) and ((self.vehicles[i].position[1] - delta_distance) <= self.left_lanes[j]): # came to an cross 196 | if (random.uniform(0,1) < 0.4): 197 | self.vehicles[i].position = [self.vehicles[i].position[0] - (delta_distance - ( self.vehicles[i].position[1]- self.left_lanes[j])), self.left_lanes[j] ] 198 | #print ('down with left', self.vehicles[i].position) 199 | self.vehicles[i].direction = 'l' 200 | change_direction = True 201 | break 202 | if change_direction == False : 203 | for j in range(len(self.right_lanes)): 204 | if (self.vehicles[i].position[1] >=self.right_lanes[j]) and (self.vehicles[i].position[1] - delta_distance <= self.right_lanes[j]): 205 | if (random.uniform(0,1) < 0.4): 206 | self.vehicles[i].position = [self.vehicles[i].position[0] + (delta_distance + ( self.vehicles[i].position[1]- self.right_lanes[j])),self.right_lanes[j] ] 207 | #print ('down with right', self.vehicles[i].position) 208 | self.vehicles[i].direction = 'r' 209 | change_direction = True 210 | break 211 | if change_direction == False: 212 | self.vehicles[i].position[1] -= delta_distance 213 | if (self.vehicles[i].direction == 'r') and (change_direction == False): 214 | #print ('len of position', len(self.position), i) 215 | for j in range(len(self.up_lanes)): 216 | if (self.vehicles[i].position[0] <= self.up_lanes[j]) and ((self.vehicles[i].position[0] + delta_distance) >= self.up_lanes[j]): # came to an cross 217 | if (random.uniform(0,1) < 0.4): 218 | self.vehicles[i].position = [self.up_lanes[j], self.vehicles[i].position[1] + (delta_distance - (self.up_lanes[j] - self.vehicles[i].position[0]))] 219 | change_direction = True 220 | self.vehicles[i].direction = 'u' 221 | break 222 | if change_direction == False : 223 | for 
j in range(len(self.down_lanes)): 224 | if (self.vehicles[i].position[0] <= self.down_lanes[j]) and ((self.vehicles[i].position[0] + delta_distance) >= self.down_lanes[j]): 225 | if (random.uniform(0,1) < 0.4): 226 | self.vehicles[i].position = [self.down_lanes[j], self.vehicles[i].position[1] - (delta_distance - (self.down_lanes[j] - self.vehicles[i].position[0]))] 227 | change_direction = True 228 | self.vehicles[i].direction = 'd' 229 | break 230 | if change_direction == False: 231 | self.vehicles[i].position[0] += delta_distance 232 | if (self.vehicles[i].direction == 'l') and (change_direction == False): 233 | for j in range(len(self.up_lanes)): 234 | 235 | if (self.vehicles[i].position[0] >= self.up_lanes[j]) and ((self.vehicles[i].position[0] - delta_distance) <= self.up_lanes[j]): # came to an cross 236 | if (random.uniform(0,1) < 0.4): 237 | self.vehicles[i].position = [self.up_lanes[j], self.vehicles[i].position[1] + (delta_distance - (self.vehicles[i].position[0] - self.up_lanes[j]))] 238 | change_direction = True 239 | self.vehicles[i].direction = 'u' 240 | break 241 | if change_direction == False : 242 | for j in range(len(self.down_lanes)): 243 | if (self.vehicles[i].position[0] >= self.down_lanes[j]) and ((self.vehicles[i].position[0] - delta_distance) <= self.down_lanes[j]): 244 | if (random.uniform(0,1) < 0.4): 245 | self.vehicles[i].position = [self.down_lanes[j], self.vehicles[i].position[1] - (delta_distance - (self.vehicles[i].position[0] - self.down_lanes[j]))] 246 | change_direction = True 247 | self.vehicles[i].direction = 'd' 248 | break 249 | if change_direction == False: 250 | self.vehicles[i].position[0] -= delta_distance 251 | # if it comes to an exit 252 | if (self.vehicles[i].position[0] < 0) or (self.vehicles[i].position[1] < 0) or (self.vehicles[i].position[0] > self.width) or (self.vehicles[i].position[1] > self.height): 253 | # delete 254 | # print ('delete ', self.position[i]) 255 | if (self.vehicles[i].direction == 'u'): 256 | self.vehicles[i].direction = 'r' 257 | self.vehicles[i].position = [self.vehicles[i].position[0], self.right_lanes[-1]] 258 | else: 259 | if (self.vehicles[i].direction == 'd'): 260 | self.vehicles[i].direction = 'l' 261 | self.vehicles[i].position = [self.vehicles[i].position[0], self.left_lanes[0]] 262 | else: 263 | if (self.vehicles[i].direction == 'l'): 264 | self.vehicles[i].direction = 'u' 265 | self.vehicles[i].position = [self.up_lanes[0],self.vehicles[i].position[1]] 266 | else: 267 | if (self.vehicles[i].direction == 'r'): 268 | self.vehicles[i].direction = 'd' 269 | self.vehicles[i].position = [self.down_lanes[-1],self.vehicles[i].position[1]] 270 | 271 | i += 1 272 | def test_channel(self): 273 | # =================================== 274 | # test the V2I and the V2V channel 275 | # =================================== 276 | self.n_step = 0 277 | self.vehicles = [] 278 | n_Veh = 60 279 | self.n_Veh = n_Veh 280 | self.add_new_vehicles_by_number(int(self.n_Veh/4)) 281 | step = 1000 282 | time_step = 0.1 # every 0.1s update 283 | for i in range(step): 284 | self.renew_positions() 285 | positions = [c.position for c in self.vehicles] 286 | self.update_large_fading(positions, time_step) 287 | self.update_small_fading() 288 | print("Time step: ", i) 289 | print(" ============== V2I ===========") 290 | print("Path Loss: ", self.V2Ichannels.PathLoss) 291 | print("Shadow:", self.V2Ichannels.Shadow) 292 | print("Fast Fading: ", self.V2Ichannels.FastFading) 293 | print(" ============== V2V ===========") 294 | print("Path Loss: ", 
self.V2Vchannels.PathLoss[0:3]) 295 | print("Shadow:", self.V2Vchannels.Shadow[0:3]) 296 | print("Fast Fading: ", self.V2Vchannels.FastFading[0:3]) 297 | 298 | def update_large_fading(self, positions, time_step): 299 | self.V2Ichannels.update_positions(positions) 300 | self.V2Vchannels.update_positions(positions) 301 | self.V2Ichannels.update_pathloss() 302 | self.V2Vchannels.update_pathloss() 303 | delta_distance = time_step * np.asarray([c.velocity for c in self.vehicles]) 304 | self.V2Ichannels.update_shadow(delta_distance) 305 | self.V2Vchannels.update_shadow(delta_distance) 306 | def update_small_fading(self): 307 | self.V2Ichannels.update_fast_fading() 308 | self.V2Vchannels.update_fast_fading() 309 | 310 | def renew_neighbor(self): 311 | # ========================================== 312 | # update the neighbors of each vehicle. 313 | # =========================================== 314 | for i in range(len(self.vehicles)): 315 | self.vehicles[i].neighbors = [] 316 | self.vehicles[i].actions = [] 317 | #print('action and neighbors delete', self.vehicles[i].actions, self.vehicles[i].neighbors) 318 | Distance = np.zeros((len(self.vehicles),len(self.vehicles))) 319 | z = np.array([[complex(c.position[0],c.position[1]) for c in self.vehicles]]) 320 | Distance = abs(z.T-z) 321 | for i in range(len(self.vehicles)): 322 | sort_idx = np.argsort(Distance[:,i]) 323 | for j in range(3): 324 | self.vehicles[i].neighbors.append(sort_idx[j+1]) 325 | destination = np.random.choice(sort_idx[1:int(len(sort_idx)/5)],3, replace = False) 326 | self.vehicles[i].destinations = destination 327 | def renew_channel(self): 328 | # =========================================================================== 329 | # This function updates all the channels including V2V and V2I channels 330 | # ============================================================================= 331 | positions = [c.position for c in self.vehicles] 332 | self.V2Ichannels.update_positions(positions) 333 | self.V2Vchannels.update_positions(positions) 334 | self.V2Ichannels.update_pathloss() 335 | self.V2Vchannels.update_pathloss() 336 | delta_distance = 0.002 * np.asarray([c.velocity for c in self.vehicles]) # time slot is 2 ms. 
337 | self.V2Ichannels.update_shadow(delta_distance) 338 | self.V2Vchannels.update_shadow(delta_distance) 339 | self.V2V_channels_abs = self.V2Vchannels.PathLoss + self.V2Vchannels.Shadow + 50 * np.identity( 340 | len(self.vehicles)) 341 | self.V2I_channels_abs = self.V2Ichannels.PathLoss + self.V2Ichannels.Shadow 342 | 343 | def renew_channels_fastfading(self): 344 | # ======================================================================= 345 | # This function updates all the channels including V2V and V2I channels 346 | # ========================================================================= 347 | self.renew_channel() 348 | self.V2Ichannels.update_fast_fading() 349 | self.V2Vchannels.update_fast_fading() 350 | V2V_channels_with_fastfading = np.repeat(self.V2V_channels_abs[:, :, np.newaxis], self.n_RB, axis=2) 351 | self.V2V_channels_with_fastfading = V2V_channels_with_fastfading - self.V2Vchannels.FastFading 352 | V2I_channels_with_fastfading = np.repeat(self.V2I_channels_abs[:, np.newaxis], self.n_RB, axis=1) 353 | self.V2I_channels_with_fastfading = V2I_channels_with_fastfading - self.V2Ichannels.FastFading 354 | #print("V2I channels", self.V2I_channels_with_fastfading) 355 | 356 | def Compute_Performance_Reward_fast_fading_with_power(self, actions_power): # revising based on the fast fading part 357 | actions = actions_power.copy()[:,:,0] # the channel_selection_part 358 | power_selection = actions_power.copy()[:,:,1] 359 | Rate = np.zeros(len(self.vehicles)) 360 | Interference = np.zeros(self.n_RB) # V2V signal interference to V2I links 361 | for i in range(len(self.vehicles)): 362 | for j in range(len(actions[i,:])): 363 | if not self.activate_links[i,j]: 364 | continue 365 | #print('power selection,', power_selection[i,j]) 366 | Interference[actions[i][j]] += 10**((self.V2V_power_dB_List[power_selection[i,j]] - self.V2I_channels_with_fastfading[i, actions[i,j]] + self.vehAntGain + self.bsAntGain - self.bsNoiseFigure)/10) # fast fading 367 | 368 | self.V2I_Interference = Interference + self.sig2 369 | V2V_Interference = np.zeros((len(self.vehicles), 3)) 370 | V2V_Signal = np.zeros((len(self.vehicles), 3)) 371 | 372 | # remove the effects of none active links 373 | #print('shapes', actions.shape, self.activate_links.shape) 374 | #print(not self.activate_links) 375 | actions[(np.logical_not(self.activate_links))] = -1 376 | #print('action are', actions) 377 | for i in range(self.n_RB): 378 | indexes = np.argwhere(actions == i) 379 | for j in range(len(indexes)): 380 | #receiver_j = self.vehicles[indexes[j,0]].neighbors[indexes[j,1]] 381 | receiver_j = self.vehicles[indexes[j,0]].destinations[indexes[j,1]] 382 | # compute the V2V signal links 383 | V2V_Signal[indexes[j, 0],indexes[j, 1]] = 10**((self.V2V_power_dB_List[power_selection[indexes[j, 0],indexes[j, 1]]] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10) 384 | #V2V_Signal[indexes[j, 0],indexes[j, 1]] = 10**((self.V2V_power_dB_List[0] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i])/10) 385 | if i < self.n_Veh: 386 | V2V_Interference[indexes[j,0],indexes[j,1]] += 10**((self.V2I_power_dB - self.V2V_channels_with_fastfading[i][receiver_j][i]+ 2*self.vehAntGain - self.vehNoiseFigure )/10) # V2I links interference to V2V links 387 | for k in range(j+1, len(indexes)): # computer the peer V2V links 388 | #receiver_k = self.vehicles[indexes[k][0]].neighbors[indexes[k][1]] 389 | receiver_k = self.vehicles[indexes[k][0]].destinations[indexes[k][1]] 390 | 
V2V_Interference[indexes[j,0],indexes[j,1]] += 10**((self.V2V_power_dB_List[power_selection[indexes[k,0],indexes[k,1]]] - self.V2V_channels_with_fastfading[indexes[k][0]][receiver_j][i]+ 2*self.vehAntGain - self.vehNoiseFigure)/10) 391 | V2V_Interference[indexes[k,0],indexes[k,1]] += 10**((self.V2V_power_dB_List[power_selection[indexes[j,0],indexes[j,1]]] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_k][i]+ 2*self.vehAntGain - self.vehNoiseFigure)/10) 392 | 393 | self.V2V_Interference = V2V_Interference + self.sig2 394 | V2V_Rate = np.zeros(self.activate_links.shape) 395 | V2V_Rate[self.activate_links] = np.log2(1 + np.divide(V2V_Signal[self.activate_links], self.V2V_Interference[self.activate_links])) 396 | 397 | #print("V2V Rate", V2V_Rate * self.update_time_test * 1500) 398 | #print ('V2V_Signal is ', np.log(np.mean(V2V_Signal[self.activate_links]))) 399 | V2I_Signals = self.V2I_power_dB-self.V2I_channels_abs[0:min(self.n_RB,self.n_Veh)] + self.vehAntGain + self.bsAntGain - self.bsNoiseFigure 400 | V2I_Rate = np.log2(1 + np.divide(10**(V2I_Signals/10), self.V2I_Interference[0:min(self.n_RB,self.n_Veh)])) 401 | 402 | 403 | # -- compute the latency constraits -- 404 | self.demand -= V2V_Rate * self.update_time_test * 1500 # decrease the demand 405 | self.test_time_count -= self.update_time_test # compute the time left for estimation 406 | self.individual_time_limit -= self.update_time_test # compute the time left for individual V2V transmission 407 | self.individual_time_interval -= self.update_time_test # compute the time interval left for next transmission 408 | 409 | # --- update the demand --- 410 | 411 | new_active = self.individual_time_interval <= 0 412 | self.activate_links[new_active] = True 413 | self.individual_time_interval[new_active] = np.random.exponential(0.02, self.individual_time_interval[new_active].shape ) + self.V2V_limit 414 | self.individual_time_limit[new_active] = self.V2V_limit 415 | self.demand[new_active] = self.demand_amount 416 | #print("demand is", self.demand) 417 | #print('mean rate of average V2V link is', np.mean(V2V_Rate[self.activate_links])) 418 | 419 | # -- update the statistics--- 420 | early_finish = np.multiply(self.demand <= 0, self.activate_links) 421 | unqulified = np.multiply(self.individual_time_limit <=0, self.activate_links) 422 | self.activate_links[np.add(early_finish, unqulified)] = False 423 | #print('number of activate links is', np.sum(self.activate_links)) 424 | self.success_transmission += np.sum(early_finish) 425 | self.failed_transmission += np.sum(unqulified) 426 | #if self.n_step % 1000 == 0 : 427 | # self.success_transmission = 0 428 | # self.failed_transmission = 0 429 | failed_percentage = self.failed_transmission/(self.failed_transmission + self.success_transmission + 0.0001) 430 | # print('Percentage of failed', np.sum(new_active), self.failed_transmission, self.failed_transmission + self.success_transmission , failed_percentage) 431 | return V2I_Rate, failed_percentage #failed_percentage 432 | 433 | 434 | def Compute_Performance_Reward_fast_fading_with_power_asyn(self, actions_power): # revising based on the fast fading part 435 | # =================================================== 436 | # --------- Used for Testing ------- 437 | # =================================================== 438 | actions = actions_power[:,:,0] # the channel_selection_part 439 | power_selection = actions_power[:,:,1] 440 | Interference = np.zeros(self.n_RB) # Calculate the interference from V2V to V2I 441 | for i in 
range(len(self.vehicles)): 442 | for j in range(len(actions[i,:])): 443 | if not self.activate_links[i,j]: 444 | continue 445 | Interference[actions[i][j]] += 10**((self.V2V_power_dB_List[power_selection[i,j]] - \ 446 | self.V2I_channels_with_fastfading[i, actions[i,j]] + \ 447 | self.vehAntGain + self.bsAntGain - self.bsNoiseFigure)/10) 448 | self.V2I_Interference = Interference + self.sig2 449 | V2V_Interference = np.zeros((len(self.vehicles), 3)) 450 | V2V_Signal = np.zeros((len(self.vehicles), 3)) 451 | Interfence_times = np.zeros((len(self.vehicles), 3)) 452 | actions[(np.logical_not(self.activate_links))] = -1 453 | for i in range(self.n_RB): 454 | indexes = np.argwhere(actions == i) 455 | for j in range(len(indexes)): 456 | #receiver_j = self.vehicles[indexes[j,0]].neighbors[indexes[j,1]] 457 | receiver_j = self.vehicles[indexes[j,0]].destinations[indexes[j,1]] 458 | V2V_Signal[indexes[j, 0],indexes[j, 1]] = 10**((self.V2V_power_dB_List[power_selection[indexes[j, 0],indexes[j, 1]]] -\ 459 | self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10) 460 | #V2V_Signal[indexes[j, 0],indexes[j, 1]] = 10**((self.V2V_power_dB_List[0] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i])/10) 461 | if i= 0): 602 | continue 603 | V2V_Interference[k, m, channel_selection[i,j]] += 10**((self.V2V_power_dB_List[power_selection[i,j]] -\ 604 | self.V2V_channels_with_fastfading[i][self.vehicles[k].destinations[m]][channel_selection[i,j]] + 2*self.vehAntGain - self.vehNoiseFigure)/10) 605 | 606 | self.V2V_Interference_all = 10 * np.log10(V2V_Interference) 607 | 608 | 609 | def renew_demand(self): 610 | # generate a new demand of a V2V 611 | self.demand = self.demand_amount*np.ones((self.n_RB,3)) 612 | self.time_limit = 10 613 | def act_for_training(self, actions, idx): 614 | # ============================================= 615 | # This function gives rewards for training 616 | # =========================================== 617 | rewards_list = np.zeros(self.n_RB) 618 | action_temp = actions.copy() 619 | self.activate_links = np.ones((self.n_Veh,3), dtype = 'bool') 620 | V2I_rewardlist, V2V_rewardlist, time_left = self.Compute_Performance_Reward_Batch(action_temp,idx) 621 | self.renew_positions() 622 | self.renew_channels_fastfading() 623 | self.Compute_Interference(actions) 624 | rewards_list = rewards_list.T.reshape([-1]) 625 | V2I_rewardlist = V2I_rewardlist.T.reshape([-1]) 626 | V2V_rewardlist = V2V_rewardlist.T.reshape([-1]) 627 | V2I_reward = (V2I_rewardlist[actions[idx[0],idx[1], 0]+ 20*actions[idx[0],idx[1], 1]] -\ 628 | np.min(V2I_rewardlist))/(np.max(V2I_rewardlist) -np.min(V2I_rewardlist) + 0.000001) 629 | V2V_reward = (V2V_rewardlist[actions[idx[0],idx[1], 0]+ 20*actions[idx[0],idx[1], 1]] -\ 630 | np.min(V2V_rewardlist))/(np.max(V2V_rewardlist) -np.min(V2V_rewardlist) + 0.000001) 631 | lambdda = 0.1 632 | #print ("Reward", V2I_reward, V2V_reward, time_left) 633 | t = lambdda * V2I_reward + (1-lambdda) * V2V_reward 634 | #print("time left", time_left) 635 | #return t 636 | return t - (self.V2V_limit - time_left)/self.V2V_limit 637 | 638 | def act_asyn(self, actions): 639 | self.n_step += 1 640 | if self.n_step % 10 == 0: 641 | self.renew_positions() 642 | self.renew_channels_fastfading() 643 | reward = self.Compute_Performance_Reward_fast_fading_with_power_asyn(actions) 644 | self.Compute_Interference(actions) 645 | return reward 646 | def act(self, actions): 647 | # simulate the next state after the action is given 648 | 
self.n_step += 1 649 | reward = self.Compute_Performance_Reward_fast_fading_with_power(actions) 650 | self.renew_positions() 651 | self.renew_channels_fastfading() 652 | self.Compute_Interference(actions) 653 | return reward 654 | 655 | def new_random_game(self, n_Veh = 0): 656 | # make a new game 657 | self.n_step = 0 658 | self.vehicles = [] 659 | if n_Veh > 0: 660 | self.n_Veh = n_Veh 661 | self.add_new_vehicles_by_number(int(self.n_Veh/4)) 662 | self.V2Vchannels = V2Vchannels(self.n_Veh, self.n_RB) # number of vehicles 663 | self.V2Ichannels = V2Ichannels(self.n_Veh, self.n_RB) 664 | self.renew_channels_fastfading() 665 | self.renew_neighbor() 666 | self.demand_amount = 30 667 | self.demand = self.demand_amount * np.ones((self.n_Veh,3)) 668 | self.test_time_count = 10 669 | self.V2V_limit = 0.1 # 100 ms tolerable V2V latency 670 | self.individual_time_limit = self.V2V_limit * np.ones((self.n_Veh,3)) 671 | self.individual_time_interval = np.random.exponential(0.05, (self.n_Veh,3)) 672 | self.UnsuccessfulLink = np.zeros((self.n_Veh,3)) 673 | self.success_transmission = 0 674 | self.failed_transmission = 0 675 | self.update_time_train = 0.01 # 10 ms update time for training 676 | self.update_time_test = 0.002 # 2 ms update time for testing 677 | self.update_time_asyn = 0.0002 # 0.2 ms to update one subset of the vehicles; for each vehicle, the update time is 2 ms 678 | self.activate_links = np.zeros((self.n_Veh,3), dtype='bool') 679 | 680 | if __name__ == "__main__": 681 | up_lanes = [3.5/2,3.5/2 + 3.5,250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2] 682 | down_lanes = [250-3.5-3.5/2,250-3.5/2,500-3.5-3.5/2,500-3.5/2,750-3.5-3.5/2,750-3.5/2] 683 | left_lanes = [3.5/2,3.5/2 + 3.5,433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2] 684 | right_lanes = [433-3.5-3.5/2,433-3.5/2,866-3.5-3.5/2,866-3.5/2,1299-3.5-3.5/2,1299-3.5/2] 685 | width = 750 686 | height = 1299 687 | Env = Environ(down_lanes,up_lanes,left_lanes,right_lanes, width, height) 688 | Env.test_channel() 689 | -------------------------------------------------------------------------------- /MR_vs_NV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineer1999/Double-Deep-Q-Learning-for-Resource-Allocation/8e4d3973ec8fe86e8afdce7715ce631970a0d786/MR_vs_NV.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Reinforcement Learning based Resource Allocation for V2V Communications 2 | 3 | This repository contains an implementation of the `double deep-Q learning` reinforcement learning algorithm for the resource allocation problem in vehicle-to-vehicle (V2V) communication, based on the research paper ["Deep Reinforcement Learning based Resource Allocation for V2V Communications"](https://ieeexplore.ieee.org/document/8633948) by Hao Ye, Geoffrey Ye Li, and Biing-Hwang Fred Juang. 4 | The original code, which implements `deep-Q learning`, was developed by [IIT-lab, Paper-with-Code-of-Wireless-communication-Based-on-DL](https://github.com/haoyye/ResourceAllocationReinforcementLearning). 5 | 6 | I have made some modifications to the code so that the results of the research paper can be reproduced.
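The core change relative to the original deep-Q implementation is the training target computed in `q_learning_mini_batch` (agent.py): the online network selects the next action and the target network evaluates it, which reduces the over-estimation of Q-values. A minimal NumPy sketch of this double-DQN target, with illustrative array names (the discount of 0.5 matches the value set in `agent.py`):

```python
import numpy as np

def double_dqn_target(reward, q_online_next, q_target_next, discount=0.5):
    # Action selection uses the online network ...
    best_action = np.argmax(q_online_next, axis=1)
    # ... while action evaluation uses the target network.
    evaluated_q = q_target_next[np.arange(len(best_action)), best_action]
    return reward + discount * evaluated_q

# Toy batch of two transitions with three actions each
r = np.array([1.0, 0.5])
q_online = np.array([[0.2, 0.9, 0.1], [0.4, 0.3, 0.8]])
q_target = np.array([[0.3, 0.7, 0.2], [0.5, 0.1, 0.6]])
print(double_dqn_target(r, q_online, q_target))  # 1.0 + 0.5*0.7 = 1.35 and 0.5 + 0.5*0.6 = 0.8
```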
7 | 8 | # Installation and use 9 | 10 | ## Linux 11 | 12 | ### Fork the repository and open a terminal using `ctrl+alt+t` 13 | ``` 14 | cd 15 | 16 | pip3 install -r requirement.txt 17 | ``` 18 | 19 | ### After a successful installation, reopen the terminal and use the command below to run the program. 20 | 21 | ``` 22 | cd 23 | 24 | python3 agent.py 25 | ``` 26 | Running this code takes a considerable amount of time (about 36 hours on a 7th-gen i7). 27 | 28 | ## Tips and Tricks 29 | Use the command below to save the terminal output to a .txt file; this is helpful when debugging the code. 30 | 31 | ``` 32 | python3 agent.py 2>&1 | tee SomeFile.txt 33 | ``` 34 | 35 | Run the code using the above command. 36 | 37 | # Results reproduced using Deep-Q learning 38 | 39 | ## Sum Rate of V2I vs Number of Vehicles 40 | 41 | | ![](MR_vs_NV.png) | 42 | |:--:| 43 | | *Figure-1* | 44 | 45 | 46 | > The above figure shows the sum rate of the V2I links versus the number of vehicles. From the figure, we can infer that as the number of vehicles increases, the number of V2V links increases; as a result, the interference to the V2I links grows and the V2I capacity drops. 47 | 48 | 49 | ## Probability of Satisfied V2V links vs the number of vehicles 50 | 51 | | ![](S_V2V_link_VS_NV.png) | 52 | |:--:| 53 | | *Figure-2* | 54 | 55 | > The given figure shows the probability that the V2V links satisfy the latency constraint versus the number of vehicles. From the figure, we can infer that as the number of vehicles increases, the number of V2V links increases; as a result, it becomes more difficult to ensure that every vehicle satisfies the latency constraint. 56 | 57 | ## The Probability of power level selection with the remaining time for transmission 58 | 59 | | ![](dqn.png) | 60 | |:--:| 61 | | *Figure-3* | 62 | 63 | > The above figure shows the probability of the agent choosing each power level for different amounts of time left for transmission. In general, the probability of choosing the maximum power is low when there is abundant time for transmission, while the agent selects the maximum power with high probability to satisfy the V2V latency constraint when only a small amount of time is left. However, when only 10 ms are left, the probability of choosing the maximum power level suddenly drops to about 0.6, because the agent learns that even with the maximum power the latency constraint will be violated with high probability, and switching to a lower power yields more reward by reducing interference to the V2I and other V2V links. 64 | 65 | > Therefore, we can infer that the improvement of the deep reinforcement learning based approach comes from learning the implicit relationship between the state and the reward function. 66 | 67 | 68 | # Effect of Double Deep-Q Learning 69 | 70 | ## The Probability of power level selection with the remaining time for transmission 71 | 72 | | ![](ddqn.png) | 73 | |:--:| 74 | | *Figure-4* | 75 | 76 | 77 | > Figure-4 shows the probability of the agent choosing each power level for different amounts of time left for transmission when Double Deep-Q Learning is used. Compared with Figure-3, the probability of choosing the maximum power is lower when there is abundant time for transmission. Also, when only a small amount of time is left, the probability of selecting the maximum power to ensure the V2V latency constraint is higher.
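Figures 3 and 4 above are generated by logging, for every V2V decision during testing, the remaining transmission time together with the selected power level, and then turning per-level histograms into probabilities over the time axis (this is what `play()` in `agent.py` does). A condensed sketch of that post-processing with illustrative names; using a shared bin range of 0–0.1 s (the V2V latency budget) is an assumption that keeps the three curves directly comparable:

```python
import numpy as np

def power_selection_probability(time_left_by_level, bins=10, t_max=0.1):
    # time_left_by_level[k] holds the remaining-time samples at which power level k was chosen.
    counts = np.array([np.histogram(samples, bins=bins, range=(0.0, t_max))[0]
                       for samples in time_left_by_level], dtype=float)
    total = counts.sum(axis=0)
    return counts / np.maximum(total, 1e-9)  # probability of each power level per time bin
```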
78 | 79 | > Apart from this, when the agent has abundant time for transmission it will select low power transmission to reduce resource usage. 80 | -------------------------------------------------------------------------------- /S_V2V_link_VS_NV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineer1999/Double-Deep-Q-Learning-for-Resource-Allocation/8e4d3973ec8fe86e8afdce7715ce631970a0d786/S_V2V_link_VS_NV.png -------------------------------------------------------------------------------- /agent.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import time 4 | import random 5 | import numpy as np 6 | from Environment import * 7 | from base import BaseModel 8 | from replay_memory import ReplayMemory 9 | from utils import save_pkl, load_pkl 10 | import tensorflow as tf 11 | import matplotlib.pyplot as plt 12 | 13 | 14 | class Agent(BaseModel): 15 | def __init__(self, config, environment, sess): 16 | self.sess = sess 17 | self.weight_dir = 'weight' 18 | self.env = environment 19 | #self.history = History(self.config) 20 | model_dir = './Model/a.model' 21 | self.memory = ReplayMemory(model_dir) 22 | self.max_step = 100000 23 | self.RB_number = 20 24 | self.num_vehicle = len(self.env.vehicles) 25 | print('-------------------------------------------') 26 | print(self.num_vehicle) 27 | print('-------------------------------------------') 28 | self.action_all_with_power = np.zeros([self.num_vehicle, 3, 2],dtype = 'int32') # this is actions that taken by V2V links with power 29 | self.action_all_with_power_training = np.zeros([20, 3, 2],dtype = 'int32') # this is actions that taken by V2V links with power 30 | self.reward = [] 31 | self.learning_rate = 0.01 32 | self.learning_rate_minimum = 0.0001 33 | self.learning_rate_decay = 0.96 34 | self.learning_rate_decay_step = 500000 35 | self.target_q_update_step = 100 36 | self.discount = 0.5 37 | self.double_q = True 38 | print("------------") 39 | print(self.double_q) 40 | print("------------") 41 | self.build_dqn() 42 | self.V2V_number = 3 * len(self.env.vehicles) # every vehicle need to communicate with 3 neighbors 43 | self.training = True 44 | #self.actions_all = np.zeros([len(self.env.vehicles),3], dtype = 'int32') 45 | def merge_action(self, idx, action): 46 | self.action_all_with_power[idx[0], idx[1], 0] = action % self.RB_number 47 | self.action_all_with_power[idx[0], idx[1], 1] = int(np.floor(action/self.RB_number)) 48 | def get_state(self, idx): 49 | # =============== 50 | # Get State from the environment 51 | # ============= 52 | vehicle_number = len(self.env.vehicles) 53 | V2V_channel = (self.env.V2V_channels_with_fastfading[idx[0],self.env.vehicles[idx[0]].destinations[idx[1]],:] - 80)/60 54 | V2I_channel = (self.env.V2I_channels_with_fastfading[idx[0], :] - 80)/60 55 | V2V_interference = (-self.env.V2V_Interference_all[idx[0],idx[1],:] - 60)/60 56 | NeiSelection = np.zeros(self.RB_number) 57 | for i in range(3): 58 | for j in range(3): 59 | if self.training: 60 | NeiSelection[self.action_all_with_power_training[self.env.vehicles[idx[0]].neighbors[i], j, 0 ]] = 1 61 | else: 62 | NeiSelection[self.action_all_with_power[self.env.vehicles[idx[0]].neighbors[i], j, 0 ]] = 1 63 | 64 | for i in range(3): 65 | if i == idx[1]: 66 | continue 67 | if self.training: 68 | if self.action_all_with_power_training[idx[0],i,0] >= 0: 69 | 
NeiSelection[self.action_all_with_power_training[idx[0],i,0]] = 1 70 | else: 71 | if self.action_all_with_power[idx[0],i,0] >= 0: 72 | NeiSelection[self.action_all_with_power[idx[0],i,0]] = 1 73 | time_remaining = np.asarray([self.env.demand[idx[0],idx[1]] / self.env.demand_amount]) 74 | load_remaining = np.asarray([self.env.individual_time_limit[idx[0],idx[1]] / self.env.V2V_limit]) 75 | #print('shapes', time_remaining.shape,load_remaining.shape) 76 | return np.concatenate((V2I_channel, V2V_interference, V2V_channel, NeiSelection, time_remaining, load_remaining))#,time_remaining)) 77 | #return np.concatenate((V2I_channel, V2V_interference, V2V_channel, time_remaining, load_remaining))#,time_remaining)) 78 | def predict(self, s_t, step, test_ep = False): 79 | # ========================== 80 | # Select actions 81 | # ====================== 82 | ep = 1/(step/1000000 + 1) 83 | if random.random() < ep and test_ep == False: # epsion to balance the exporation and exploition 84 | action = np.random.randint(60) 85 | else: 86 | action = self.q_action.eval({self.s_t:[s_t]})[0] 87 | return action 88 | def observe(self, prestate, state, reward, action): 89 | # ----------- 90 | # Collect Data for Training 91 | # --------- 92 | self.memory.add(prestate, state, reward, action) # add the state and the action and the reward to the memory 93 | #print(self.step) 94 | if self.step > 0: 95 | if self.step % 50 == 0: 96 | #print('Training') 97 | self.q_learning_mini_batch() # training a mini batch 98 | self.save_weight_to_pkl() 99 | if self.step % self.target_q_update_step == self.target_q_update_step - 1: 100 | #print("Update Target Q network:") 101 | self.update_target_q_network() # ?? what is the meaning ?? 102 | def train(self): 103 | num_game, self.update_count, ep_reward = 0, 0, 0. 104 | total_reward, self.total_loss, self.total_q = 0.,0.,0. 105 | max_avg_ep_reward = 0 106 | ep_reward, actions = [], [] 107 | mean_big = 0 108 | number_big = 0 109 | mean_not_big = 0 110 | number_not_big = 0 111 | self.env.new_random_game(20) 112 | for self.step in (range(0, 10000)): # need more configuration 113 | if self.step == 0: # initialize set some varibles 114 | num_game, self.update_count,ep_reward = 0, 0, 0. 115 | total_reward, self.total_loss, self.total_q = 0., 0., 0. 
116 | ep_reward, actions = [], [] 117 | 118 | # prediction 119 | # action = self.predict(self.history.get()) 120 | if (self.step % 2000 == 1): 121 | self.env.new_random_game(20) 122 | print(self.step) 123 | state_old = self.get_state([0,0]) 124 | #print("state", state_old) 125 | self.training = True 126 | for k in range(1): 127 | for i in range(len(self.env.vehicles)): 128 | for j in range(3): 129 | state_old = self.get_state([i,j]) 130 | action = self.predict(state_old, self.step) 131 | #self.merge_action([i,j], action) 132 | self.action_all_with_power_training[i, j, 0] = action % self.RB_number 133 | self.action_all_with_power_training[i, j, 1] = int(np.floor(action/self.RB_number)) 134 | reward_train = self.env.act_for_training(self.action_all_with_power_training, [i,j]) 135 | state_new = self.get_state([i,j]) 136 | self.observe(state_old, state_new, reward_train, action) 137 | if (self.step % 2000 == 0) and (self.step > 0): 138 | # testing 139 | self.training = False 140 | number_of_game = 10 141 | if (self.step % 10000 == 0) and (self.step > 0): 142 | number_of_game = 50 143 | if (self.step == 38000): 144 | number_of_game = 100 145 | V2I_Rate_list = np.zeros(number_of_game) 146 | Fail_percent_list = np.zeros(number_of_game) 147 | for game_idx in range(number_of_game): 148 | self.env.new_random_game(self.num_vehicle) 149 | test_sample = 200 150 | Rate_list = [] 151 | print('test game idx:', game_idx) 152 | for k in range(test_sample): 153 | action_temp = self.action_all_with_power.copy() 154 | for i in range(len(self.env.vehicles)): 155 | self.action_all_with_power[i,:,0] = -1 156 | sorted_idx = np.argsort(self.env.individual_time_limit[i,:]) 157 | for j in sorted_idx: 158 | state_old = self.get_state([i,j]) 159 | action = self.predict(state_old, self.step, True) 160 | self.merge_action([i,j], action) 161 | if i % (len(self.env.vehicles)/10) == 1: 162 | action_temp = self.action_all_with_power.copy() 163 | reward, percent = self.env.act_asyn(action_temp) #self.action_all) 164 | Rate_list.append(np.sum(reward)) 165 | #print("actions", self.action_all_with_power) 166 | V2I_Rate_list[game_idx] = np.mean(np.asarray(Rate_list)) 167 | Fail_percent_list[game_idx] = percent 168 | #print("action is", self.action_all_with_power) 169 | print('failure probability is, ', percent) 170 | #print('action is that', action_temp[0,:]) 171 | #print("OUT") 172 | self.save_weight_to_pkl() 173 | print ('The number of vehicle is ', len(self.env.vehicles)) 174 | print ('Mean of the V2I rate is that ', np.mean(V2I_Rate_list)) 175 | print('Mean of Fail percent is that ', np.mean(Fail_percent_list)) 176 | #print('Test Reward is ', np.mean(test_result)) 177 | 178 | 179 | 180 | 181 | def q_learning_mini_batch(self): 182 | 183 | # Training the DQN model 184 | # ------ 185 | #s_t, action,reward, s_t_plus_1, terminal = self.memory.sample() 186 | s_t, s_t_plus_1, action, reward = self.memory.sample() 187 | #print() 188 | #print('samples:', s_t[0:10], s_t_plus_1[0:10], action[0:10], reward[0:10]) 189 | t = time.time() 190 | if self.double_q: #double Q learning 191 | pred_action = self.q_action.eval({self.s_t: s_t_plus_1}) 192 | q_t_plus_1_with_pred_action = self.target_q_with_idx.eval({self.target_s_t: s_t_plus_1, self.target_q_idx: [[idx, pred_a] for idx, pred_a in enumerate(pred_action)]}) 193 | target_q_t = self.discount * q_t_plus_1_with_pred_action + reward 194 | else: 195 | q_t_plus_1 = self.target_q.eval({self.target_s_t: s_t_plus_1}) 196 | max_q_t_plus_1 = np.max(q_t_plus_1, axis=1) 197 | target_q_t = 
self.discount * max_q_t_plus_1 +reward 198 | _, q_t, loss,w = self.sess.run([self.optim, self.q, self.loss, self.w], {self.target_q_t: target_q_t, self.action:action, self.s_t:s_t, self.learning_rate_step: self.step}) # training the network 199 | 200 | print('loss is ', loss) 201 | self.total_loss += loss 202 | self.total_q += q_t.mean() 203 | self.update_count += 1 204 | 205 | 206 | def build_dqn(self): 207 | # --- Building the DQN ------- 208 | self.w = {} 209 | self.t_w = {} 210 | 211 | initializer = tf. truncated_normal_initializer(0, 0.02) 212 | activation_fn = tf.nn.relu 213 | n_hidden_1 = 500 214 | n_hidden_2 = 250 215 | n_hidden_3 = 120 216 | n_input = 82 217 | n_output = 60 218 | def encoder(x): 219 | weights = { 220 | 'encoder_h1': tf.Variable(tf.truncated_normal([n_input, n_hidden_1],stddev=0.1)), 221 | 'encoder_h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2],stddev=0.1)), 222 | 'encoder_h3': tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3],stddev=0.1)), 223 | 'encoder_h4': tf.Variable(tf.truncated_normal([n_hidden_3, n_output],stddev=0.1)), 224 | 'encoder_b1': tf.Variable(tf.truncated_normal([n_hidden_1],stddev=0.1)), 225 | 'encoder_b2': tf.Variable(tf.truncated_normal([n_hidden_2],stddev=0.1)), 226 | 'encoder_b3': tf.Variable(tf.truncated_normal([n_hidden_3],stddev=0.1)), 227 | 'encoder_b4': tf.Variable(tf.truncated_normal([n_output],stddev=0.1)), 228 | 229 | } 230 | layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['encoder_h1']), weights['encoder_b1'])) 231 | layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['encoder_h2']), weights['encoder_b2'])) 232 | layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['encoder_h3']), weights['encoder_b3'])) 233 | layer_4 = tf.nn.relu(tf.add(tf.matmul(layer_3, weights['encoder_h4']), weights['encoder_b4'])) 234 | return layer_4, weights 235 | with tf.variable_scope('prediction'): 236 | self.s_t = tf.placeholder('float32',[None, n_input]) 237 | self.q, self.w = encoder(self.s_t) 238 | self.q_action = tf.argmax(self.q, dimension = 1) 239 | with tf.variable_scope('target'): 240 | self.target_s_t = tf.placeholder('float32', [None, n_input]) 241 | self.target_q, self.target_w = encoder(self.target_s_t) 242 | self.target_q_idx = tf.placeholder('int32', [None,None], 'output_idx') 243 | self.target_q_with_idx = tf.gather_nd(self.target_q, self.target_q_idx) 244 | with tf.variable_scope('pred_to_target'): 245 | self.t_w_input = {} 246 | self.t_w_assign_op = {} 247 | for name in self.w.keys(): 248 | print('name in self w keys', name) 249 | self.t_w_input[name] = tf.placeholder('float32', self.target_w[name].get_shape().as_list(),name = name) 250 | self.t_w_assign_op[name] = self.target_w[name].assign(self.t_w_input[name]) 251 | 252 | def clipped_error(x): 253 | try: 254 | return tf.select(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5) 255 | except: 256 | return tf.where(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5) 257 | 258 | with tf.variable_scope('optimizer'): 259 | self.target_q_t = tf.placeholder('float32', None, name='target_q_t') 260 | self.action = tf.placeholder('int32',None, name = 'action') 261 | action_one_hot = tf.one_hot(self.action, n_output, 1.0, 0.0, name='action_one_hot') 262 | q_acted = tf.reduce_sum(self.q * action_one_hot, reduction_indices = 1, name='q_acted') 263 | self.delta = self.target_q_t - q_acted 264 | self.global_step = tf.Variable(0, trainable=False) 265 | self.loss = tf.reduce_mean(tf.square(self.delta), name = 'loss') 266 | self.learning_rate_step = tf.placeholder('int64', 
None, name='learning_rate_step') 267 | self.learning_rate_op = tf.maximum(self.learning_rate_minimum, tf.train.exponential_decay(self.learning_rate, self.learning_rate_step, self.learning_rate_decay_step, self.learning_rate_decay, staircase=True)) 268 | self.optim = tf.train.RMSPropOptimizer(self.learning_rate_op, momentum=0.95, epsilon=0.01).minimize(self.loss) 269 | 270 | tf.initialize_all_variables().run() 271 | self.update_target_q_network() 272 | 273 | 274 | 275 | def update_target_q_network(self): 276 | for name in self.w.keys(): 277 | self.t_w_assign_op[name].eval({self.t_w_input[name]: self.w[name].eval()}) 278 | 279 | def save_weight_to_pkl(self): 280 | if not os.path.exists(self.weight_dir): 281 | os.makedirs(self.weight_dir) 282 | for name in self.w.keys(): 283 | save_pkl(self.w[name].eval(), os.path.join(self.weight_dir,"%s.pkl" % name)) 284 | def load_weight_from_pkl(self): 285 | with tf.variable_scope('load_pred_from_pkl'): 286 | self.w_input = {} 287 | self.w_assign_op = {} 288 | for name in self.w.keys(): 289 | self.w_input[name] = tf.placeholder('float32') 290 | self.w_assign_op[name] = self.w[name].assign(self.w_input[name]) 291 | for name in self.w.keys(): 292 | self.w_assign_op[name].eval({self.w_input[name]:load_pkl(os.path.join(self.weight_dir, "%s.pkl" % name))}) 293 | self.update_target_q_network() 294 | 295 | def play(self, n_step = 100, n_episode = 100, test_ep = None, render = False): 296 | number_of_game = 100 297 | V2I_Rate_list = np.zeros(number_of_game) 298 | Fail_percent_list = np.zeros(number_of_game) 299 | self.load_weight_from_pkl() 300 | self.training = False 301 | 302 | 303 | for game_idx in range(number_of_game): 304 | self.env.new_random_game(self.num_vehicle) 305 | test_sample = 200 306 | Rate_list = [] 307 | print('test game idx:', game_idx) 308 | print('The number of vehicle is ', len(self.env.vehicles)) 309 | time_left_list = [] 310 | power_select_list_0 = [] 311 | power_select_list_1 = [] 312 | power_select_list_2 = [] 313 | 314 | for k in range(test_sample): 315 | #print(k) 316 | action_temp = self.action_all_with_power.copy() 317 | for i in range(len(self.env.vehicles)): 318 | self.action_all_with_power[i, :, 0] = -1 319 | sorted_idx = np.argsort(self.env.individual_time_limit[i, :]) 320 | for j in sorted_idx: 321 | state_old = self.get_state([i, j]) 322 | time_left_list.append(state_old[-1]) 323 | action = self.predict(state_old, 0, True) 324 | 325 | if state_old[-1] <=0: 326 | continue 327 | power_selection = int(np.floor(action/self.RB_number)) 328 | if power_selection == 0: 329 | power_select_list_0.append(state_old[-1]) 330 | 331 | if power_selection == 1: 332 | power_select_list_1.append(state_old[-1]) 333 | if power_selection == 2: 334 | power_select_list_2.append(state_old[-1]) 335 | 336 | self.merge_action([i, j], action) 337 | if i % (len(self.env.vehicles) / 10) == 1: 338 | action_temp = self.action_all_with_power.copy() 339 | reward, percent = self.env.act_asyn(action_temp) # self.action_all) 340 | Rate_list.append(np.sum(reward)) 341 | # print("actions", self.action_all_with_power) 342 | 343 | number_0, bin_edges = np.histogram(power_select_list_0, bins = 10) 344 | 345 | number_1, bin_edges = np.histogram(power_select_list_1, bins = 10) 346 | 347 | number_2, bin_edges = np.histogram(power_select_list_2, bins = 10) 348 | 349 | 350 | p_0 = number_0 / (number_0 + number_1 + number_2) 351 | p_1 = number_1 / (number_0 + number_1 + number_2) 352 | p_2 = number_2 / (number_0 + number_1 + number_2) 353 | plt.figure() 354 | 
plt.plot(bin_edges[:-1]*0.1 + 0.01, p_0, 'b*-', label='Power Level 23 dB') 355 | plt.plot(bin_edges[:-1]*0.1 + 0.01, p_1, 'rs-', label='Power Level 10 dB') 356 | plt.plot(bin_edges[:-1]*0.1 + 0.01, p_2, 'go-', label='Power Level 5 dB') 357 | plt.xlim([0,0.12]) 358 | plt.xlabel("Time left for V2V transmission (s)") 359 | plt.ylabel("Probability of power selection") 360 | plt.legend() 361 | plt.grid() 362 | plt.savefig() 363 | #plt.show() 364 | 365 | V2I_Rate_list[game_idx] = np.mean(np.asarray(Rate_list)) 366 | Fail_percent_list[game_idx] = percent 367 | 368 | print('Mean of the V2I rate is that ', np.mean(V2I_Rate_list[0:game_idx] )) 369 | print('Mean of Fail percent is that ',percent, np.mean(Fail_percent_list[0:game_idx])) 370 | # print('action is that', action_temp[0,:]) 371 | 372 | print('The number of vehicle is ', len(self.env.vehicles)) 373 | print('Mean of the V2I rate is that ', np.mean(V2I_Rate_list)) 374 | print('Mean of Fail percent is that ', np.mean(Fail_percent_list)) 375 | # print('Test Reward is ', np.mean(test_result)) 376 | 377 | 378 | def main(_): 379 | 380 | up_lanes = [3.5/2,3.5/2 + 3.5,250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2] 381 | down_lanes = [250-3.5-3.5/2,250-3.5/2,500-3.5-3.5/2,500-3.5/2,750-3.5-3.5/2,750-3.5/2] 382 | left_lanes = [3.5/2,3.5/2 + 3.5,433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2] 383 | right_lanes = [433-3.5-3.5/2,433-3.5/2,866-3.5-3.5/2,866-3.5/2,1299-3.5-3.5/2,1299-3.5/2] 384 | width = 750 385 | height = 1299 386 | Env = Environ(down_lanes,up_lanes,left_lanes,right_lanes, width, height) 387 | Env.new_random_game() 388 | ''' 389 | gpu_options = tf.GPUOptions( 390 | per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction)) 391 | config = tf.ConfigProto() 392 | config.gpu_options.allow_growth = True 393 | ''' 394 | with tf.Session(config=tf.ConfigProto()) as sess: 395 | config = [] 396 | agent = Agent(config, Env, sess) 397 | #agent.play() 398 | agent.train() 399 | agent.play() 400 | 401 | if __name__ == '__main__': 402 | tf.app.run() 403 | 404 | 405 | 406 | 407 | 408 | -------------------------------------------------------------------------------- /base.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | 4 | 5 | class BaseModel(object): 6 | 7 | def __init__(self, config): 8 | self._saver = None 9 | self.config = config 10 | 11 | try: 12 | self._attr = config.__dict__['__flags'] 13 | except: 14 | self._attr = class_var(config) 15 | 16 | self.config = config 17 | for attr in self._attrs: 18 | name = attr if not attr.startswith('_') else attr[1:] 19 | setattr(self, name, getattr(self.config, attr)) 20 | 21 | 22 | 23 | def save_model(self, step=None): 24 | print(" [*] Saving checkpoints...") 25 | model_name = type(self).__name__ 26 | 27 | if not os.path.exists(self.checkpoint_dir): 28 | os.makedirs(self.checkpoint_dir) 29 | self.saver.save(self.sess, self.checkpoint_dir, global_step=step) 30 | 31 | def load_model(self): 32 | print(" [*] Loading checkpoints...") 33 | 34 | ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir) 35 | if ckpt and ckpt.model_checkpoint_path: 36 | ckpt_name = os.path.basename(ckpt.model_checkpoint_path) 37 | fname = os.path.join(self.checkpoint_dir, ckpt_name) 38 | self.saver.restore(self.sess, fname) 39 | print(" [*] Load SUCCESS: %s" % fname) 40 | return True 41 | else: 42 | print(" [!] 
Load FAILED: %s" % self.checkpoint_dir) 43 | return False 44 | 45 | @property 46 | def checkpoint_dir(self): 47 | return os.path.join('checkpoints', self.model_dir) 48 | 49 | @property 50 | def model_dir(self): 51 | model_dir = self.config.env_name 52 | for k, v in self._attrs.items(): 53 | if not k.startswith('_') and k not in ['display']: 54 | model_dir += "/%s-%s" % (k, ",".join([str(i) for i in v]) 55 | if type(v) == list else v) 56 | return model_dir + '/' 57 | 58 | @property 59 | def saver(self): 60 | if self._saver == None: 61 | self._saver = tf.train.Saver(max_to_keep=10) 62 | return self._saver 63 | -------------------------------------------------------------------------------- /ddqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineer1999/Double-Deep-Q-Learning-for-Resource-Allocation/8e4d3973ec8fe86e8afdce7715ce631970a0d786/ddqn.png -------------------------------------------------------------------------------- /dqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineer1999/Double-Deep-Q-Learning-for-Resource-Allocation/8e4d3973ec8fe86e8afdce7715ce631970a0d786/dqn.png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import random 3 | import tensorflow as tf 4 | from agent import Agent 5 | from Environment import * 6 | flags = tf.app.flags 7 | 8 | # Model 9 | flags.DEFINE_string('model', 'm1', 'Type of model') 10 | flags.DEFINE_boolean('dueling', False, 'Whether to use dueling deep q-network') 11 | flags.DEFINE_boolean('double_q', False, 'Whether to use double q-learning') 12 | 13 | # Environment 14 | flags.DEFINE_string('env_name', 'Breakout-v0', 'The name of gym environment to use') 15 | flags.DEFINE_integer('action_repeat', 4, 'The number of action to be repeated') 16 | 17 | # Etc 18 | flags.DEFINE_boolean('use_gpu', True, 'Whether to use gpu or not') 19 | flags.DEFINE_string('gpu_fraction', '1/1', 'idx / # of gpu fraction e.g. 
1/3, 2/3, 3/3') 20 | flags.DEFINE_boolean('display', False, 'Whether to do display the game screen or not') 21 | flags.DEFINE_boolean('is_train', True, 'Whether to do training or testing') 22 | flags.DEFINE_integer('random_seed', 123, 'Value of random seed') 23 | 24 | FLAGS = flags.FLAGS 25 | 26 | # Set random seed 27 | tf.set_random_seed(FLAGS.random_seed) 28 | random.seed(FLAGS.random_seed) 29 | 30 | if FLAGS.gpu_fraction == '': 31 | raise ValueError("--gpu_fraction should be defined") 32 | 33 | def calc_gpu_fraction(fraction_string): 34 | idx, num = fraction_string.split('/') 35 | idx, num = float(idx), float(num) 36 | 37 | fraction = 1 / (num - idx + 1) 38 | print(" [*] GPU : %.4f" % fraction) 39 | return fraction 40 | 41 | def main(_): 42 | 43 | up_lanes = [3.5/2,3.5/2 + 3.5,250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2] 44 | down_lanes = [250-3.5-3.5/2,250-3.5/2,500-3.5-3.5/2,500-3.5/2,750-3.5-3.5/2,750-3.5/2] 45 | left_lanes = [3.5/2,3.5/2 + 3.5,433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2] 46 | right_lanes = [433-3.5-3.5/2,433-3.5/2,866-3.5-3.5/2,866-3.5/2,1299-3.5-3.5/2,1299-3.5/2] 47 | width = 750 48 | height = 1299 49 | Env = Environ(down_lanes,up_lanes,left_lanes,right_lanes, width, height) 50 | Env.new_random_game() 51 | gpu_options = tf.GPUOptions( 52 | per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction)) 53 | config = tf.ConfigProto() 54 | config.gpu_options.allow_growth = True 55 | 56 | with tf.Session(config=config) as sess: 57 | config = [] 58 | agent = Agent(config, Env, sess) 59 | #agent.play() 60 | agent.train() 61 | 62 | #agent.play() 63 | 64 | if __name__ == '__main__': 65 | tf.app.run() 66 | -------------------------------------------------------------------------------- /replay_memory.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import logging 4 | import numpy as np 5 | #from utils import save_npy, load_npy 6 | 7 | class ReplayMemory: 8 | def __init__(self, model_dir): 9 | self.model_dir = model_dir 10 | self.memory_size = 1000000 11 | self.actions = np.empty(self.memory_size, dtype = np.uint8) 12 | self.rewards = np.empty(self.memory_size, dtype = np.float64) 13 | self.prestate = np.empty((self.memory_size, 82), dtype = np.float16) 14 | self.poststate = np.empty((self.memory_size, 82), dtype = np.float16) 15 | self.batch_size = 2000 16 | self.count = 0 17 | self.current = 0 18 | 19 | 20 | def add(self, prestate, poststate, reward, action): 21 | self.actions[self.current] = action 22 | self.rewards[self.current] = reward 23 | self.prestate[self.current] = prestate 24 | self.poststate[self.current] = poststate 25 | self.count = max(self.count, self.current + 1) 26 | self.current = (self.current + 1) % self.memory_size 27 | 28 | 29 | 30 | def sample(self): 31 | indexes = [] 32 | while len(indexes) < self.batch_size: 33 | index = random.randint(0, self.count - 1) 34 | indexes.append(index) 35 | prestate = self.prestate[indexes] 36 | poststate = self.poststate[indexes] 37 | actions = self.actions[indexes] 38 | rewards = self.rewards[indexes] 39 | return prestate, poststate, actions, rewards 40 | 41 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | os 2 | time 3 | random 4 | numpy==1.13.1 5 | math 6 | matplotlib 7 | logging 8 | _pickle 9 | -------------------------------------------------------------------------------- /utils.py: 
-------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import _pickle as cPickle 4 | def save_pkl(obj, path): 5 | with open(path, 'wb') as f: 6 | cPickle.dump(obj, f) 7 | print(" [*] save %s" % path) 8 | def load_pkl(path): 9 | with open(path, 'rb') as f: 10 | obj = cPickle.load(f) 11 | print(" [*] load %s" % path) 12 | return obj --------------------------------------------------------------------------------
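For reference, a quick round-trip with the two helpers in `utils.py` (the file name and array shape below are only examples; the 82×500 shape matches the first encoder layer in `agent.py`):

```python
import numpy as np
from utils import save_pkl, load_pkl

weights = {'encoder_h1': np.zeros((82, 500))}
save_pkl(weights, 'example_weights.pkl')      # prints " [*] save example_weights.pkl"
restored = load_pkl('example_weights.pkl')    # prints " [*] load example_weights.pkl"
assert np.array_equal(weights['encoder_h1'], restored['encoder_h1'])
```

`agent.py` uses the same helpers in `save_weight_to_pkl`/`load_weight_from_pkl` to checkpoint each network weight individually under the `weight/` directory.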