├── README.md
├── .gitattributes
├── utils.py
├── replay_memory.py
├── Baseline_random.py
├── base.py
├── main.py
├── agent.py
└── Environment.py

/README.md:
--------------------------------------------------------------------------------
1 | # ResourceAllocationReinforcementLearning
2 | 
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | 
3 | def save_pkl(obj, path):
4 |     # Serialize obj to path.
5 |     with open(path, 'wb') as f:
6 |         pickle.dump(obj, f)
7 |     print(" [*] save %s" % path)
8 | 
9 | def load_pkl(path):
10 |     # Load a previously pickled object from path.
11 |     with open(path, 'rb') as f:
12 |         obj = pickle.load(f)
13 |     print(" [*] load %s" % path)
14 |     return obj
--------------------------------------------------------------------------------
/replay_memory.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | 
4 | class ReplayMemory:
5 |     # Fixed-size circular buffer of (state, next_state, reward, action) transitions.
6 |     def __init__(self, model_dir):
7 |         self.model_dir = model_dir
8 |         self.memory_size = 1000000
9 |         self.actions = np.empty(self.memory_size, dtype=np.uint8)
10 |         self.rewards = np.empty(self.memory_size, dtype=np.float64)
11 |         self.prestate = np.empty((self.memory_size, 82), dtype=np.float16)
12 |         self.poststate = np.empty((self.memory_size, 82), dtype=np.float16)
13 |         self.batch_size = 2000
14 |         self.count = 0
15 |         self.current = 0
16 | 
17 |     def add(self, prestate, poststate, reward, action):
18 |         # Store one transition, overwriting the oldest entry once the buffer is full.
19 |         self.actions[self.current] = action
20 |         self.rewards[self.current] = reward
21 |         self.prestate[self.current] = prestate
22 |         self.poststate[self.current] = poststate
23 |         self.count = max(self.count, self.current + 1)
24 |         self.current = (self.current + 1) % self.memory_size
25 | 
26 |     def sample(self):
27 |         # Draw batch_size transitions uniformly at random (with replacement).
28 |         indexes = [random.randint(0, self.count - 1) for _ in range(self.batch_size)]
29 |         prestate = self.prestate[indexes]
30 |         poststate = self.poststate[indexes]
31 |         actions = self.actions[indexes]
32 |         rewards = self.rewards[indexes]
33 |         return prestate, poststate, actions, rewards
--------------------------------------------------------------------------------
/Baseline_random.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function
2 | import numpy as np
3 | from Environment import *
4 | 
5 | # Random baseline: every V2V link picks a resource block uniformly at random
6 | # and always transmits at the highest power level.
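7 | # Action layout (this is the format Environment.act consumes):
8 | #   actions[i, j, 0] in [0, 20)   -- resource block picked for neighbor link j of vehicle i
9 | #   actions[i, j, 1] in {0, 1, 2} -- index into Environ.V2V_power_dB_List = [23, 10, 5] dBm
10 | # The baseline draws the RB uniformly at random, fixes the power index to 0
11 | # (23 dBm), and stacks the two choices into an (n, 3, 2) integer tensor by
12 | # concatenating along a new last axis.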
13 | 
14 | def main():
15 |     up_lanes = [3.5/2, 3.5/2 + 3.5, 250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2]
16 |     down_lanes = [250-3.5-3.5/2, 250-3.5/2, 500-3.5-3.5/2, 500-3.5/2, 750-3.5-3.5/2, 750-3.5/2]
17 |     left_lanes = [3.5/2, 3.5/2 + 3.5, 433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2]
18 |     right_lanes = [433-3.5-3.5/2, 433-3.5/2, 866-3.5-3.5/2, 866-3.5/2, 1299-3.5-3.5/2, 1299-3.5/2]
19 |     width = 750
20 |     height = 1299
21 |     n = 40
22 |     Env = Environ(down_lanes, up_lanes, left_lanes, right_lanes, width, height)
23 |     number_of_game = 500
24 |     n_step = 1000
25 |     V2I_Rate_List = np.zeros([number_of_game, n_step])
26 |     Fail_Percent = np.zeros([number_of_game, n_step])
27 |     for game_idx in range(number_of_game):
28 |         print(game_idx)
29 |         Env.new_random_game(n)
30 |         for i in range(n_step):
31 |             actions = np.random.randint(0, 20, [n, 3])              # random RB per V2V link
32 |             power_selection = np.zeros(actions.shape, dtype='int')  # always power level 0 (23 dBm)
33 |             actions = np.concatenate((actions[..., np.newaxis], power_selection[..., np.newaxis]), axis=2)
34 |             reward, percent = Env.act(actions)
35 |             V2I_Rate_List[game_idx, i] = np.sum(reward)
36 |             Fail_Percent[game_idx, i] = percent
37 |             print(np.sum(reward))
38 |             print('failure percentage here is ', percent)
39 |     print('The number of vehicles is ', n)
40 |     print('mean of V2I rate is ', np.mean(V2I_Rate_List))
41 |     print('mean of failure percentage is ', np.mean(Fail_Percent[:, -1]))
42 | 
43 | if __name__ == '__main__':
44 |     main()
45 | 
--------------------------------------------------------------------------------
/base.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import inspect
4 | import tensorflow as tf
5 | 
6 | 
7 | def class_vars(obj):
8 |     # Minimal stand-in for the helper this file expects (in the spirit of
9 |     # devsisters/DQN-tensorflow): collect the public, non-callable attributes.
10 |     return {k: v for k, v in inspect.getmembers(obj)
11 |             if not k.startswith('__') and not callable(v)}
12 | 
13 | 
14 | class BaseModel(object):
15 | 
16 |     def __init__(self, config):
17 |         self._saver = None
18 |         self.config = config
19 | 
20 |         try:
21 |             self._attrs = config.__dict__['__flags']
22 |         except (AttributeError, KeyError):
23 |             self._attrs = class_vars(config)
24 | 
25 |         for attr in self._attrs:
26 |             name = attr if not attr.startswith('_') else attr[1:]
27 |             setattr(self, name, getattr(self.config, attr))
28 | 
29 |     def save_model(self, step=None):
30 |         print(" [*] Saving checkpoints...")
31 |         model_name = type(self).__name__
32 | 
33 |         if not os.path.exists(self.checkpoint_dir):
34 |             os.makedirs(self.checkpoint_dir)
35 |         self.saver.save(self.sess, self.checkpoint_dir, global_step=step)
36 | 
37 |     def load_model(self):
38 |         print(" [*] Loading checkpoints...")
39 | 
40 |         ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir)
41 |         if ckpt and ckpt.model_checkpoint_path:
42 |             ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
43 |             fname = os.path.join(self.checkpoint_dir, ckpt_name)
44 |             self.saver.restore(self.sess, fname)
45 |             print(" [*] Load SUCCESS: %s" % fname)
46 |             return True
47 |         else:
48 |             print(" [!] Load FAILED: %s" % self.checkpoint_dir)
49 |             return False
50 | 
51 |     @property
52 |     def checkpoint_dir(self):
53 |         return os.path.join('checkpoints', self.model_dir)
54 | 
55 |     @property
56 |     def model_dir(self):
57 |         model_dir = self.config.env_name
58 |         for k, v in self._attrs.items():
59 |             if not k.startswith('_') and k not in ['display']:
60 |                 model_dir += "/%s-%s" % (k, ",".join([str(i) for i in v])
61 |                                          if type(v) == list else v)
62 |         return model_dir + '/'
63 | 
64 |     @property
65 |     def saver(self):
66 |         if self._saver is None:
67 |             self._saver = tf.train.Saver(max_to_keep=10)
68 |         return self._saver
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function
2 | import random
3 | import tensorflow as tf
4 | from agent import Agent
5 | from Environment import *
6 | 
7 | flags = tf.app.flags
8 | 
9 | # Model
10 | flags.DEFINE_string('model', 'm1', 'Type of model')
11 | flags.DEFINE_boolean('dueling', False, 'Whether to use dueling deep q-network')
12 | flags.DEFINE_boolean('double_q', False, 'Whether to use double q-learning')
13 | 
14 | # Environment
15 | flags.DEFINE_string('env_name', 'Breakout-v0', 'The name of gym environment to use')
16 | flags.DEFINE_integer('action_repeat', 4, 'The number of actions to be repeated')
17 | 
18 | # Etc
19 | flags.DEFINE_boolean('use_gpu', True, 'Whether to use gpu or not')
20 | flags.DEFINE_string('gpu_fraction', '1/1', 'idx / # of gpu fraction e.g. 1/3, 2/3, 3/3')
21 | flags.DEFINE_boolean('display', False, 'Whether to display the game screen or not')
22 | flags.DEFINE_boolean('is_train', True, 'Whether to do training or testing')
23 | flags.DEFINE_integer('random_seed', 123, 'Value of random seed')
24 | 
25 | FLAGS = flags.FLAGS
26 | 
27 | # Set random seed
28 | tf.set_random_seed(FLAGS.random_seed)
29 | random.seed(FLAGS.random_seed)
30 | 
31 | if FLAGS.gpu_fraction == '':
32 |     raise ValueError("--gpu_fraction should be defined")
33 | 
34 | def calc_gpu_fraction(fraction_string):
35 |     # '1/3' -> 1/3 of GPU memory, '2/3' -> 1/2, '3/3' -> all (fraction = 1/(num - idx + 1))
36 |     idx, num = fraction_string.split('/')
37 |     idx, num = float(idx), float(num)
38 |     fraction = 1 / (num - idx + 1)
39 |     print(" [*] GPU : %.4f" % fraction)
40 |     return fraction
41 | 
42 | def main(_):
43 |     up_lanes = [3.5/2, 3.5/2 + 3.5, 250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2]
44 |     down_lanes = [250-3.5-3.5/2, 250-3.5/2, 500-3.5-3.5/2, 500-3.5/2, 750-3.5-3.5/2, 750-3.5/2]
45 |     left_lanes = [3.5/2, 3.5/2 + 3.5, 433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2]
46 |     right_lanes = [433-3.5-3.5/2, 433-3.5/2, 866-3.5-3.5/2, 866-3.5/2, 1299-3.5-3.5/2, 1299-3.5/2]
47 |     width = 750
48 |     height = 1299
49 |     Env = Environ(down_lanes, up_lanes, left_lanes, right_lanes, width, height)
50 |     Env.new_random_game()
51 |     gpu_options = tf.GPUOptions(
52 |         per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction))
53 |     config = tf.ConfigProto(gpu_options=gpu_options)
54 |     config.gpu_options.allow_growth = True
55 | 
56 |     with tf.Session(config=config) as sess:
57 |         agent = Agent(FLAGS, Env, sess)   # Agent currently ignores its config argument
58 |         agent.train()
59 |         #agent.play()
60 | 
61 | if __name__ == '__main__':
62 |     tf.app.run()
--------------------------------------------------------------------------------
/agent.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 | import os
3 | import time
4 | import random
5 | import numpy as np
6 | from base import BaseModel
7 | from replay_memory import ReplayMemory
8 | from utils import save_pkl, load_pkl
9 | import tensorflow as tf
10 | import matplotlib.pyplot as plt
11 | 
12 | class Agent(BaseModel):
13 |     def __init__(self, config, environment, sess):   # config is accepted but currently unused
14 |         self.sess = sess
15 |         self.weight_dir = 'weight'
16 |         self.env = environment
17 |         #self.history = History(self.config)
18 |         model_dir = './Model/a.model'
19 |         self.memory = ReplayMemory(model_dir)
20 |         self.max_step = 100000
21 |         self.RB_number = 20
22 |         self.num_vehicle = len(self.env.vehicles)
23 |         self.action_all_with_power = np.zeros([self.num_vehicle, 3, 2], dtype='int32')   # (RB, power) actions taken by every V2V link
24 |         self.action_all_with_power_training = np.zeros([20, 3, 2], dtype='int32')        # same, for the 20-vehicle training episodes
25 |         self.reward = []
26 |         self.learning_rate = 0.01
27 |         self.learning_rate_minimum = 0.0001
28 |         self.learning_rate_decay = 0.96
29 |         self.learning_rate_decay_step = 500000
30 |         self.target_q_update_step = 100
31 |         self.discount = 0.5
32 |         self.double_q = True
33 |         self.build_dqn()
34 |         self.V2V_number = 3 * len(self.env.vehicles)   # every vehicle needs to communicate with 3 neighbors
35 |         self.training = True
36 |         #self.actions_all = np.zeros([len(self.env.vehicles), 3], dtype='int32')
37 |     def merge_action(self, idx, action):   # decode the flat action index into (RB, power level)
38 |         self.action_all_with_power[idx[0], idx[1], 0] = action % self.RB_number
39 |         self.action_all_with_power[idx[0], idx[1], 1] = int(np.floor(action / self.RB_number))
40 |     def get_state(self, idx):
41 |         # ===============
42 |         #  Get the state of V2V link idx = [vehicle, neighbor] from the environment
43 |         # ===============
44 |         vehicle_number = len(self.env.vehicles)
45 |         V2V_channel = (self.env.V2V_channels_with_fastfading[idx[0], self.env.vehicles[idx[0]].destinations[idx[1]], :] - 80)/60
46 |         V2I_channel = (self.env.V2I_channels_with_fastfading[idx[0], :] - 80)/60
47 |         V2V_interference = (-self.env.V2V_Interference_all[idx[0], idx[1], :] - 60)/60
48 |         NeiSelection = np.zeros(self.RB_number)
49 |         for i in range(3):
50 |             for j in range(3):
51 |                 if self.training:
52 |                     NeiSelection[self.action_all_with_power_training[self.env.vehicles[idx[0]].neighbors[i], j, 0]] = 1
53 |                 else:
54 |                     NeiSelection[self.action_all_with_power[self.env.vehicles[idx[0]].neighbors[i], j, 0]] = 1
55 | 
56 |         for i in range(3):
57 |             if i == idx[1]:
58 |                 continue
59 |             if self.training:
60 |                 if self.action_all_with_power_training[idx[0], i, 0] >= 0:
61 |                     NeiSelection[self.action_all_with_power_training[idx[0], i, 0]] = 1
62 |             else:
63 |                 if self.action_all_with_power[idx[0], i, 0] >= 0:
64 |                     NeiSelection[self.action_all_with_power[idx[0], i, 0]] = 1
65 |         load_remaining = np.asarray([self.env.demand[idx[0], idx[1]] / self.env.demand_amount])            # fraction of the payload still to deliver
66 |         time_remaining = np.asarray([self.env.individual_time_limit[idx[0], idx[1]] / self.env.V2V_limit]) # fraction of the latency budget left
67 |         #print('shapes', load_remaining.shape, time_remaining.shape)
68 |         return np.concatenate((V2I_channel, V2V_interference, V2V_channel, NeiSelection, load_remaining, time_remaining))
69 |         #return np.concatenate((V2I_channel, V2V_interference, V2V_channel, load_remaining, time_remaining))
70 |     def predict(self, s_t, step, test_ep=False):
71 |         # ==========================
72 |         #  Select an action
73 |         # ==========================
74 |         ep = 1/(step/1000000 + 1)
75 |         if random.random() < ep and not test_ep:   # epsilon-greedy: balance exploration and exploitation
76 |             action = np.random.randint(60)
77 |         else:
78 |             action = self.q_action.eval({self.s_t: [s_t]})[0]
79 |         return action
80 |     def observe(self, prestate, state, reward, action):
81 |         # -----------
82 |         #  Collect data for training
83 |         # ---------
84 |         self.memory.add(prestate, state, reward, action)   # add the transition to the replay memory
85 |         #print(self.step)
86 |         if self.step > 0:
87 |             if self.step % 50 == 0:
88 |                 #print('Training')
89 |                 self.q_learning_mini_batch()   # train on a mini batch
90 |                 #self.save_weight_to_pkl()
91 |             if self.step % self.target_q_update_step == self.target_q_update_step - 1:
92 |                 #print("Update Target Q network:")
93 |                 self.update_target_q_network()   # periodically copy the online weights into the target network
94 |     def train(self):
95 |         num_game, self.update_count, ep_reward = 0, 0, 0.
96 |         total_reward, self.total_loss, self.total_q = 0., 0., 0.
97 |         max_avg_ep_reward = 0
98 |         ep_reward, actions = [], []
99 |         mean_big = 0
100 |         number_big = 0
101 |         mean_not_big = 0
102 |         number_not_big = 0
103 |         self.env.new_random_game(20)
104 |         for self.step in range(0, 40000):   # number of training steps; may need more configuration
105 |             if self.step == 0:   # initialize some variables
106 |                 num_game, self.update_count, ep_reward = 0, 0, 0.
107 |                 total_reward, self.total_loss, self.total_q = 0., 0., 0.
108 |                 ep_reward, actions = [], []
109 | 
110 |             # prediction
111 |             # action = self.predict(self.history.get())
112 |             if (self.step % 2000 == 1):
113 |                 self.env.new_random_game(20)
114 |             print(self.step)
115 |             state_old = self.get_state([0, 0])
116 |             #print("state", state_old)
117 |             self.training = True
118 |             for k in range(1):
119 |                 for i in range(len(self.env.vehicles)):
120 |                     for j in range(3):
121 |                         state_old = self.get_state([i, j])
122 |                         action = self.predict(state_old, self.step)
123 |                         #self.merge_action([i, j], action)
124 |                         self.action_all_with_power_training[i, j, 0] = action % self.RB_number
125 |                         self.action_all_with_power_training[i, j, 1] = int(np.floor(action/self.RB_number))
126 |                         reward_train = self.env.act_for_training(self.action_all_with_power_training, [i, j])
127 |                         state_new = self.get_state([i, j])
128 |                         self.observe(state_old, state_new, reward_train, action)
129 |             if (self.step % 2000 == 0) and (self.step > 0):
130 |                 # testing
131 |                 self.training = False
132 |                 number_of_game = 10
133 |                 if (self.step % 10000 == 0) and (self.step > 0):
134 |                     number_of_game = 50
135 |                 if (self.step == 38000):
136 |                     number_of_game = 100
137 |                 V2I_Rate_list = np.zeros(number_of_game)
138 |                 Fail_percent_list = np.zeros(number_of_game)
139 |                 for game_idx in range(number_of_game):
140 |                     self.env.new_random_game(self.num_vehicle)
141 |                     test_sample = 200
142 |                     Rate_list = []
143 |                     print('test game idx:', game_idx)
144 |                     for k in range(test_sample):
145 |                         action_temp = self.action_all_with_power.copy()
146 |                         for i in range(len(self.env.vehicles)):
147 |                             self.action_all_with_power[i, :, 0] = -1
148 |                             sorted_idx = np.argsort(self.env.individual_time_limit[i, :])
149 |                             for j in sorted_idx:
150 |                                 state_old = self.get_state([i, j])
151 |                                 action = self.predict(state_old, self.step, True)
152 |                                 self.merge_action([i, j], action)
153 |                             if i % (len(self.env.vehicles)/10) == 1:
154 |                                 action_temp = self.action_all_with_power.copy()
155 |                                 reward, percent = self.env.act_asyn(action_temp)   #self.action_all)
156 |                                 Rate_list.append(np.sum(reward))
157 |                     #print("actions", self.action_all_with_power)
158 |                     V2I_Rate_list[game_idx] = np.mean(np.asarray(Rate_list))
159 |                     Fail_percent_list[game_idx] = percent
160 |                     #print("action is", self.action_all_with_power)
161 |                     print('failure probability is, ', percent)
162 |                     #print('action is ', action_temp[0, :])
163 |                 self.save_weight_to_pkl()
164 |                 print('The number of vehicles is ', len(self.env.vehicles))
165 |                 print('Mean of the V2I rate is ', np.mean(V2I_Rate_list))
166 |                 print('Mean of the failure percentage is ', np.mean(Fail_percent_list))
167 |                 #print('Test Reward is ', np.mean(test_result))
168 | 
169 | 
170 | 
171 | 
172 |     def q_learning_mini_batch(self):
173 | 
174 |         # Train the DQN on one mini batch from the replay memory
175 |         # ------
176 |         #s_t, action, reward, s_t_plus_1, terminal = self.memory.sample()
177 |         s_t, s_t_plus_1, action, reward = self.memory.sample()
178 |         #print()
179 |         #print('samples:', s_t[0:10], s_t_plus_1[0:10], action[0:10], reward[0:10])
180 |         t = time.time()
181 |         if self.double_q:   # double Q-learning: online net picks the action, target net evaluates it
182 |             pred_action = self.q_action.eval({self.s_t: s_t_plus_1})
183 |             q_t_plus_1_with_pred_action = self.target_q_with_idx.eval({self.target_s_t: s_t_plus_1, self.target_q_idx: [[idx, pred_a] for idx, pred_a in enumerate(pred_action)]})
184 |             target_q_t = self.discount * q_t_plus_1_with_pred_action + reward
185 |         else:
186 |             q_t_plus_1 = self.target_q.eval({self.target_s_t: s_t_plus_1})
187 |             max_q_t_plus_1 = np.max(q_t_plus_1, axis=1)
188 |             target_q_t = self.discount * max_q_t_plus_1 + reward
189 |         _, q_t, loss, w = self.sess.run([self.optim, self.q, self.loss, self.w], {self.target_q_t: target_q_t, self.action: action, self.s_t: s_t, self.learning_rate_step: self.step})   # train the network
190 | 
191 |         print('loss is ', loss)
192 |         self.total_loss += loss
193 |         self.total_q += q_t.mean()
194 |         self.update_count += 1
195 | 
196 | 
197 |     def build_dqn(self):
198 |         # --- Building the DQN -------
199 |         self.w = {}
200 |         self.t_w = {}
201 | 
202 |         initializer = tf.truncated_normal_initializer(0, 0.02)   # (unused; the weights below call tf.truncated_normal directly)
203 |         activation_fn = tf.nn.relu
204 |         n_hidden_1 = 500
205 |         n_hidden_2 = 250
206 |         n_hidden_3 = 120
207 |         n_input = 82    # state size: 4 x 20 channel/interference features + 2 scalars
208 |         n_output = 60   # 20 resource blocks x 3 power levels
209 |         def encoder(x):
210 |             weights = {
211 |                 'encoder_h1': tf.Variable(tf.truncated_normal([n_input, n_hidden_1], stddev=0.1)),
212 |                 'encoder_h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2], stddev=0.1)),
213 |                 'encoder_h3': tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3], stddev=0.1)),
214 |                 'encoder_h4': tf.Variable(tf.truncated_normal([n_hidden_3, n_output], stddev=0.1)),
215 |                 'encoder_b1': tf.Variable(tf.truncated_normal([n_hidden_1], stddev=0.1)),
216 |                 'encoder_b2': tf.Variable(tf.truncated_normal([n_hidden_2], stddev=0.1)),
217 |                 'encoder_b3': tf.Variable(tf.truncated_normal([n_hidden_3], stddev=0.1)),
218 |                 'encoder_b4': tf.Variable(tf.truncated_normal([n_output], stddev=0.1)),
219 | 
220 |             }
221 |             layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['encoder_h1']), weights['encoder_b1']))
222 |             layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['encoder_h2']), weights['encoder_b2']))
223 |             layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['encoder_h3']), weights['encoder_b3']))
224 |             layer_4 = tf.nn.relu(tf.add(tf.matmul(layer_3, weights['encoder_h4']), weights['encoder_b4']))   # note: ReLU on the output keeps Q-values non-negative
225 |             return layer_4, weights
226 |         with tf.variable_scope('prediction'):
227 |             self.s_t = tf.placeholder('float32', [None, n_input])
228 |             self.q, self.w = encoder(self.s_t)
229 |             self.q_action = tf.argmax(self.q, axis=1)
230 |         with tf.variable_scope('target'):
231 |             self.target_s_t = tf.placeholder('float32', [None, n_input])
232 |             self.target_q, self.target_w = encoder(self.target_s_t)
233 |             self.target_q_idx = tf.placeholder('int32', [None, None], 'output_idx')
234 |             self.target_q_with_idx = tf.gather_nd(self.target_q, self.target_q_idx)
235 |         with tf.variable_scope('pred_to_target'):
236 |             self.t_w_input = {}
237 |             self.t_w_assign_op = {}
238 |             for name in self.w.keys():
239 |                 print('name in self w keys', name)
240 |                 self.t_w_input[name] = tf.placeholder('float32', self.target_w[name].get_shape().as_list(), name=name)
241 |                 self.t_w_assign_op[name] = self.target_w[name].assign(self.t_w_input[name])
242 | 
243 |         def clipped_error(x):   # Huber loss (defined but unused; self.loss below is plain MSE)
244 |             try:
245 |                 return tf.select(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5)
246 |             except:
247 |                 return tf.where(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5)
248 | 
249 |         with tf.variable_scope('optimizer'):
250 |             self.target_q_t = tf.placeholder('float32', None, name='target_q_t')
251 |             self.action = tf.placeholder('int32', None, name='action')
252 |             action_one_hot = tf.one_hot(self.action, n_output, 1.0, 0.0, name='action_one_hot')
253 |             q_acted = tf.reduce_sum(self.q * action_one_hot, axis=1, name='q_acted')
254 |             self.delta = self.target_q_t - q_acted
255 |             self.global_step = tf.Variable(0, trainable=False)
256 |             self.loss = tf.reduce_mean(tf.square(self.delta), name='loss')
257 |             self.learning_rate_step = tf.placeholder('int64', None, name='learning_rate_step')
258 |             self.learning_rate_op = tf.maximum(self.learning_rate_minimum, tf.train.exponential_decay(self.learning_rate, self.learning_rate_step, self.learning_rate_decay_step, self.learning_rate_decay, staircase=True))
259 |             self.optim = tf.train.RMSPropOptimizer(self.learning_rate_op, momentum=0.95, epsilon=0.01).minimize(self.loss)
260 | 
261 |         tf.global_variables_initializer().run()
262 |         self.update_target_q_network()
263 | 
264 | 
265 | 
266 |     def update_target_q_network(self):
267 |         for name in self.w.keys():
268 |             self.t_w_assign_op[name].eval({self.t_w_input[name]: self.w[name].eval()})
269 | 
270 |     def save_weight_to_pkl(self):
271 |         if not os.path.exists(self.weight_dir):
272 |             os.makedirs(self.weight_dir)
273 |         for name in self.w.keys():
274 |             save_pkl(self.w[name].eval(), os.path.join(self.weight_dir, "%s.pkl" % name))
275 |     def load_weight_from_pkl(self):
276 |         with tf.variable_scope('load_pred_from_pkl'):
277 |             self.w_input = {}
278 |             self.w_assign_op = {}
279 |             for name in self.w.keys():
280 |                 self.w_input[name] = tf.placeholder('float32')
281 |                 self.w_assign_op[name] = self.w[name].assign(self.w_input[name])
282 |         for name in self.w.keys():
283 |             self.w_assign_op[name].eval({self.w_input[name]: load_pkl(os.path.join(self.weight_dir, "%s.pkl" % name))})
284 |         self.update_target_q_network()
285 | 
286 |     def play(self, n_step=100, n_episode=100, test_ep=None, render=False):
287 |         number_of_game = 100
288 |         V2I_Rate_list = np.zeros(number_of_game)
289 |         Fail_percent_list = np.zeros(number_of_game)
290 |         self.load_weight_from_pkl()
291 |         self.training = False
292 | 
293 | 
294 |         for game_idx in range(number_of_game):
295 |             self.env.new_random_game(self.num_vehicle)
296 |             test_sample = 200
297 |             Rate_list = []
298 |             print('test game idx:', game_idx)
299 |             print('The number of vehicles is ', len(self.env.vehicles))
300 |             time_left_list = []
301 |             power_select_list_0 = []
302 |             power_select_list_1 = []
303 |             power_select_list_2 = []
304 | 
305 |             for k in range(test_sample):
306 |                 action_temp = self.action_all_with_power.copy()
307 |                 for i in range(len(self.env.vehicles)):
308 |                     self.action_all_with_power[i, :, 0] = -1
309 |                     sorted_idx = np.argsort(self.env.individual_time_limit[i, :])
310 |                     for j in sorted_idx:
311 |                         state_old = self.get_state([i, j])
312 |                         time_left_list.append(state_old[-1])
313 |                         action = self.predict(state_old, 0, True)
314 |                         '''
315 |                         if state_old[-1] <= 0:
316 |                             continue
317 |                         power_selection = int(np.floor(action/self.RB_number))
318 |                         if power_selection == 0:
319 |                             power_select_list_0.append(state_old[-1])
320 | 
321 |                         if power_selection == 1:
322 |                             power_select_list_1.append(state_old[-1])
323 |                         if power_selection == 2:
324 |                             power_select_list_2.append(state_old[-1])
325 |                         '''
326 |                         self.merge_action([i, j], action)
327 |                     if i % (len(self.env.vehicles) / 10) == 1:
328 |                         action_temp = self.action_all_with_power.copy()
329 |                         reward, percent = self.env.act_asyn(action_temp)   # self.action_all)
330 |                         Rate_list.append(np.sum(reward))
331 |             # print("actions", self.action_all_with_power)
332 |             '''
333 |             number_0, bin_edges = np.histogram(power_select_list_0, bins=10)
334 | 
335 |             number_1, bin_edges = np.histogram(power_select_list_1, bins=10)
336 | 
337 |             number_2, bin_edges = np.histogram(power_select_list_2, bins=10)
338 | 
339 | 
340 |             p_0 = number_0 / (number_0 + number_1 + number_2)
341 |             p_1 = number_1 / (number_0 + number_1 + number_2)
342 |             p_2 = number_2 / (number_0 + number_1 + number_2)
343 | 
344 |             plt.plot(bin_edges[:-1]*0.1 + 0.01, p_0, 'b*-', label='Power Level 23 dB')
345 |             plt.plot(bin_edges[:-1]*0.1 + 0.01, p_1, 'rs-', label='Power Level 10 dB')
346 |             plt.plot(bin_edges[:-1]*0.1 + 0.01, p_2, 'go-', label='Power Level 5 dB')
347 |             plt.xlim([0, 0.12])
348 |             plt.xlabel("Time left for V2V transmission (s)")
349 |             plt.ylabel("Probability of power selection")
350 |             plt.legend()
351 |             plt.grid()
352 |             plt.show()
353 |             '''
354 |             V2I_Rate_list[game_idx] = np.mean(np.asarray(Rate_list))
355 |             Fail_percent_list[game_idx] = percent
356 | 
357 |             print('Mean of the V2I rate is ', np.mean(V2I_Rate_list[0:game_idx]))
358 |             print('Mean of the failure percentage is ', percent, np.mean(Fail_percent_list[0:game_idx]))
359 |             # print('action is ', action_temp[0, :])
360 | 
361 |         print('The number of vehicles is ', len(self.env.vehicles))
362 |         print('Mean of the V2I rate is ', np.mean(V2I_Rate_list))
363 |         print('Mean of the failure percentage is ', np.mean(Fail_percent_list))
364 |         # print('Test Reward is ', np.mean(test_result))
365 | 
366 | 
367 | 
368 | 
369 | 
370 | 
--------------------------------------------------------------------------------
/Environment.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | import time
4 | import random
5 | import math
6 | # Vehicular environment simulator: channel models, vehicle mobility, and rewards.
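# Module layout:
#   V2Vchannels -- V2V link model: LOS/NLOS path loss, log-normal shadowing with a
#                  10 m decorrelation distance, and Rayleigh fast fading per RB.
#   V2Ichannels -- V2I link model: 128.1 + 37.6*log10(d_km) path loss to a base
#                  station at the map center, 8 dB shadowing (50 m decorrelation),
#                  and Rayleigh fast fading.
#   Vehicle     -- position, direction, velocity, and neighbor/destination lists.
#   Environ     -- moves vehicles on the Manhattan-style lane grid, refreshes the
#                  channels, and computes the rates, rewards, and interference.
# (The parameters appear to follow an urban V2X evaluation setup in the style of
# 3GPP TR 36.885; treat that reading as an inference, not something stated here.)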
7 | class V2Vchannels:
8 |     # Simulator of the V2V channels
9 |     def __init__(self, n_Veh, n_RB):
10 |         self.t = 0
11 |         self.h_bs = 1.5
12 |         self.h_ms = 1.5
13 |         self.fc = 2
14 |         self.decorrelation_distance = 10
15 |         self.shadow_std = 3
16 |         self.n_Veh = n_Veh
17 |         self.n_RB = n_RB
18 |         self.update_shadow([])
19 |     def update_positions(self, positions):
20 |         self.positions = positions
21 |     def update_pathloss(self):
22 |         self.PathLoss = np.zeros(shape=(len(self.positions), len(self.positions)))
23 |         for i in range(len(self.positions)):
24 |             for j in range(len(self.positions)):
25 |                 self.PathLoss[i][j] = self.get_path_loss(self.positions[i], self.positions[j])
26 |     def update_shadow(self, delta_distance_list):
27 |         delta_distance = np.zeros((len(delta_distance_list), len(delta_distance_list)))
28 |         for i in range(len(delta_distance)):
29 |             for j in range(len(delta_distance)):
30 |                 delta_distance[i][j] = delta_distance_list[i] + delta_distance_list[j]
31 |         if len(delta_distance_list) == 0:   # initialization
32 |             self.Shadow = np.random.normal(0, self.shadow_std, size=(self.n_Veh, self.n_Veh))
33 |         else:
34 |             self.Shadow = np.exp(-1*(delta_distance/self.decorrelation_distance)) * self.Shadow +\
35 |                           np.sqrt(1 - np.exp(-2*(delta_distance/self.decorrelation_distance))) * np.random.normal(0, self.shadow_std, size=(self.n_Veh, self.n_Veh))
36 |     def update_fast_fading(self):
37 |         h = 1/np.sqrt(2) * (np.random.normal(size=(self.n_Veh, self.n_Veh, self.n_RB)) + 1j * np.random.normal(size=(self.n_Veh, self.n_Veh, self.n_RB)))
38 |         self.FastFading = 20 * np.log10(np.abs(h))   # Rayleigh fading, drawn independently per RB
39 |     def get_path_loss(self, position_A, position_B):
40 |         d1 = abs(position_A[0] - position_B[0])
41 |         d2 = abs(position_A[1] - position_B[1])
42 |         d = math.hypot(d1, d2) + 0.001
43 |         d_bp = 4 * (self.h_bs - 1) * (self.h_ms - 1) * self.fc * (10**9)/(3*10**8)   # breakpoint distance
44 |         def PL_Los(d):
45 |             if d <= 3:
46 |                 return 22.7 * np.log10(3) + 41 + 20*np.log10(self.fc/5)
47 |             else:
48 |                 if d < d_bp:
49 |                     return 22.7 * np.log10(d) + 41 + 20 * np.log10(self.fc/5)
50 |                 else:
51 |                     return 40.0 * np.log10(d) + 9.45 - 17.3 * np.log10(self.h_bs) - 17.3 * np.log10(self.h_ms) + 2.7 * np.log10(self.fc/5)
52 |         def PL_NLos(d_a, d_b):
53 |             n_j = max(2.8 - 0.0024*d_b, 1.84)
54 |             return PL_Los(d_a) + 20 - 12.5*n_j + 10 * n_j * np.log10(d_b) + 3*np.log10(self.fc/5)
55 |         if min(d1, d2) < 7:
56 |             PL = PL_Los(d)
57 |             self.ifLOS = True
58 |             self.shadow_std = 3
59 |         else:
60 |             PL = min(PL_NLos(d1, d2), PL_NLos(d2, d1))
61 |             self.ifLOS = False
62 |             self.shadow_std = 4   # NLOS shadowing has a larger standard deviation (4 dB)
63 |         return PL
64 | 
65 | class V2Ichannels:
66 |     # Simulator of the V2I channels
67 |     def __init__(self, n_Veh, n_RB):
68 |         self.h_bs = 25
69 |         self.h_ms = 1.5
70 |         self.Decorrelation_distance = 50
71 |         self.BS_position = [750/2, 1299/2]   # the BS sits at the center of the map
72 |         self.shadow_std = 8
73 |         self.n_Veh = n_Veh
74 |         self.n_RB = n_RB
75 |         self.update_shadow([])
76 |     def update_positions(self, positions):
77 |         self.positions = positions
78 | 
79 |     def update_pathloss(self):
80 |         self.PathLoss = np.zeros(len(self.positions))
81 |         for i in range(len(self.positions)):
82 |             d1 = abs(self.positions[i][0] - self.BS_position[0])
83 |             d2 = abs(self.positions[i][1] - self.BS_position[1])
84 |             distance = math.hypot(d1, d2)   # distance in meters; the /1000 below converts it to km
85 |             self.PathLoss[i] = 128.1 + 37.6*np.log10(math.sqrt(distance**2 + (self.h_bs-self.h_ms)**2)/1000)
86 |     def update_shadow(self, delta_distance_list):
87 |         if len(delta_distance_list) == 0:   # initialization
88 |             self.Shadow = np.random.normal(0, self.shadow_std, self.n_Veh)
89 |         else:
90 |             delta_distance = np.asarray(delta_distance_list)
91 |             self.Shadow = np.exp(-1*(delta_distance/self.Decorrelation_distance)) * self.Shadow +\
92 |                           np.sqrt(1-np.exp(-2*(delta_distance/self.Decorrelation_distance))) * np.random.normal(0, self.shadow_std, self.n_Veh)
93 |     def update_fast_fading(self):
94 |         h = 1/np.sqrt(2) * (np.random.normal(size=(self.n_Veh, self.n_RB)) + 1j * np.random.normal(size=(self.n_Veh, self.n_RB)))
95 |         self.FastFading = 20 * np.log10(np.abs(h))
96 | 
97 | class Vehicle:
98 |     # Vehicle simulator: holds all the information for one vehicle
99 |     def __init__(self, start_position, start_direction, velocity):
100 |         self.position = start_position
101 |         self.direction = start_direction
102 |         self.velocity = velocity
103 |         self.neighbors = []
104 |         self.destinations = []
105 | class Environ:
106 |     # Environment simulator: provides states and rewards to the agents and
107 |     # evolves to a new state based on the actions taken by the vehicles.
108 |     def __init__(self, down_lane, up_lane, left_lane, right_lane, width, height):
109 |         self.timestep = 0.01
110 |         self.down_lanes = down_lane
111 |         self.up_lanes = up_lane
112 |         self.left_lanes = left_lane
113 |         self.right_lanes = right_lane
114 |         self.width = width
115 |         self.height = height
116 |         self.vehicles = []
117 |         self.demands = []
118 |         self.V2V_power_dB = 23   # dBm
119 |         self.V2I_power_dB = 23   # dBm
120 |         self.V2V_power_dB_List = [23, 10, 5]   # the selectable power levels
121 |         #self.V2V_power = 10**(self.V2V_power_dB)
122 |         #self.V2I_power = 10**(self.V2I_power_dB)
123 |         self.sig2_dB = -114
124 |         self.bsAntGain = 8
125 |         self.bsNoiseFigure = 5
126 |         self.vehAntGain = 3
127 |         self.vehNoiseFigure = 9
128 |         self.sig2 = 10**(self.sig2_dB/10)
129 |         self.V2V_Shadowing = []
130 |         self.V2I_Shadowing = []
131 |         self.delta_distance = []
132 |         self.n_RB = 20
133 |         self.n_Veh = 40
134 |         self.V2Vchannels = V2Vchannels(self.n_Veh, self.n_RB)   # channel models sized by the number of vehicles
135 |         self.V2Ichannels = V2Ichannels(self.n_Veh, self.n_RB)
136 | 
137 |         self.V2V_Interference_all = np.zeros((self.n_Veh, 3, self.n_RB)) + self.sig2
138 |         self.n_step = 0
139 |     def add_new_vehicles(self, start_position, start_direction, start_velocity):
140 |         self.vehicles.append(Vehicle(start_position, start_direction, start_velocity))
141 | 
142 |     def add_new_vehicles_by_number(self, n):   # adds 4*n vehicles, one per direction on a random lane index
143 |         for i in range(n):
144 |             ind = np.random.randint(0, len(self.down_lanes))
145 |             start_position = [self.down_lanes[ind], random.randint(0, self.height)]
146 |             start_direction = 'd'
147 |             self.add_new_vehicles(start_position, start_direction, random.randint(10, 15))
148 |             start_position = [self.up_lanes[ind], random.randint(0, self.height)]
149 |             start_direction = 'u'
150 |             self.add_new_vehicles(start_position, start_direction, random.randint(10, 15))
151 |             start_position = [random.randint(0, self.width), self.left_lanes[ind]]
152 |             start_direction = 'l'
153 |             self.add_new_vehicles(start_position, start_direction, random.randint(10, 15))
154 |             start_position = [random.randint(0, self.width), self.right_lanes[ind]]
155 |             start_direction = 'r'
156 |             self.add_new_vehicles(start_position, start_direction, random.randint(10, 15))
157 |         self.V2V_Shadowing = np.random.normal(0, 3, [len(self.vehicles), len(self.vehicles)])
158 |         self.V2I_Shadowing = np.random.normal(0, 8, len(self.vehicles))
159 |         self.delta_distance = np.asarray([c.velocity for c in self.vehicles])
160 |         #self.renew_channel()
161 |     def renew_positions(self):
162 |         # ========================================================
163 |         # This function updates the position of each vehicle
164 |         # ===========================================================
165 |         i = 0
166 |         #for i in range(len(self.position)):
167 |         while(i < len(self.vehicles)):
168 |             #print ('start iteration ', i)
169 |             #print(self.position, len(self.position), self.direction)
170 |             delta_distance = self.vehicles[i].velocity * self.timestep
171 |             change_direction = False
172 |             if self.vehicles[i].direction == 'u':
173 |                 #print ('len of position', len(self.position), i)
174 |                 for j in range(len(self.left_lanes)):
175 | 
176 |                     if (self.vehicles[i].position[1] <= self.left_lanes[j]) and ((self.vehicles[i].position[1] + delta_distance) >= self.left_lanes[j]):   # reached an intersection
177 |                         if (random.uniform(0, 1) < 0.4):
178 |                             self.vehicles[i].position = [self.vehicles[i].position[0] - (delta_distance - (self.left_lanes[j] - self.vehicles[i].position[1])), self.left_lanes[j]]
179 |                             self.vehicles[i].direction = 'l'
180 |                             change_direction = True
181 |                             break
182 |                 if change_direction == False:
183 |                     for j in range(len(self.right_lanes)):
184 |                         if (self.vehicles[i].position[1] <= self.right_lanes[j]) and ((self.vehicles[i].position[1] + delta_distance) >= self.right_lanes[j]):
185 |                             if (random.uniform(0, 1) < 0.4):
186 |                                 self.vehicles[i].position = [self.vehicles[i].position[0] + (delta_distance + (self.right_lanes[j] - self.vehicles[i].position[1])), self.right_lanes[j]]
187 |                                 self.vehicles[i].direction = 'r'
188 |                                 change_direction = True
189 |                                 break
190 |                 if change_direction == False:
191 |                     self.vehicles[i].position[1] += delta_distance
192 |             if (self.vehicles[i].direction == 'd') and (change_direction == False):
193 |                 #print ('len of position', len(self.position), i)
194 |                 for j in range(len(self.left_lanes)):
195 |                     if (self.vehicles[i].position[1] >= self.left_lanes[j]) and ((self.vehicles[i].position[1] - delta_distance) <= self.left_lanes[j]):   # reached an intersection
196 |                         if (random.uniform(0, 1) < 0.4):
197 |                             self.vehicles[i].position = [self.vehicles[i].position[0] - (delta_distance - (self.vehicles[i].position[1] - self.left_lanes[j])), self.left_lanes[j]]
198 |                             #print ('down with left', self.vehicles[i].position)
199 |                             self.vehicles[i].direction = 'l'
200 |                             change_direction = True
201 |                             break
202 |                 if change_direction == False:
203 |                     for j in range(len(self.right_lanes)):
204 |                         if (self.vehicles[i].position[1] >= self.right_lanes[j]) and (self.vehicles[i].position[1] - delta_distance <= self.right_lanes[j]):
205 |                             if (random.uniform(0, 1) < 0.4):
206 |                                 self.vehicles[i].position = [self.vehicles[i].position[0] + (delta_distance + (self.vehicles[i].position[1] - self.right_lanes[j])), self.right_lanes[j]]
207 |                                 #print ('down with right', self.vehicles[i].position)
208 |                                 self.vehicles[i].direction = 'r'
209 |                                 change_direction = True
210 |                                 break
211 |                 if change_direction == False:
212 |                     self.vehicles[i].position[1] -= delta_distance
213 |             if (self.vehicles[i].direction == 'r') and (change_direction == False):
214 |                 #print ('len of position', len(self.position), i)
215 |                 for j in range(len(self.up_lanes)):
216 |                     if (self.vehicles[i].position[0] <= self.up_lanes[j]) and ((self.vehicles[i].position[0] + delta_distance) >= self.up_lanes[j]):   # reached an intersection
217 |                         if (random.uniform(0, 1) < 0.4):
218 |                             self.vehicles[i].position = [self.up_lanes[j], self.vehicles[i].position[1] + (delta_distance - (self.up_lanes[j] - self.vehicles[i].position[0]))]
219 |                             change_direction = True
220 |                             self.vehicles[i].direction = 'u'
221 |                             break
222 |                 if change_direction == False:
223 |                     for j in range(len(self.down_lanes)):
224 |                         if (self.vehicles[i].position[0] <= self.down_lanes[j]) and ((self.vehicles[i].position[0] + delta_distance) >= self.down_lanes[j]):
225 |                             if (random.uniform(0, 1) < 0.4):
226 |                                 self.vehicles[i].position = [self.down_lanes[j], self.vehicles[i].position[1] - (delta_distance - (self.down_lanes[j] - self.vehicles[i].position[0]))]
227 |                                 change_direction = True
228 |                                 self.vehicles[i].direction = 'd'
229 |                                 break
230 |                 if change_direction == False:
231 |                     self.vehicles[i].position[0] += delta_distance
232 |             if (self.vehicles[i].direction == 'l') and (change_direction == False):
233 |                 for j in range(len(self.up_lanes)):
234 | 
235 |                     if (self.vehicles[i].position[0] >= self.up_lanes[j]) and ((self.vehicles[i].position[0] - delta_distance) <= self.up_lanes[j]):   # reached an intersection
236 |                         if (random.uniform(0, 1) < 0.4):
237 |                             self.vehicles[i].position = [self.up_lanes[j], self.vehicles[i].position[1] + (delta_distance - (self.vehicles[i].position[0] - self.up_lanes[j]))]
238 |                             change_direction = True
239 |                             self.vehicles[i].direction = 'u'
240 |                             break
241 |                 if change_direction == False:
242 |                     for j in range(len(self.down_lanes)):
243 |                         if (self.vehicles[i].position[0] >= self.down_lanes[j]) and ((self.vehicles[i].position[0] - delta_distance) <= self.down_lanes[j]):
244 |                             if (random.uniform(0, 1) < 0.4):
245 |                                 self.vehicles[i].position = [self.down_lanes[j], self.vehicles[i].position[1] - (delta_distance - (self.vehicles[i].position[0] - self.down_lanes[j]))]
246 |                                 change_direction = True
247 |                                 self.vehicles[i].direction = 'd'
248 |                                 break
249 |                 if change_direction == False:
250 |                     self.vehicles[i].position[0] -= delta_distance
251 |             # if the vehicle has driven off the map, re-inject it on a boundary lane
252 |             if (self.vehicles[i].position[0] < 0) or (self.vehicles[i].position[1] < 0) or (self.vehicles[i].position[0] > self.width) or (self.vehicles[i].position[1] > self.height):
253 |                 # (instead of deleting the vehicle, redirect it along the map edge)
254 |                 # print ('delete ', self.position[i])
255 |                 if (self.vehicles[i].direction == 'u'):
256 |                     self.vehicles[i].direction = 'r'
257 |                     self.vehicles[i].position = [self.vehicles[i].position[0], self.right_lanes[-1]]
258 |                 else:
259 |                     if (self.vehicles[i].direction == 'd'):
260 |                         self.vehicles[i].direction = 'l'
261 |                         self.vehicles[i].position = [self.vehicles[i].position[0], self.left_lanes[0]]
262 |                     else:
263 |                         if (self.vehicles[i].direction == 'l'):
264 |                             self.vehicles[i].direction = 'u'
265 |                             self.vehicles[i].position = [self.up_lanes[0], self.vehicles[i].position[1]]
266 |                         else:
267 |                             if (self.vehicles[i].direction == 'r'):
268 |                                 self.vehicles[i].direction = 'd'
269 |                                 self.vehicles[i].position = [self.down_lanes[-1], self.vehicles[i].position[1]]
270 | 
271 |             i += 1
272 |     def test_channel(self):
273 |         # ===================================
274 |         #  Exercise the V2I and V2V channel models and print their components
275 |         # ===================================
276 |         self.n_step = 0
277 |         self.vehicles = []
278 |         n_Veh = 20
279 |         self.n_Veh = n_Veh
280 |         self.add_new_vehicles_by_number(int(self.n_Veh/4))
281 |         step = 1000
282 |         time_step = 0.1   # update every 0.1 s
283 |         for i in range(step):
284 |             self.renew_positions()
285 |             positions = [c.position for c in self.vehicles]
286 |             self.update_large_fading(positions, time_step)
287 |             self.update_small_fading()
288 |             print("Time step: ", i)
289 |             print(" ============== V2I ===========")
290 |             print("Path Loss: ", self.V2Ichannels.PathLoss)
291 |             print("Shadow:", self.V2Ichannels.Shadow)
292 |             print("Fast Fading: ", self.V2Ichannels.FastFading)
293 |             print(" ============== V2V ===========")
294 |             print("Path Loss: ", 
self.V2Vchannels.PathLoss[0:3]) 295 | print("Shadow:", self.V2Vchannels.Shadow[0:3]) 296 | print("Fast Fading: ", self.V2Vchannels.FastFading[0:3]) 297 | 298 | def update_large_fading(self, positions, time_step): 299 | self.V2Ichannels.update_positions(positions) 300 | self.V2Vchannels.update_positions(positions) 301 | self.V2Ichannels.update_pathloss() 302 | self.V2Vchannels.update_pathloss() 303 | delta_distance = time_step * np.asarray([c.velocity for c in self.vehicles]) 304 | self.V2Ichannels.update_shadow(delta_distance) 305 | self.V2Vchannels.update_shadow(delta_distance) 306 | def update_small_fading(self): 307 | self.V2Ichannels.update_fast_fading() 308 | self.V2Vchannels.update_fast_fading() 309 | 310 | def renew_neighbor(self): 311 | # ========================================== 312 | # update the neighbors of each vehicle. 313 | # =========================================== 314 | for i in range(len(self.vehicles)): 315 | self.vehicles[i].neighbors = [] 316 | self.vehicles[i].actions = [] 317 | #print('action and neighbors delete', self.vehicles[i].actions, self.vehicles[i].neighbors) 318 | Distance = np.zeros((len(self.vehicles),len(self.vehicles))) 319 | z = np.array([[complex(c.position[0],c.position[1]) for c in self.vehicles]]) 320 | Distance = abs(z.T-z) 321 | for i in range(len(self.vehicles)): 322 | sort_idx = np.argsort(Distance[:,i]) 323 | for j in range(3): 324 | self.vehicles[i].neighbors.append(sort_idx[j+1]) 325 | destination = np.random.choice(sort_idx[1:int(len(sort_idx)/5)],3, replace = False) 326 | self.vehicles[i].destinations = destination 327 | def renew_channel(self): 328 | # =========================================================================== 329 | # This function updates all the channels including V2V and V2I channels 330 | # ============================================================================= 331 | positions = [c.position for c in self.vehicles] 332 | self.V2Ichannels.update_positions(positions) 333 | self.V2Vchannels.update_positions(positions) 334 | self.V2Ichannels.update_pathloss() 335 | self.V2Vchannels.update_pathloss() 336 | delta_distance = 0.002 * np.asarray([c.velocity for c in self.vehicles]) # time slot is 2 ms. 
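        # The update_shadow calls above implement a first-order Gauss-Markov model:
        #   S_new = exp(-dd/D) * S_old + sqrt(1 - exp(-2*dd/D)) * N(0, shadow_std)
        # where dd is the distance moved since the last update and D is the
        # decorrelation distance (10 m for V2V, 50 m for V2I). With the 2 ms slot,
        # dd = 0.002 * velocity is small, so consecutive slots stay highly correlated.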
337 |         self.V2Ichannels.update_shadow(delta_distance)
338 |         self.V2Vchannels.update_shadow(delta_distance)
339 |         self.V2V_channels_abs = self.V2Vchannels.PathLoss + self.V2Vchannels.Shadow + 50 * np.identity(
340 |             len(self.vehicles))
341 |         self.V2I_channels_abs = self.V2Ichannels.PathLoss + self.V2Ichannels.Shadow
342 | 
343 |     def renew_channels_fastfading(self):
344 |         # =======================================================================
345 |         #  Refresh the fast fading on top of the large-scale (path loss + shadowing) channels
346 |         # =========================================================================
347 |         self.renew_channel()
348 |         self.V2Ichannels.update_fast_fading()
349 |         self.V2Vchannels.update_fast_fading()
350 |         V2V_channels_with_fastfading = np.repeat(self.V2V_channels_abs[:, :, np.newaxis], self.n_RB, axis=2)
351 |         self.V2V_channels_with_fastfading = V2V_channels_with_fastfading - self.V2Vchannels.FastFading
352 |         V2I_channels_with_fastfading = np.repeat(self.V2I_channels_abs[:, np.newaxis], self.n_RB, axis=1)
353 |         self.V2I_channels_with_fastfading = V2I_channels_with_fastfading - self.V2Ichannels.FastFading
354 |         #print("V2I channels", self.V2I_channels_with_fastfading)
355 | 
356 |     def Compute_Performance_Reward_fast_fading_with_power(self, actions_power):   # rate/reward computation including fast fading
357 |         actions = actions_power.copy()[:, :, 0]           # the channel (RB) selection part
358 |         power_selection = actions_power.copy()[:, :, 1]   # the power selection part
359 |         Rate = np.zeros(len(self.vehicles))
360 |         Interference = np.zeros(self.n_RB)   # interference from V2V signals to the V2I links
361 |         for i in range(len(self.vehicles)):
362 |             for j in range(len(actions[i, :])):
363 |                 if not self.activate_links[i, j]:
364 |                     continue
365 |                 #print('power selection,', power_selection[i,j])
366 |                 Interference[actions[i][j]] += 10**((self.V2V_power_dB_List[power_selection[i, j]] - self.V2I_channels_with_fastfading[i, actions[i, j]] + self.vehAntGain + self.bsAntGain - self.bsNoiseFigure)/10)
367 | 
368 |         self.V2I_Interference = Interference + self.sig2
369 |         V2V_Interference = np.zeros((len(self.vehicles), 3))
370 |         V2V_Signal = np.zeros((len(self.vehicles), 3))
371 | 
372 |         # remove the effects of inactive links
373 |         #print('shapes', actions.shape, self.activate_links.shape)
374 |         #print(not self.activate_links)
375 |         actions[(np.logical_not(self.activate_links))] = -1
376 |         #print('action are', actions)
377 |         for i in range(self.n_RB):
378 |             indexes = np.argwhere(actions == i)
379 |             for j in range(len(indexes)):
380 |                 #receiver_j = self.vehicles[indexes[j,0]].neighbors[indexes[j,1]]
381 |                 receiver_j = self.vehicles[indexes[j, 0]].destinations[indexes[j, 1]]
382 |                 # compute the V2V signal links
383 |                 V2V_Signal[indexes[j, 0], indexes[j, 1]] = 10**((self.V2V_power_dB_List[power_selection[indexes[j, 0], indexes[j, 1]]] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10)
384 |                 #V2V_Signal[indexes[j, 0],indexes[j, 1]] = 10**((self.V2V_power_dB_List[0] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i])/10)
385 |                 if i < self.n_Veh:
386 |                     V2V_Interference[indexes[j, 0], indexes[j, 1]] += 10**((self.V2I_power_dB - self.V2V_channels_with_fastfading[i][receiver_j][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10)   # interference from the V2I links to the V2V links
387 |                 for k in range(j+1, len(indexes)):   # compute the mutual interference between peer V2V links
388 |                     #receiver_k = self.vehicles[indexes[k][0]].neighbors[indexes[k][1]]
389 |                     receiver_k = self.vehicles[indexes[k][0]].destinations[indexes[k][1]]
390 |                     V2V_Interference[indexes[j, 0], indexes[j, 1]] += 10**((self.V2V_power_dB_List[power_selection[indexes[k, 0], indexes[k, 1]]] - self.V2V_channels_with_fastfading[indexes[k][0]][receiver_j][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10)
391 |                     V2V_Interference[indexes[k, 0], indexes[k, 1]] += 10**((self.V2V_power_dB_List[power_selection[indexes[j, 0], indexes[j, 1]]] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_k][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10)
392 | 
393 |         self.V2V_Interference = V2V_Interference + self.sig2
394 |         V2V_Rate = np.zeros(self.activate_links.shape)
395 |         V2V_Rate[self.activate_links] = np.log2(1 + np.divide(V2V_Signal[self.activate_links], self.V2V_Interference[self.activate_links]))
396 | 
397 |         #print("V2V Rate", V2V_Rate * self.update_time_test * 1500)
398 |         #print ('V2V_Signal is ', np.log(np.mean(V2V_Signal[self.activate_links])))
399 |         V2I_Signals = self.V2I_power_dB - self.V2I_channels_abs[0:min(self.n_RB, self.n_Veh)] + self.vehAntGain + self.bsAntGain - self.bsNoiseFigure
400 |         V2I_Rate = np.log2(1 + np.divide(10**(V2I_Signals/10), self.V2I_Interference[0:min(self.n_RB, self.n_Veh)]))
401 | 
402 | 
403 |         # -- compute the latency constraints --
404 |         self.demand -= V2V_Rate * self.update_time_test * 1500   # decrease the remaining payload demand
405 |         self.test_time_count -= self.update_time_test            # time left in the estimation window
406 |         self.individual_time_limit -= self.update_time_test      # time left for each individual V2V transmission
407 |         self.individual_time_interval -= self.update_time_test   # time left until the next transmission starts
408 | 
409 |         # --- update the demand ---
410 | 
411 |         new_active = self.individual_time_interval <= 0
412 |         self.activate_links[new_active] = True
413 |         self.individual_time_interval[new_active] = np.random.exponential(0.02, self.individual_time_interval[new_active].shape) + self.V2V_limit
414 |         self.individual_time_limit[new_active] = self.V2V_limit
415 |         self.demand[new_active] = self.demand_amount
416 |         #print("demand is", self.demand)
417 |         #print('mean rate of average V2V link is', np.mean(V2V_Rate[self.activate_links]))
418 | 
419 |         # -- update the statistics --
420 |         early_finish = np.multiply(self.demand <= 0, self.activate_links)
421 |         unqualified = np.multiply(self.individual_time_limit <= 0, self.activate_links)
422 |         self.activate_links[np.add(early_finish, unqualified)] = False   # boolean add acts as a logical OR
423 |         #print('number of activate links is', np.sum(self.activate_links))
424 |         self.success_transmission += np.sum(early_finish)
425 |         self.failed_transmission += np.sum(unqualified)
426 |         #if self.n_step % 1000 == 0 :
427 |         #    self.success_transmission = 0
428 |         #    self.failed_transmission = 0
429 |         failed_percentage = self.failed_transmission/(self.failed_transmission + self.success_transmission + 0.0001)
430 |         # print('Percentage of failed', np.sum(new_active), self.failed_transmission, self.failed_transmission + self.success_transmission, failed_percentage)
431 |         return V2I_Rate, failed_percentage
432 | 
433 | 
434 |     def Compute_Performance_Reward_fast_fading_with_power_asyn(self, actions_power):   # asynchronous variant of the rate computation
435 |         # ===================================================
436 |         # --------- Used for Testing -------
437 |         # ===================================================
438 |         actions = actions_power[:, :, 0]           # the channel (RB) selection part
439 |         power_selection = actions_power[:, :, 1]   # the power selection part
440 |         Interference = np.zeros(self.n_RB)   # interference from V2V to the V2I links
441 |         for i in range(len(self.vehicles)):
442 |             for j in range(len(actions[i, :])):
443 |                 if not self.activate_links[i, j]:
444 |                     continue
445 |                 Interference[actions[i][j]] += 10**((self.V2V_power_dB_List[power_selection[i, j]] - \
446 |                                                      self.V2I_channels_with_fastfading[i, actions[i, j]] + \
447 |                                                      self.vehAntGain + self.bsAntGain - self.bsNoiseFigure)/10)
448 |         self.V2I_Interference = Interference + self.sig2
449 |         V2V_Interference = np.zeros((len(self.vehicles), 3))
450 |         V2V_Signal = np.zeros((len(self.vehicles), 3))
451 |         Interference_times = np.zeros((len(self.vehicles), 3))
452 |         actions[(np.logical_not(self.activate_links))] = -1
453 |         for i in range(self.n_RB):
454 |             indexes = np.argwhere(actions == i)
455 |             for j in range(len(indexes)):
456 |                 #receiver_j = self.vehicles[indexes[j,0]].neighbors[indexes[j,1]]
457 |                 receiver_j = self.vehicles[indexes[j, 0]].destinations[indexes[j, 1]]
458 |                 V2V_Signal[indexes[j, 0], indexes[j, 1]] = 10**((self.V2V_power_dB_List[power_selection[indexes[j, 0], indexes[j, 1]]] -\
459 |                                                                  self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10)
460 |                 #V2V_Signal[indexes[j, 0],indexes[j, 1]] = 10**((self.V2V_power_dB_List[0] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i])/10)
461 |                 if i= 0):
602 |                             continue
603 |                         V2V_Interference[k, m, channel_selection[i, j]] += 10**((self.V2V_power_dB_List[power_selection[i, j]] -\
604 |                                                                                  self.V2V_channels_with_fastfading[i][self.vehicles[k].destinations[m]][channel_selection[i, j]] + 2*self.vehAntGain - self.vehNoiseFigure)/10)
605 | 
606 |         self.V2V_Interference_all = 10 * np.log10(V2V_Interference)
607 | 
608 | 
609 |     def renew_demand(self):
610 |         # generate a new payload demand for each V2V link
611 |         self.demand = self.demand_amount*np.ones((self.n_RB, 3))
612 |         self.time_limit = 10
613 |     def act_for_training(self, actions, idx):
614 |         # =============================================
615 |         #  This function gives the reward used for training
616 |         # ===========================================
617 |         rewards_list = np.zeros(self.n_RB)
618 |         action_temp = actions.copy()
619 |         self.activate_links = np.ones((self.n_Veh, 3), dtype='bool')
620 |         V2I_rewardlist, V2V_rewardlist, time_left = self.Compute_Performance_Reward_Batch(action_temp, idx)
621 |         self.renew_positions()
622 |         self.renew_channels_fastfading()
623 |         self.Compute_Interference(actions)
624 |         rewards_list = rewards_list.T.reshape([-1])
625 |         V2I_rewardlist = V2I_rewardlist.T.reshape([-1])
626 |         V2V_rewardlist = V2V_rewardlist.T.reshape([-1])
627 |         V2I_reward = (V2I_rewardlist[actions[idx[0], idx[1], 0] + 20*actions[idx[0], idx[1], 1]] -\
628 |                       np.min(V2I_rewardlist))/(np.max(V2I_rewardlist) - np.min(V2I_rewardlist) + 0.000001)
629 |         V2V_reward = (V2V_rewardlist[actions[idx[0], idx[1], 0] + 20*actions[idx[0], idx[1], 1]] -\
630 |                       np.min(V2V_rewardlist))/(np.max(V2V_rewardlist) - np.min(V2V_rewardlist) + 0.000001)
631 |         lambdda = 0.1
632 |         #print ("Reward", V2I_reward, V2V_reward, time_left)
633 |         t = lambdda * V2I_reward + (1-lambdda) * V2V_reward   # weighted sum of the normalized V2I and V2V rewards
634 |         #print("time left", time_left)
635 |         #return t
636 |         return t - (self.V2V_limit - time_left)/self.V2V_limit   # penalize the time already spent on the V2V transmission
637 | 
638 |     def act_asyn(self, actions):
639 |         self.n_step += 1
640 |         if self.n_step % 10 == 0:
641 |             self.renew_positions()
642 |             self.renew_channels_fastfading()
643 |         reward = self.Compute_Performance_Reward_fast_fading_with_power_asyn(actions)
644 |         self.Compute_Interference(actions)
645 |         return reward
646 |     def act(self, actions):
647 |         # simulate the next state after the action is taken
648 |         self.n_step += 1
649 |         reward = self.Compute_Performance_Reward_fast_fading_with_power(actions)
650 |         self.renew_positions()
651 |         self.renew_channels_fastfading()
652 |         self.Compute_Interference(actions)
653 |         return reward
654 | 
655 |     def new_random_game(self, n_Veh=0):
656 |         # start a new game: fresh vehicles, channels, and V2V payload demands
657 |         self.n_step = 0
658 |         self.vehicles = []
659 |         if n_Veh > 0:
660 |             self.n_Veh = n_Veh
661 |         self.add_new_vehicles_by_number(int(self.n_Veh/4))
662 |         self.V2Vchannels = V2Vchannels(self.n_Veh, self.n_RB)
663 |         self.V2Ichannels = V2Ichannels(self.n_Veh, self.n_RB)
664 |         self.renew_channels_fastfading()
665 |         self.renew_neighbor()
666 |         self.demand_amount = 30
667 |         self.demand = self.demand_amount * np.ones((self.n_Veh, 3))
668 |         self.test_time_count = 10
669 |         self.V2V_limit = 0.1   # 100 ms tolerable V2V latency
670 |         self.individual_time_limit = self.V2V_limit * np.ones((self.n_Veh, 3))
671 |         self.individual_time_interval = np.random.exponential(0.05, (self.n_Veh, 3))
672 |         self.UnsuccessfulLink = np.zeros((self.n_Veh, 3))
673 |         self.success_transmission = 0
674 |         self.failed_transmission = 0
675 |         self.update_time_train = 0.01    # 10 ms update time for training
676 |         self.update_time_test = 0.002    # 2 ms update time for testing
677 |         self.update_time_asyn = 0.0002   # update one subset of the vehicles every 0.2 ms; each vehicle is updated every 2 ms
678 |         self.activate_links = np.zeros((self.n_Veh, 3), dtype='bool')
679 | 
680 | if __name__ == "__main__":
681 |     up_lanes = [3.5/2, 3.5/2 + 3.5, 250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2]
682 |     down_lanes = [250-3.5-3.5/2, 250-3.5/2, 500-3.5-3.5/2, 500-3.5/2, 750-3.5-3.5/2, 750-3.5/2]
683 |     left_lanes = [3.5/2, 3.5/2 + 3.5, 433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2]
684 |     right_lanes = [433-3.5-3.5/2, 433-3.5/2, 866-3.5-3.5/2, 866-3.5/2, 1299-3.5-3.5/2, 1299-3.5/2]
685 |     width = 750
686 |     height = 1299
687 |     Env = Environ(down_lanes, up_lanes, left_lanes, right_lanes, width, height)
688 |     Env.test_channel()
--------------------------------------------------------------------------------