├── README.md
├── .gitattributes
├── utils.py
├── replay_memory.py
├── Baseline_random.py
├── base.py
├── main.py
├── agent.py
└── Environment.py

/README.md:
--------------------------------------------------------------------------------
1 | # ResourceAllocationReinforcementLearning
2 | 
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | 
3 | def save_pkl(obj, path):
4 |     # Serialize obj to path.
5 |     with open(path, 'wb') as f:
6 |         pickle.dump(obj, f)
7 |     print(" [*] save %s" % path)
8 | 
9 | def load_pkl(path):
10 |     # Load a previously pickled object from path.
11 |     with open(path, 'rb') as f:
12 |         obj = pickle.load(f)
13 |     print(" [*] load %s" % path)
14 |     return obj
--------------------------------------------------------------------------------
/replay_memory.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | 
4 | class ReplayMemory:
5 |     # Fixed-size circular buffer of (state, next_state, reward, action) transitions.
6 |     def __init__(self, model_dir):
7 |         self.model_dir = model_dir
8 |         self.memory_size = 1000000
9 |         self.actions = np.empty(self.memory_size, dtype=np.uint8)
10 |         self.rewards = np.empty(self.memory_size, dtype=np.float64)
11 |         self.prestate = np.empty((self.memory_size, 82), dtype=np.float16)
12 |         self.poststate = np.empty((self.memory_size, 82), dtype=np.float16)
13 |         self.batch_size = 2000
14 |         self.count = 0
15 |         self.current = 0
16 | 
17 |     def add(self, prestate, poststate, reward, action):
18 |         # Store one transition, overwriting the oldest entry once the buffer is full.
19 |         self.actions[self.current] = action
20 |         self.rewards[self.current] = reward
21 |         self.prestate[self.current] = prestate
22 |         self.poststate[self.current] = poststate
23 |         self.count = max(self.count, self.current + 1)
24 |         self.current = (self.current + 1) % self.memory_size
25 | 
26 |     def sample(self):
27 |         # Draw batch_size transitions uniformly at random (with replacement).
28 |         indexes = [random.randint(0, self.count - 1) for _ in range(self.batch_size)]
29 |         prestate = self.prestate[indexes]
30 |         poststate = self.poststate[indexes]
31 |         actions = self.actions[indexes]
32 |         rewards = self.rewards[indexes]
33 |         return prestate, poststate, actions, rewards
--------------------------------------------------------------------------------
/Baseline_random.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function
2 | import numpy as np
3 | from Environment import *
4 | 
5 | # Random baseline: every V2V link picks a resource block uniformly at random
6 | # and always transmits at the highest power level.
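7 | # Action layout (this is the format Environment.act consumes):
8 | #   actions[i, j, 0] in [0, 20)   -- resource block picked for neighbor link j of vehicle i
9 | #   actions[i, j, 1] in {0, 1, 2} -- index into Environ.V2V_power_dB_List = [23, 10, 5] dBm
10 | # The baseline draws the RB uniformly at random, fixes the power index to 0
11 | # (23 dBm), and stacks the two choices into an (n, 3, 2) integer tensor by
12 | # concatenating along a new last axis.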
13 | 
14 | def main():
15 |     up_lanes = [3.5/2, 3.5/2 + 3.5, 250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2]
16 |     down_lanes = [250-3.5-3.5/2, 250-3.5/2, 500-3.5-3.5/2, 500-3.5/2, 750-3.5-3.5/2, 750-3.5/2]
17 |     left_lanes = [3.5/2, 3.5/2 + 3.5, 433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2]
18 |     right_lanes = [433-3.5-3.5/2, 433-3.5/2, 866-3.5-3.5/2, 866-3.5/2, 1299-3.5-3.5/2, 1299-3.5/2]
19 |     width = 750
20 |     height = 1299
21 |     n = 40
22 |     Env = Environ(down_lanes, up_lanes, left_lanes, right_lanes, width, height)
23 |     number_of_game = 500
24 |     n_step = 1000
25 |     V2I_Rate_List = np.zeros([number_of_game, n_step])
26 |     Fail_Percent = np.zeros([number_of_game, n_step])
27 |     for game_idx in range(number_of_game):
28 |         print(game_idx)
29 |         Env.new_random_game(n)
30 |         for i in range(n_step):
31 |             actions = np.random.randint(0, 20, [n, 3])              # random RB per V2V link
32 |             power_selection = np.zeros(actions.shape, dtype='int')  # always power level 0 (23 dBm)
33 |             actions = np.concatenate((actions[..., np.newaxis], power_selection[..., np.newaxis]), axis=2)
34 |             reward, percent = Env.act(actions)
35 |             V2I_Rate_List[game_idx, i] = np.sum(reward)
36 |             Fail_Percent[game_idx, i] = percent
37 |             print(np.sum(reward))
38 |             print('failure percentage here is ', percent)
39 |     print('The number of vehicles is ', n)
40 |     print('mean of V2I rate is ', np.mean(V2I_Rate_List))
41 |     print('mean of failure percentage is ', np.mean(Fail_Percent[:, -1]))
42 | 
43 | if __name__ == '__main__':
44 |     main()
45 | 
--------------------------------------------------------------------------------
/base.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import inspect
4 | import tensorflow as tf
5 | 
6 | 
7 | def class_vars(obj):
8 |     # Minimal stand-in for the helper this file expects (in the spirit of
9 |     # devsisters/DQN-tensorflow): collect the public, non-callable attributes.
10 |     return {k: v for k, v in inspect.getmembers(obj)
11 |             if not k.startswith('__') and not callable(v)}
12 | 
13 | 
14 | class BaseModel(object):
15 | 
16 |     def __init__(self, config):
17 |         self._saver = None
18 |         self.config = config
19 | 
20 |         try:
21 |             self._attrs = config.__dict__['__flags']
22 |         except (AttributeError, KeyError):
23 |             self._attrs = class_vars(config)
24 | 
25 |         for attr in self._attrs:
26 |             name = attr if not attr.startswith('_') else attr[1:]
27 |             setattr(self, name, getattr(self.config, attr))
28 | 
29 |     def save_model(self, step=None):
30 |         print(" [*] Saving checkpoints...")
31 |         model_name = type(self).__name__
32 | 
33 |         if not os.path.exists(self.checkpoint_dir):
34 |             os.makedirs(self.checkpoint_dir)
35 |         self.saver.save(self.sess, self.checkpoint_dir, global_step=step)
36 | 
37 |     def load_model(self):
38 |         print(" [*] Loading checkpoints...")
39 | 
40 |         ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir)
41 |         if ckpt and ckpt.model_checkpoint_path:
42 |             ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
43 |             fname = os.path.join(self.checkpoint_dir, ckpt_name)
44 |             self.saver.restore(self.sess, fname)
45 |             print(" [*] Load SUCCESS: %s" % fname)
46 |             return True
47 |         else:
48 |             print(" [!] Load FAILED: %s" % self.checkpoint_dir)
49 |             return False
50 | 
51 |     @property
52 |     def checkpoint_dir(self):
53 |         return os.path.join('checkpoints', self.model_dir)
54 | 
55 |     @property
56 |     def model_dir(self):
57 |         model_dir = self.config.env_name
58 |         for k, v in self._attrs.items():
59 |             if not k.startswith('_') and k not in ['display']:
60 |                 model_dir += "/%s-%s" % (k, ",".join([str(i) for i in v])
61 |                                          if type(v) == list else v)
62 |         return model_dir + '/'
63 | 
64 |     @property
65 |     def saver(self):
66 |         if self._saver is None:
67 |             self._saver = tf.train.Saver(max_to_keep=10)
68 |         return self._saver
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function
2 | import random
3 | import tensorflow as tf
4 | from agent import Agent
5 | from Environment import *
6 | 
7 | flags = tf.app.flags
8 | 
9 | # Model
10 | flags.DEFINE_string('model', 'm1', 'Type of model')
11 | flags.DEFINE_boolean('dueling', False, 'Whether to use dueling deep q-network')
12 | flags.DEFINE_boolean('double_q', False, 'Whether to use double q-learning')
13 | 
14 | # Environment
15 | flags.DEFINE_string('env_name', 'Breakout-v0', 'The name of gym environment to use')
16 | flags.DEFINE_integer('action_repeat', 4, 'The number of actions to be repeated')
17 | 
18 | # Etc
19 | flags.DEFINE_boolean('use_gpu', True, 'Whether to use gpu or not')
20 | flags.DEFINE_string('gpu_fraction', '1/1', 'idx / # of gpu fraction e.g. 1/3, 2/3, 3/3')
21 | flags.DEFINE_boolean('display', False, 'Whether to display the game screen or not')
22 | flags.DEFINE_boolean('is_train', True, 'Whether to do training or testing')
23 | flags.DEFINE_integer('random_seed', 123, 'Value of random seed')
24 | 
25 | FLAGS = flags.FLAGS
26 | 
27 | # Set random seed
28 | tf.set_random_seed(FLAGS.random_seed)
29 | random.seed(FLAGS.random_seed)
30 | 
31 | if FLAGS.gpu_fraction == '':
32 |     raise ValueError("--gpu_fraction should be defined")
33 | 
34 | def calc_gpu_fraction(fraction_string):
35 |     # '1/3' -> 1/3 of GPU memory, '2/3' -> 1/2, '3/3' -> all (fraction = 1/(num - idx + 1))
36 |     idx, num = fraction_string.split('/')
37 |     idx, num = float(idx), float(num)
38 |     fraction = 1 / (num - idx + 1)
39 |     print(" [*] GPU : %.4f" % fraction)
40 |     return fraction
41 | 
42 | def main(_):
43 |     up_lanes = [3.5/2, 3.5/2 + 3.5, 250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2]
44 |     down_lanes = [250-3.5-3.5/2, 250-3.5/2, 500-3.5-3.5/2, 500-3.5/2, 750-3.5-3.5/2, 750-3.5/2]
45 |     left_lanes = [3.5/2, 3.5/2 + 3.5, 433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2]
46 |     right_lanes = [433-3.5-3.5/2, 433-3.5/2, 866-3.5-3.5/2, 866-3.5/2, 1299-3.5-3.5/2, 1299-3.5/2]
47 |     width = 750
48 |     height = 1299
49 |     Env = Environ(down_lanes, up_lanes, left_lanes, right_lanes, width, height)
50 |     Env.new_random_game()
51 |     gpu_options = tf.GPUOptions(
52 |         per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction))
53 |     config = tf.ConfigProto(gpu_options=gpu_options)
54 |     config.gpu_options.allow_growth = True
55 | 
56 |     with tf.Session(config=config) as sess:
57 |         agent = Agent(FLAGS, Env, sess)   # Agent currently ignores its config argument
58 |         agent.train()
59 |         #agent.play()
60 | 
61 | if __name__ == '__main__':
62 |     tf.app.run()
--------------------------------------------------------------------------------
/agent.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 | import os
3 | import time
4 | import random
5 | import numpy as np
6 | from base import BaseModel
7 | from replay_memory import ReplayMemory
8 | from utils import save_pkl, load_pkl
9 | import tensorflow as tf
10 | import matplotlib.pyplot as plt
11 | 
12 | class Agent(BaseModel):
13 |     def __init__(self, config, environment, sess):   # config is accepted but currently unused
14 |         self.sess = sess
15 |         self.weight_dir = 'weight'
16 |         self.env = environment
17 |         #self.history = History(self.config)
18 |         model_dir = './Model/a.model'
19 |         self.memory = ReplayMemory(model_dir)
20 |         self.max_step = 100000
21 |         self.RB_number = 20
22 |         self.num_vehicle = len(self.env.vehicles)
23 |         self.action_all_with_power = np.zeros([self.num_vehicle, 3, 2], dtype='int32')   # (RB, power) actions taken by every V2V link
24 |         self.action_all_with_power_training = np.zeros([20, 3, 2], dtype='int32')        # same, for the 20-vehicle training episodes
25 |         self.reward = []
26 |         self.learning_rate = 0.01
27 |         self.learning_rate_minimum = 0.0001
28 |         self.learning_rate_decay = 0.96
29 |         self.learning_rate_decay_step = 500000
30 |         self.target_q_update_step = 100
31 |         self.discount = 0.5
32 |         self.double_q = True
33 |         self.build_dqn()
34 |         self.V2V_number = 3 * len(self.env.vehicles)   # every vehicle needs to communicate with 3 neighbors
35 |         self.training = True
36 |         #self.actions_all = np.zeros([len(self.env.vehicles), 3], dtype='int32')
37 |     def merge_action(self, idx, action):   # decode the flat action index into (RB, power level)
38 |         self.action_all_with_power[idx[0], idx[1], 0] = action % self.RB_number
39 |         self.action_all_with_power[idx[0], idx[1], 1] = int(np.floor(action / self.RB_number))
40 |     def get_state(self, idx):
41 |         # ===============
42 |         #  Get the state of V2V link idx = [vehicle, neighbor] from the environment
43 |         # ===============
44 |         vehicle_number = len(self.env.vehicles)
45 |         V2V_channel = (self.env.V2V_channels_with_fastfading[idx[0], self.env.vehicles[idx[0]].destinations[idx[1]], :] - 80)/60
46 |         V2I_channel = (self.env.V2I_channels_with_fastfading[idx[0], :] - 80)/60
47 |         V2V_interference = (-self.env.V2V_Interference_all[idx[0], idx[1], :] - 60)/60
48 |         NeiSelection = np.zeros(self.RB_number)
49 |         for i in range(3):
50 |             for j in range(3):
51 |                 if self.training:
52 |                     NeiSelection[self.action_all_with_power_training[self.env.vehicles[idx[0]].neighbors[i], j, 0]] = 1
53 |                 else:
54 |                     NeiSelection[self.action_all_with_power[self.env.vehicles[idx[0]].neighbors[i], j, 0]] = 1
55 | 
56 |         for i in range(3):
57 |             if i == idx[1]:
58 |                 continue
59 |             if self.training:
60 |                 if self.action_all_with_power_training[idx[0], i, 0] >= 0:
61 |                     NeiSelection[self.action_all_with_power_training[idx[0], i, 0]] = 1
62 |             else:
63 |                 if self.action_all_with_power[idx[0], i, 0] >= 0:
64 |                     NeiSelection[self.action_all_with_power[idx[0], i, 0]] = 1
65 |         load_remaining = np.asarray([self.env.demand[idx[0], idx[1]] / self.env.demand_amount])            # fraction of the payload still to deliver
66 |         time_remaining = np.asarray([self.env.individual_time_limit[idx[0], idx[1]] / self.env.V2V_limit]) # fraction of the latency budget left
67 |         #print('shapes', load_remaining.shape, time_remaining.shape)
68 |         return np.concatenate((V2I_channel, V2V_interference, V2V_channel, NeiSelection, load_remaining, time_remaining))
69 |         #return np.concatenate((V2I_channel, V2V_interference, V2V_channel, load_remaining, time_remaining))
70 |     def predict(self, s_t, step, test_ep=False):
71 |         # ==========================
72 |         #  Select an action
73 |         # ==========================
74 |         ep = 1/(step/1000000 + 1)
75 |         if random.random() < ep and not test_ep:   # epsilon-greedy: balance exploration and exploitation
76 |             action = np.random.randint(60)
77 |         else:
78 |             action = self.q_action.eval({self.s_t: [s_t]})[0]
79 |         return action
80 |     def observe(self, prestate, state, reward, action):
81 |         # -----------
82 |         #  Collect data for training
83 |         # ---------
84 |         self.memory.add(prestate, state, reward, action)   # add the transition to the replay memory
85 |         #print(self.step)
86 |         if self.step > 0:
87 |             if self.step % 50 == 0:
88 |                 #print('Training')
89 |                 self.q_learning_mini_batch()   # train on a mini batch
90 |                 #self.save_weight_to_pkl()
91 |             if self.step % self.target_q_update_step == self.target_q_update_step - 1:
92 |                 #print("Update Target Q network:")
93 |                 self.update_target_q_network()   # periodically copy the online weights into the target network
94 |     def train(self):
95 |         num_game, self.update_count, ep_reward = 0, 0, 0.
96 |         total_reward, self.total_loss, self.total_q = 0., 0., 0.
97 |         max_avg_ep_reward = 0
98 |         ep_reward, actions = [], []
99 |         mean_big = 0
100 |         number_big = 0
101 |         mean_not_big = 0
102 |         number_not_big = 0
103 |         self.env.new_random_game(20)
104 |         for self.step in range(0, 40000):   # number of training steps; may need more configuration
105 |             if self.step == 0:   # initialize some variables
106 |                 num_game, self.update_count, ep_reward = 0, 0, 0.
107 |                 total_reward, self.total_loss, self.total_q = 0., 0., 0.
108 |                 ep_reward, actions = [], []
109 | 
110 |             # prediction
111 |             # action = self.predict(self.history.get())
112 |             if (self.step % 2000 == 1):
113 |                 self.env.new_random_game(20)
114 |             print(self.step)
115 |             state_old = self.get_state([0, 0])
116 |             #print("state", state_old)
117 |             self.training = True
118 |             for k in range(1):
119 |                 for i in range(len(self.env.vehicles)):
120 |                     for j in range(3):
121 |                         state_old = self.get_state([i, j])
122 |                         action = self.predict(state_old, self.step)
123 |                         #self.merge_action([i, j], action)
124 |                         self.action_all_with_power_training[i, j, 0] = action % self.RB_number
125 |                         self.action_all_with_power_training[i, j, 1] = int(np.floor(action/self.RB_number))
126 |                         reward_train = self.env.act_for_training(self.action_all_with_power_training, [i, j])
127 |                         state_new = self.get_state([i, j])
128 |                         self.observe(state_old, state_new, reward_train, action)
129 |             if (self.step % 2000 == 0) and (self.step > 0):
130 |                 # testing
131 |                 self.training = False
132 |                 number_of_game = 10
133 |                 if (self.step % 10000 == 0) and (self.step > 0):
134 |                     number_of_game = 50
135 |                 if (self.step == 38000):
136 |                     number_of_game = 100
137 |                 V2I_Rate_list = np.zeros(number_of_game)
138 |                 Fail_percent_list = np.zeros(number_of_game)
139 |                 for game_idx in range(number_of_game):
140 |                     self.env.new_random_game(self.num_vehicle)
141 |                     test_sample = 200
142 |                     Rate_list = []
143 |                     print('test game idx:', game_idx)
144 |                     for k in range(test_sample):
145 |                         action_temp = self.action_all_with_power.copy()
146 |                         for i in range(len(self.env.vehicles)):
147 |                             self.action_all_with_power[i, :, 0] = -1
148 |                             sorted_idx = np.argsort(self.env.individual_time_limit[i, :])
149 |                             for j in sorted_idx:
150 |                                 state_old = self.get_state([i, j])
151 |                                 action = self.predict(state_old, self.step, True)
152 |                                 self.merge_action([i, j], action)
153 |                             if i % (len(self.env.vehicles)/10) == 1:
154 |                                 action_temp = self.action_all_with_power.copy()
155 |                                 reward, percent = self.env.act_asyn(action_temp)   #self.action_all)
156 |                                 Rate_list.append(np.sum(reward))
157 |                     #print("actions", self.action_all_with_power)
158 |                     V2I_Rate_list[game_idx] = np.mean(np.asarray(Rate_list))
159 |                     Fail_percent_list[game_idx] = percent
160 |                     #print("action is", self.action_all_with_power)
161 |                     print('failure probability is, ', percent)
162 |                     #print('action is ', action_temp[0, :])
163 |                 self.save_weight_to_pkl()
164 |                 print('The number of vehicles is ', len(self.env.vehicles))
165 |                 print('Mean of the V2I rate is ', np.mean(V2I_Rate_list))
166 |                 print('Mean of the failure percentage is ', np.mean(Fail_percent_list))
167 |                 #print('Test Reward is ', np.mean(test_result))
168 | 
169 | 
170 | 
171 | 
172 |     def q_learning_mini_batch(self):
173 | 
174 |         # Train the DQN on one mini batch from the replay memory
175 |         # ------
176 |         #s_t, action, reward, s_t_plus_1, terminal = self.memory.sample()
177 |         s_t, s_t_plus_1, action, reward = self.memory.sample()
178 |         #print()
179 |         #print('samples:', s_t[0:10], s_t_plus_1[0:10], action[0:10], reward[0:10])
180 |         t = time.time()
181 |         if self.double_q:   # double Q-learning: online net picks the action, target net evaluates it
182 |             pred_action = self.q_action.eval({self.s_t: s_t_plus_1})
183 |             q_t_plus_1_with_pred_action = self.target_q_with_idx.eval({self.target_s_t: s_t_plus_1, self.target_q_idx: [[idx, pred_a] for idx, pred_a in enumerate(pred_action)]})
184 |             target_q_t = self.discount * q_t_plus_1_with_pred_action + reward
185 |         else:
186 |             q_t_plus_1 = self.target_q.eval({self.target_s_t: s_t_plus_1})
187 |             max_q_t_plus_1 = np.max(q_t_plus_1, axis=1)
188 |             target_q_t = self.discount * max_q_t_plus_1 + reward
189 |         _, q_t, loss, w = self.sess.run([self.optim, self.q, self.loss, self.w], {self.target_q_t: target_q_t, self.action: action, self.s_t: s_t, self.learning_rate_step: self.step})   # train the network
190 | 
191 |         print('loss is ', loss)
192 |         self.total_loss += loss
193 |         self.total_q += q_t.mean()
194 |         self.update_count += 1
195 | 
196 | 
197 |     def build_dqn(self):
198 |         # --- Building the DQN -------
199 |         self.w = {}
200 |         self.t_w = {}
201 | 
202 |         initializer = tf.truncated_normal_initializer(0, 0.02)   # (unused; the weights below call tf.truncated_normal directly)
203 |         activation_fn = tf.nn.relu
204 |         n_hidden_1 = 500
205 |         n_hidden_2 = 250
206 |         n_hidden_3 = 120
207 |         n_input = 82    # state size: 4 x 20 channel/interference features + 2 scalars
208 |         n_output = 60   # 20 resource blocks x 3 power levels
209 |         def encoder(x):
210 |             weights = {
211 |                 'encoder_h1': tf.Variable(tf.truncated_normal([n_input, n_hidden_1], stddev=0.1)),
212 |                 'encoder_h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2], stddev=0.1)),
213 |                 'encoder_h3': tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3], stddev=0.1)),
214 |                 'encoder_h4': tf.Variable(tf.truncated_normal([n_hidden_3, n_output], stddev=0.1)),
215 |                 'encoder_b1': tf.Variable(tf.truncated_normal([n_hidden_1], stddev=0.1)),
216 |                 'encoder_b2': tf.Variable(tf.truncated_normal([n_hidden_2], stddev=0.1)),
217 |                 'encoder_b3': tf.Variable(tf.truncated_normal([n_hidden_3], stddev=0.1)),
218 |                 'encoder_b4': tf.Variable(tf.truncated_normal([n_output], stddev=0.1)),
219 | 
220 |             }
221 |             layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['encoder_h1']), weights['encoder_b1']))
222 |             layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['encoder_h2']), weights['encoder_b2']))
223 |             layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['encoder_h3']), weights['encoder_b3']))
224 |             layer_4 = tf.nn.relu(tf.add(tf.matmul(layer_3, weights['encoder_h4']), weights['encoder_b4']))   # note: ReLU on the output keeps Q-values non-negative
225 |             return layer_4, weights
226 |         with tf.variable_scope('prediction'):
227 |             self.s_t = tf.placeholder('float32', [None, n_input])
228 |             self.q, self.w = encoder(self.s_t)
229 |             self.q_action = tf.argmax(self.q, axis=1)
230 |         with tf.variable_scope('target'):
231 |             self.target_s_t = tf.placeholder('float32', [None, n_input])
232 |             self.target_q, self.target_w = encoder(self.target_s_t)
233 |             self.target_q_idx = tf.placeholder('int32', [None, None], 'output_idx')
234 |             self.target_q_with_idx = tf.gather_nd(self.target_q, self.target_q_idx)
235 |         with tf.variable_scope('pred_to_target'):
236 |             self.t_w_input = {}
237 |             self.t_w_assign_op = {}
238 |             for name in self.w.keys():
239 |                 print('name in self w keys', name)
240 |                 self.t_w_input[name] = tf.placeholder('float32', self.target_w[name].get_shape().as_list(), name=name)
241 |                 self.t_w_assign_op[name] = self.target_w[name].assign(self.t_w_input[name])
242 | 
243 |         def clipped_error(x):   # Huber loss (defined but unused; self.loss below is plain MSE)
244 |             try:
245 |                 return tf.select(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5)
246 |             except:
247 |                 return tf.where(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5)
248 | 
249 |         with tf.variable_scope('optimizer'):
250 |             self.target_q_t = tf.placeholder('float32', None, name='target_q_t')
251 |             self.action = tf.placeholder('int32', None, name='action')
252 |             action_one_hot = tf.one_hot(self.action, n_output, 1.0, 0.0, name='action_one_hot')
253 |             q_acted = tf.reduce_sum(self.q * action_one_hot, axis=1, name='q_acted')
254 |             self.delta = self.target_q_t - q_acted
255 |             self.global_step = tf.Variable(0, trainable=False)
256 |             self.loss = tf.reduce_mean(tf.square(self.delta), name='loss')
257 |             self.learning_rate_step = tf.placeholder('int64', None, name='learning_rate_step')
258 |             self.learning_rate_op = tf.maximum(self.learning_rate_minimum, tf.train.exponential_decay(self.learning_rate, self.learning_rate_step, self.learning_rate_decay_step, self.learning_rate_decay, staircase=True))
259 |             self.optim = tf.train.RMSPropOptimizer(self.learning_rate_op, momentum=0.95, epsilon=0.01).minimize(self.loss)
260 | 
261 |         tf.global_variables_initializer().run()
262 |         self.update_target_q_network()
263 | 
264 | 
265 | 
266 |     def update_target_q_network(self):
267 |         for name in self.w.keys():
268 |             self.t_w_assign_op[name].eval({self.t_w_input[name]: self.w[name].eval()})
269 | 
270 |     def save_weight_to_pkl(self):
271 |         if not os.path.exists(self.weight_dir):
272 |             os.makedirs(self.weight_dir)
273 |         for name in self.w.keys():
274 |             save_pkl(self.w[name].eval(), os.path.join(self.weight_dir, "%s.pkl" % name))
275 |     def load_weight_from_pkl(self):
276 |         with tf.variable_scope('load_pred_from_pkl'):
277 |             self.w_input = {}
278 |             self.w_assign_op = {}
279 |             for name in self.w.keys():
280 |                 self.w_input[name] = tf.placeholder('float32')
281 |                 self.w_assign_op[name] = self.w[name].assign(self.w_input[name])
282 |         for name in self.w.keys():
283 |             self.w_assign_op[name].eval({self.w_input[name]: load_pkl(os.path.join(self.weight_dir, "%s.pkl" % name))})
284 |         self.update_target_q_network()
285 | 
286 |     def play(self, n_step=100, n_episode=100, test_ep=None, render=False):
287 |         number_of_game = 100
288 |         V2I_Rate_list = np.zeros(number_of_game)
289 |         Fail_percent_list = np.zeros(number_of_game)
290 |         self.load_weight_from_pkl()
291 |         self.training = False
292 | 
293 | 
294 |         for game_idx in range(number_of_game):
295 |             self.env.new_random_game(self.num_vehicle)
296 |             test_sample = 200
297 |             Rate_list = []
298 |             print('test game idx:', game_idx)
299 |             print('The number of vehicles is ', len(self.env.vehicles))
300 |             time_left_list = []
301 |             power_select_list_0 = []
302 |             power_select_list_1 = []
303 |             power_select_list_2 = []
304 | 
305 |             for k in range(test_sample):
306 |                 action_temp = self.action_all_with_power.copy()
307 |                 for i in range(len(self.env.vehicles)):
308 |                     self.action_all_with_power[i, :, 0] = -1
309 |                     sorted_idx = np.argsort(self.env.individual_time_limit[i, :])
310 |                     for j in sorted_idx:
311 |                         state_old = self.get_state([i, j])
312 |                         time_left_list.append(state_old[-1])
313 |                         action = self.predict(state_old, 0, True)
314 |                         '''
315 |                         if state_old[-1] <= 0:
316 |                             continue
317 |                         power_selection = int(np.floor(action/self.RB_number))
318 |                         if power_selection == 0:
319 |                             power_select_list_0.append(state_old[-1])
320 | 
321 |                         if power_selection == 1:
322 |                             power_select_list_1.append(state_old[-1])
323 |                         if power_selection == 2:
324 |                             power_select_list_2.append(state_old[-1])
325 |                         '''
326 |                         self.merge_action([i, j], action)
327 |                     if i % (len(self.env.vehicles) / 10) == 1:
328 |                         action_temp = self.action_all_with_power.copy()
329 |                         reward, percent = self.env.act_asyn(action_temp)   # self.action_all)
330 |                         Rate_list.append(np.sum(reward))
331 |             # print("actions", self.action_all_with_power)
332 |             '''
333 |             number_0, bin_edges = np.histogram(power_select_list_0, bins=10)
334 | 
335 |             number_1, bin_edges = np.histogram(power_select_list_1, bins=10)
336 | 
337 |             number_2, bin_edges = np.histogram(power_select_list_2, bins=10)
338 | 
339 | 
340 |             p_0 = number_0 / (number_0 + number_1 + number_2)
341 |             p_1 = number_1 / (number_0 + number_1 + number_2)
342 |             p_2 = number_2 / (number_0 + number_1 + number_2)
343 | 
344 |             plt.plot(bin_edges[:-1]*0.1 + 0.01, p_0, 'b*-', label='Power Level 23 dB')
345 |             plt.plot(bin_edges[:-1]*0.1 + 0.01, p_1, 'rs-', label='Power Level 10 dB')
346 |             plt.plot(bin_edges[:-1]*0.1 + 0.01, p_2, 'go-', label='Power Level 5 dB')
347 |             plt.xlim([0, 0.12])
348 |             plt.xlabel("Time left for V2V transmission (s)")
349 |             plt.ylabel("Probability of power selection")
350 |             plt.legend()
351 |             plt.grid()
352 |             plt.show()
353 |             '''
354 |             V2I_Rate_list[game_idx] = np.mean(np.asarray(Rate_list))
355 |             Fail_percent_list[game_idx] = percent
356 | 
357 |             print('Mean of the V2I rate is ', np.mean(V2I_Rate_list[0:game_idx]))
358 |             print('Mean of the failure percentage is ', percent, np.mean(Fail_percent_list[0:game_idx]))
359 |             # print('action is ', action_temp[0, :])
360 | 
361 |         print('The number of vehicles is ', len(self.env.vehicles))
362 |         print('Mean of the V2I rate is ', np.mean(V2I_Rate_list))
363 |         print('Mean of the failure percentage is ', np.mean(Fail_percent_list))
364 |         # print('Test Reward is ', np.mean(test_result))
365 | 
366 | 
367 | 
368 | 
369 | 
370 | 
--------------------------------------------------------------------------------
/Environment.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | import time
4 | import random
5 | import math
6 | # Vehicular environment simulator: channel models, vehicle mobility, and rewards.
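# Module layout:
#   V2Vchannels -- V2V link model: LOS/NLOS path loss, log-normal shadowing with a
#                  10 m decorrelation distance, and Rayleigh fast fading per RB.
#   V2Ichannels -- V2I link model: 128.1 + 37.6*log10(d_km) path loss to a base
#                  station at the map center, 8 dB shadowing (50 m decorrelation),
#                  and Rayleigh fast fading.
#   Vehicle     -- position, direction, velocity, and neighbor/destination lists.
#   Environ     -- moves vehicles on the Manhattan-style lane grid, refreshes the
#                  channels, and computes the rates, rewards, and interference.
# (The parameters appear to follow an urban V2X evaluation setup in the style of
# 3GPP TR 36.885; treat that reading as an inference, not something stated here.)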
7 | class V2Vchannels:
8 |     # Simulator of the V2V channels
9 |     def __init__(self, n_Veh, n_RB):
10 |         self.t = 0
11 |         self.h_bs = 1.5
12 |         self.h_ms = 1.5
13 |         self.fc = 2
14 |         self.decorrelation_distance = 10
15 |         self.shadow_std = 3
16 |         self.n_Veh = n_Veh
17 |         self.n_RB = n_RB
18 |         self.update_shadow([])
19 |     def update_positions(self, positions):
20 |         self.positions = positions
21 |     def update_pathloss(self):
22 |         self.PathLoss = np.zeros(shape=(len(self.positions), len(self.positions)))
23 |         for i in range(len(self.positions)):
24 |             for j in range(len(self.positions)):
25 |                 self.PathLoss[i][j] = self.get_path_loss(self.positions[i], self.positions[j])
26 |     def update_shadow(self, delta_distance_list):
27 |         delta_distance = np.zeros((len(delta_distance_list), len(delta_distance_list)))
28 |         for i in range(len(delta_distance)):
29 |             for j in range(len(delta_distance)):
30 |                 delta_distance[i][j] = delta_distance_list[i] + delta_distance_list[j]
31 |         if len(delta_distance_list) == 0:   # initialization
32 |             self.Shadow = np.random.normal(0, self.shadow_std, size=(self.n_Veh, self.n_Veh))
33 |         else:
34 |             self.Shadow = np.exp(-1*(delta_distance/self.decorrelation_distance)) * self.Shadow +\
35 |                           np.sqrt(1 - np.exp(-2*(delta_distance/self.decorrelation_distance))) * np.random.normal(0, self.shadow_std, size=(self.n_Veh, self.n_Veh))
36 |     def update_fast_fading(self):
37 |         h = 1/np.sqrt(2) * (np.random.normal(size=(self.n_Veh, self.n_Veh, self.n_RB)) + 1j * np.random.normal(size=(self.n_Veh, self.n_Veh, self.n_RB)))
38 |         self.FastFading = 20 * np.log10(np.abs(h))   # Rayleigh fading, drawn independently per RB
39 |     def get_path_loss(self, position_A, position_B):
40 |         d1 = abs(position_A[0] - position_B[0])
41 |         d2 = abs(position_A[1] - position_B[1])
42 |         d = math.hypot(d1, d2) + 0.001
43 |         d_bp = 4 * (self.h_bs - 1) * (self.h_ms - 1) * self.fc * (10**9)/(3*10**8)   # breakpoint distance
44 |         def PL_Los(d):
45 |             if d <= 3:
46 |                 return 22.7 * np.log10(3) + 41 + 20*np.log10(self.fc/5)
47 |             else:
48 |                 if d < d_bp:
49 |                     return 22.7 * np.log10(d) + 41 + 20 * np.log10(self.fc/5)
50 |                 else:
51 |                     return 40.0 * np.log10(d) + 9.45 - 17.3 * np.log10(self.h_bs) - 17.3 * np.log10(self.h_ms) + 2.7 * np.log10(self.fc/5)
52 |         def PL_NLos(d_a, d_b):
53 |             n_j = max(2.8 - 0.0024*d_b, 1.84)
54 |             return PL_Los(d_a) + 20 - 12.5*n_j + 10 * n_j * np.log10(d_b) + 3*np.log10(self.fc/5)
55 |         if min(d1, d2) < 7:
56 |             PL = PL_Los(d)
57 |             self.ifLOS = True
58 |             self.shadow_std = 3
59 |         else:
60 |             PL = min(PL_NLos(d1, d2), PL_NLos(d2, d1))
61 |             self.ifLOS = False
62 |             self.shadow_std = 4   # NLOS shadowing has a larger standard deviation (4 dB)
63 |         return PL
64 | 
65 | class V2Ichannels:
66 |     # Simulator of the V2I channels
67 |     def __init__(self, n_Veh, n_RB):
68 |         self.h_bs = 25
69 |         self.h_ms = 1.5
70 |         self.Decorrelation_distance = 50
71 |         self.BS_position = [750/2, 1299/2]   # the BS sits at the center of the map
72 |         self.shadow_std = 8
73 |         self.n_Veh = n_Veh
74 |         self.n_RB = n_RB
75 |         self.update_shadow([])
76 |     def update_positions(self, positions):
77 |         self.positions = positions
78 | 
79 |     def update_pathloss(self):
80 |         self.PathLoss = np.zeros(len(self.positions))
81 |         for i in range(len(self.positions)):
82 |             d1 = abs(self.positions[i][0] - self.BS_position[0])
83 |             d2 = abs(self.positions[i][1] - self.BS_position[1])
84 |             distance = math.hypot(d1, d2)   # distance in meters; the /1000 below converts it to km
85 |             self.PathLoss[i] = 128.1 + 37.6*np.log10(math.sqrt(distance**2 + (self.h_bs-self.h_ms)**2)/1000)
86 |     def update_shadow(self, delta_distance_list):
87 |         if len(delta_distance_list) == 0:   # initialization
88 |             self.Shadow = np.random.normal(0, self.shadow_std, self.n_Veh)
89 |         else:
90 |             delta_distance = np.asarray(delta_distance_list)
91 |             self.Shadow = np.exp(-1*(delta_distance/self.Decorrelation_distance)) * self.Shadow +\
92 |                           np.sqrt(1-np.exp(-2*(delta_distance/self.Decorrelation_distance))) * np.random.normal(0, self.shadow_std, self.n_Veh)
93 |     def update_fast_fading(self):
94 |         h = 1/np.sqrt(2) * (np.random.normal(size=(self.n_Veh, self.n_RB)) + 1j * np.random.normal(size=(self.n_Veh, self.n_RB)))
95 |         self.FastFading = 20 * np.log10(np.abs(h))
96 | 
97 | class Vehicle:
98 |     # Vehicle simulator: holds all the information for one vehicle
99 |     def __init__(self, start_position, start_direction, velocity):
100 |         self.position = start_position
101 |         self.direction = start_direction
102 |         self.velocity = velocity
103 |         self.neighbors = []
104 |         self.destinations = []
105 | class Environ:
106 |     # Environment simulator: provides states and rewards to the agents and
107 |     # evolves to a new state based on the actions taken by the vehicles.
108 |     def __init__(self, down_lane, up_lane, left_lane, right_lane, width, height):
109 |         self.timestep = 0.01
110 |         self.down_lanes = down_lane
111 |         self.up_lanes = up_lane
112 |         self.left_lanes = left_lane
113 |         self.right_lanes = right_lane
114 |         self.width = width
115 |         self.height = height
116 |         self.vehicles = []
117 |         self.demands = []
118 |         self.V2V_power_dB = 23   # dBm
119 |         self.V2I_power_dB = 23   # dBm
120 |         self.V2V_power_dB_List = [23, 10, 5]   # the selectable power levels
121 |         #self.V2V_power = 10**(self.V2V_power_dB)
122 |         #self.V2I_power = 10**(self.V2I_power_dB)
123 |         self.sig2_dB = -114
124 |         self.bsAntGain = 8
125 |         self.bsNoiseFigure = 5
126 |         self.vehAntGain = 3
127 |         self.vehNoiseFigure = 9
128 |         self.sig2 = 10**(self.sig2_dB/10)
129 |         self.V2V_Shadowing = []
130 |         self.V2I_Shadowing = []
131 |         self.delta_distance = []
132 |         self.n_RB = 20
133 |         self.n_Veh = 40
134 |         self.V2Vchannels = V2Vchannels(self.n_Veh, self.n_RB)   # channel models sized by the number of vehicles
135 |         self.V2Ichannels = V2Ichannels(self.n_Veh, self.n_RB)
136 | 
137 |         self.V2V_Interference_all = np.zeros((self.n_Veh, 3, self.n_RB)) + self.sig2
138 |         self.n_step = 0
139 |     def add_new_vehicles(self, start_position, start_direction, start_velocity):
140 |         self.vehicles.append(Vehicle(start_position, start_direction, start_velocity))
141 | 
142 |     def add_new_vehicles_by_number(self, n):   # adds 4*n vehicles, one per direction on a random lane index
143 |         for i in range(n):
144 |             ind = np.random.randint(0, len(self.down_lanes))
145 |             start_position = [self.down_lanes[ind], random.randint(0, self.height)]
146 |             start_direction = 'd'
147 |             self.add_new_vehicles(start_position, start_direction, random.randint(10, 15))
148 |             start_position = [self.up_lanes[ind], random.randint(0, self.height)]
149 |             start_direction = 'u'
150 |             self.add_new_vehicles(start_position, start_direction, random.randint(10, 15))
151 |             start_position = [random.randint(0, self.width), self.left_lanes[ind]]
152 |             start_direction = 'l'
153 |             self.add_new_vehicles(start_position, start_direction, random.randint(10, 15))
154 |             start_position = [random.randint(0, self.width), self.right_lanes[ind]]
155 |             start_direction = 'r'
156 |             self.add_new_vehicles(start_position, start_direction, random.randint(10, 15))
157 |         self.V2V_Shadowing = np.random.normal(0, 3, [len(self.vehicles), len(self.vehicles)])
158 |         self.V2I_Shadowing = np.random.normal(0, 8, len(self.vehicles))
159 |         self.delta_distance = np.asarray([c.velocity for c in self.vehicles])
160 |         #self.renew_channel()
161 |     def renew_positions(self):
162 |         # ========================================================
163 |         # This function updates the position of each vehicle
164 |         # ===========================================================
165 |         i = 0
166 |         #for i in range(len(self.position)):
167 |         while(i < len(self.vehicles)):
168 |             #print ('start iteration ', i)
169 |             #print(self.position, len(self.position), self.direction)
170 |             delta_distance = self.vehicles[i].velocity * self.timestep
171 |             change_direction = False
172 |             if self.vehicles[i].direction == 'u':
173 |                 #print ('len of position', len(self.position), i)
174 |                 for j in range(len(self.left_lanes)):
175 | 
176 |                     if (self.vehicles[i].position[1] <= self.left_lanes[j]) and ((self.vehicles[i].position[1] + delta_distance) >= self.left_lanes[j]):   # reached an intersection
177 |                         if (random.uniform(0, 1) < 0.4):
178 |                             self.vehicles[i].position = [self.vehicles[i].position[0] - (delta_distance - (self.left_lanes[j] - self.vehicles[i].position[1])), self.left_lanes[j]]
179 |                             self.vehicles[i].direction = 'l'
180 |                             change_direction = True
181 |                             break
182 |                 if change_direction == False:
183 |                     for j in range(len(self.right_lanes)):
184 |                         if (self.vehicles[i].position[1] <= self.right_lanes[j]) and ((self.vehicles[i].position[1] + delta_distance) >= self.right_lanes[j]):
185 |                             if (random.uniform(0, 1) < 0.4):
186 |                                 self.vehicles[i].position = [self.vehicles[i].position[0] + (delta_distance + (self.right_lanes[j] - self.vehicles[i].position[1])), self.right_lanes[j]]
187 |                                 self.vehicles[i].direction = 'r'
188 |                                 change_direction = True
189 |                                 break
190 |                 if change_direction == False:
191 |                     self.vehicles[i].position[1] += delta_distance
192 |             if (self.vehicles[i].direction == 'd') and (change_direction == False):
193 |                 #print ('len of position', len(self.position), i)
194 |                 for j in range(len(self.left_lanes)):
195 |                     if (self.vehicles[i].position[1] >= self.left_lanes[j]) and ((self.vehicles[i].position[1] - delta_distance) <= self.left_lanes[j]):   # reached an intersection
196 |                         if (random.uniform(0, 1) < 0.4):
197 |                             self.vehicles[i].position = [self.vehicles[i].position[0] - (delta_distance - (self.vehicles[i].position[1] - self.left_lanes[j])), self.left_lanes[j]]
198 |                             #print ('down with left', self.vehicles[i].position)
199 |                             self.vehicles[i].direction = 'l'
200 |                             change_direction = True
201 |                             break
202 |                 if change_direction == False:
203 |                     for j in range(len(self.right_lanes)):
204 |                         if (self.vehicles[i].position[1] >= self.right_lanes[j]) and (self.vehicles[i].position[1] - delta_distance <= self.right_lanes[j]):
205 |                             if (random.uniform(0, 1) < 0.4):
206 |                                 self.vehicles[i].position = [self.vehicles[i].position[0] + (delta_distance + (self.vehicles[i].position[1] - self.right_lanes[j])), self.right_lanes[j]]
207 |                                 #print ('down with right', self.vehicles[i].position)
208 |                                 self.vehicles[i].direction = 'r'
209 |                                 change_direction = True
210 |                                 break
211 |                 if change_direction == False:
212 |                     self.vehicles[i].position[1] -= delta_distance
213 |             if (self.vehicles[i].direction == 'r') and (change_direction == False):
214 |                 #print ('len of position', len(self.position), i)
215 |                 for j in range(len(self.up_lanes)):
216 |                     if (self.vehicles[i].position[0] <= self.up_lanes[j]) and ((self.vehicles[i].position[0] + delta_distance) >= self.up_lanes[j]):   # reached an intersection
217 |                         if (random.uniform(0, 1) < 0.4):
218 |                             self.vehicles[i].position = [self.up_lanes[j], self.vehicles[i].position[1] + (delta_distance - (self.up_lanes[j] - self.vehicles[i].position[0]))]
219 |                             change_direction = True
220 |                             self.vehicles[i].direction = 'u'
221 |                             break
222 |                 if change_direction == False:
223 |                     for j in range(len(self.down_lanes)):
224 |                         if (self.vehicles[i].position[0] <= self.down_lanes[j]) and ((self.vehicles[i].position[0] + delta_distance) >= self.down_lanes[j]):
225 |                             if (random.uniform(0, 1) < 0.4):
226 |                                 self.vehicles[i].position = [self.down_lanes[j], self.vehicles[i].position[1] - (delta_distance - (self.down_lanes[j] - self.vehicles[i].position[0]))]
227 |                                 change_direction = True
228 |                                 self.vehicles[i].direction = 'd'
229 |                                 break
230 |                 if change_direction == False:
231 |                     self.vehicles[i].position[0] += delta_distance
232 |             if (self.vehicles[i].direction == 'l') and (change_direction == False):
233 |                 for j in range(len(self.up_lanes)):
234 | 
235 |                     if (self.vehicles[i].position[0] >= self.up_lanes[j]) and ((self.vehicles[i].position[0] - delta_distance) <= self.up_lanes[j]):   # reached an intersection
236 |                         if (random.uniform(0, 1) < 0.4):
237 |                             self.vehicles[i].position = [self.up_lanes[j], self.vehicles[i].position[1] + (delta_distance - (self.vehicles[i].position[0] - self.up_lanes[j]))]
238 |                             change_direction = True
239 |                             self.vehicles[i].direction = 'u'
240 |                             break
241 |                 if change_direction == False:
242 |                     for j in range(len(self.down_lanes)):
243 |                         if (self.vehicles[i].position[0] >= self.down_lanes[j]) and ((self.vehicles[i].position[0] - delta_distance) <= self.down_lanes[j]):
244 |                             if (random.uniform(0, 1) < 0.4):
245 |                                 self.vehicles[i].position = [self.down_lanes[j], self.vehicles[i].position[1] - (delta_distance - (self.vehicles[i].position[0] - self.down_lanes[j]))]
246 |                                 change_direction = True
247 |                                 self.vehicles[i].direction = 'd'
248 |                                 break
249 |                 if change_direction == False:
250 |                     self.vehicles[i].position[0] -= delta_distance
251 |             # if the vehicle has driven off the map, re-inject it on a boundary lane
252 |             if (self.vehicles[i].position[0] < 0) or (self.vehicles[i].position[1] < 0) or (self.vehicles[i].position[0] > self.width) or (self.vehicles[i].position[1] > self.height):
253 |                 # (instead of deleting the vehicle, redirect it along the map edge)
254 |                 # print ('delete ', self.position[i])
255 |                 if (self.vehicles[i].direction == 'u'):
256 |                     self.vehicles[i].direction = 'r'
257 |                     self.vehicles[i].position = [self.vehicles[i].position[0], self.right_lanes[-1]]
258 |                 else:
259 |                     if (self.vehicles[i].direction == 'd'):
260 |                         self.vehicles[i].direction = 'l'
261 |                         self.vehicles[i].position = [self.vehicles[i].position[0], self.left_lanes[0]]
262 |                     else:
263 |                         if (self.vehicles[i].direction == 'l'):
264 |                             self.vehicles[i].direction = 'u'
265 |                             self.vehicles[i].position = [self.up_lanes[0], self.vehicles[i].position[1]]
266 |                         else:
267 |                             if (self.vehicles[i].direction == 'r'):
268 |                                 self.vehicles[i].direction = 'd'
269 |                                 self.vehicles[i].position = [self.down_lanes[-1], self.vehicles[i].position[1]]
270 | 
271 |             i += 1
272 |     def test_channel(self):
273 |         # ===================================
274 |         #  Exercise the V2I and V2V channel models and print their components
275 |         # ===================================
276 |         self.n_step = 0
277 |         self.vehicles = []
278 |         n_Veh = 20
279 |         self.n_Veh = n_Veh
280 |         self.add_new_vehicles_by_number(int(self.n_Veh/4))
281 |         step = 1000
282 |         time_step = 0.1   # update every 0.1 s
283 |         for i in range(step):
284 |             self.renew_positions()
285 |             positions = [c.position for c in self.vehicles]
286 |             self.update_large_fading(positions, time_step)
287 |             self.update_small_fading()
288 |             print("Time step: ", i)
289 |             print(" ============== V2I ===========")
290 |             print("Path Loss: ", self.V2Ichannels.PathLoss)
291 |             print("Shadow:", self.V2Ichannels.Shadow)
292 |             print("Fast Fading: ", self.V2Ichannels.FastFading)
293 |             print(" ============== V2V ===========")
294 |             print("Path Loss: ", 
self.V2Vchannels.PathLoss[0:3]) 295 | print("Shadow:", self.V2Vchannels.Shadow[0:3]) 296 | print("Fast Fading: ", self.V2Vchannels.FastFading[0:3]) 297 | 298 | def update_large_fading(self, positions, time_step): 299 | self.V2Ichannels.update_positions(positions) 300 | self.V2Vchannels.update_positions(positions) 301 | self.V2Ichannels.update_pathloss() 302 | self.V2Vchannels.update_pathloss() 303 | delta_distance = time_step * np.asarray([c.velocity for c in self.vehicles]) 304 | self.V2Ichannels.update_shadow(delta_distance) 305 | self.V2Vchannels.update_shadow(delta_distance) 306 | def update_small_fading(self): 307 | self.V2Ichannels.update_fast_fading() 308 | self.V2Vchannels.update_fast_fading() 309 | 310 | def renew_neighbor(self): 311 | # ========================================== 312 | # update the neighbors of each vehicle. 313 | # =========================================== 314 | for i in range(len(self.vehicles)): 315 | self.vehicles[i].neighbors = [] 316 | self.vehicles[i].actions = [] 317 | #print('action and neighbors delete', self.vehicles[i].actions, self.vehicles[i].neighbors) 318 | Distance = np.zeros((len(self.vehicles),len(self.vehicles))) 319 | z = np.array([[complex(c.position[0],c.position[1]) for c in self.vehicles]]) 320 | Distance = abs(z.T-z) 321 | for i in range(len(self.vehicles)): 322 | sort_idx = np.argsort(Distance[:,i]) 323 | for j in range(3): 324 | self.vehicles[i].neighbors.append(sort_idx[j+1]) 325 | destination = np.random.choice(sort_idx[1:int(len(sort_idx)/5)],3, replace = False) 326 | self.vehicles[i].destinations = destination 327 | def renew_channel(self): 328 | # =========================================================================== 329 | # This function updates all the channels including V2V and V2I channels 330 | # ============================================================================= 331 | positions = [c.position for c in self.vehicles] 332 | self.V2Ichannels.update_positions(positions) 333 | self.V2Vchannels.update_positions(positions) 334 | self.V2Ichannels.update_pathloss() 335 | self.V2Vchannels.update_pathloss() 336 | delta_distance = 0.002 * np.asarray([c.velocity for c in self.vehicles]) # time slot is 2 ms. 
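        # The update_shadow calls above implement a first-order Gauss-Markov model:
        #   S_new = exp(-dd/D) * S_old + sqrt(1 - exp(-2*dd/D)) * N(0, shadow_std)
        # where dd is the distance moved since the last update and D is the
        # decorrelation distance (10 m for V2V, 50 m for V2I). With the 2 ms slot,
        # dd = 0.002 * velocity is small, so consecutive slots stay highly correlated.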
337 |         self.V2Ichannels.update_shadow(delta_distance)
338 |         self.V2Vchannels.update_shadow(delta_distance)
339 |         self.V2V_channels_abs = self.V2Vchannels.PathLoss + self.V2Vchannels.Shadow + 50 * np.identity(
340 |             len(self.vehicles))
341 |         self.V2I_channels_abs = self.V2Ichannels.PathLoss + self.V2Ichannels.Shadow
342 | 
343 |     def renew_channels_fastfading(self):
344 |         # =======================================================================
345 |         #  Refresh the fast fading on top of the large-scale (path loss + shadowing) channels
346 |         # =========================================================================
347 |         self.renew_channel()
348 |         self.V2Ichannels.update_fast_fading()
349 |         self.V2Vchannels.update_fast_fading()
350 |         V2V_channels_with_fastfading = np.repeat(self.V2V_channels_abs[:, :, np.newaxis], self.n_RB, axis=2)
351 |         self.V2V_channels_with_fastfading = V2V_channels_with_fastfading - self.V2Vchannels.FastFading
352 |         V2I_channels_with_fastfading = np.repeat(self.V2I_channels_abs[:, np.newaxis], self.n_RB, axis=1)
353 |         self.V2I_channels_with_fastfading = V2I_channels_with_fastfading - self.V2Ichannels.FastFading
354 |         #print("V2I channels", self.V2I_channels_with_fastfading)
355 | 
356 |     def Compute_Performance_Reward_fast_fading_with_power(self, actions_power):   # rate/reward computation including fast fading
357 |         actions = actions_power.copy()[:, :, 0]           # the channel (RB) selection part
358 |         power_selection = actions_power.copy()[:, :, 1]   # the power selection part
359 |         Rate = np.zeros(len(self.vehicles))
360 |         Interference = np.zeros(self.n_RB)   # interference from V2V signals to the V2I links
361 |         for i in range(len(self.vehicles)):
362 |             for j in range(len(actions[i, :])):
363 |                 if not self.activate_links[i, j]:
364 |                     continue
365 |                 #print('power selection,', power_selection[i,j])
366 |                 Interference[actions[i][j]] += 10**((self.V2V_power_dB_List[power_selection[i, j]] - self.V2I_channels_with_fastfading[i, actions[i, j]] + self.vehAntGain + self.bsAntGain - self.bsNoiseFigure)/10)
367 | 
368 |         self.V2I_Interference = Interference + self.sig2
369 |         V2V_Interference = np.zeros((len(self.vehicles), 3))
370 |         V2V_Signal = np.zeros((len(self.vehicles), 3))
371 | 
372 |         # remove the effects of inactive links
373 |         #print('shapes', actions.shape, self.activate_links.shape)
374 |         #print(not self.activate_links)
375 |         actions[(np.logical_not(self.activate_links))] = -1
376 |         #print('action are', actions)
377 |         for i in range(self.n_RB):
378 |             indexes = np.argwhere(actions == i)
379 |             for j in range(len(indexes)):
380 |                 #receiver_j = self.vehicles[indexes[j,0]].neighbors[indexes[j,1]]
381 |                 receiver_j = self.vehicles[indexes[j, 0]].destinations[indexes[j, 1]]
382 |                 # compute the V2V signal links
383 |                 V2V_Signal[indexes[j, 0], indexes[j, 1]] = 10**((self.V2V_power_dB_List[power_selection[indexes[j, 0], indexes[j, 1]]] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10)
384 |                 #V2V_Signal[indexes[j, 0],indexes[j, 1]] = 10**((self.V2V_power_dB_List[0] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i])/10)
385 |                 if i < self.n_Veh:
386 |                     V2V_Interference[indexes[j, 0], indexes[j, 1]] += 10**((self.V2I_power_dB - self.V2V_channels_with_fastfading[i][receiver_j][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10)   # interference from the V2I links to the V2V links
387 |                 for k in range(j+1, len(indexes)):   # compute the mutual interference between peer V2V links
388 |                     #receiver_k = self.vehicles[indexes[k][0]].neighbors[indexes[k][1]]
389 |                     receiver_k = self.vehicles[indexes[k][0]].destinations[indexes[k][1]]
390 |                     V2V_Interference[indexes[j, 0], indexes[j, 1]] += 10**((self.V2V_power_dB_List[power_selection[indexes[k, 0], indexes[k, 1]]] - self.V2V_channels_with_fastfading[indexes[k][0]][receiver_j][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10)
391 |                     V2V_Interference[indexes[k, 0], indexes[k, 1]] += 10**((self.V2V_power_dB_List[power_selection[indexes[j, 0], indexes[j, 1]]] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_k][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10)
392 | 
393 |         self.V2V_Interference = V2V_Interference + self.sig2
394 |         V2V_Rate = np.zeros(self.activate_links.shape)
395 |         V2V_Rate[self.activate_links] = np.log2(1 + np.divide(V2V_Signal[self.activate_links], self.V2V_Interference[self.activate_links]))
396 | 
397 |         #print("V2V Rate", V2V_Rate * self.update_time_test * 1500)
398 |         #print ('V2V_Signal is ', np.log(np.mean(V2V_Signal[self.activate_links])))
399 |         V2I_Signals = self.V2I_power_dB - self.V2I_channels_abs[0:min(self.n_RB, self.n_Veh)] + self.vehAntGain + self.bsAntGain - self.bsNoiseFigure
400 |         V2I_Rate = np.log2(1 + np.divide(10**(V2I_Signals/10), self.V2I_Interference[0:min(self.n_RB, self.n_Veh)]))
401 | 
402 | 
403 |         # -- compute the latency constraints --
404 |         self.demand -= V2V_Rate * self.update_time_test * 1500   # decrease the remaining payload demand
405 |         self.test_time_count -= self.update_time_test            # time left in the estimation window
406 |         self.individual_time_limit -= self.update_time_test      # time left for each individual V2V transmission
407 |         self.individual_time_interval -= self.update_time_test   # time left until the next transmission starts
408 | 
409 |         # --- update the demand ---
410 | 
411 |         new_active = self.individual_time_interval <= 0
412 |         self.activate_links[new_active] = True
413 |         self.individual_time_interval[new_active] = np.random.exponential(0.02, self.individual_time_interval[new_active].shape) + self.V2V_limit
414 |         self.individual_time_limit[new_active] = self.V2V_limit
415 |         self.demand[new_active] = self.demand_amount
416 |         #print("demand is", self.demand)
417 |         #print('mean rate of average V2V link is', np.mean(V2V_Rate[self.activate_links]))
418 | 
419 |         # -- update the statistics --
420 |         early_finish = np.multiply(self.demand <= 0, self.activate_links)
421 |         unqualified = np.multiply(self.individual_time_limit <= 0, self.activate_links)
422 |         self.activate_links[np.add(early_finish, unqualified)] = False   # boolean add acts as a logical OR
423 |         #print('number of activate links is', np.sum(self.activate_links))
424 |         self.success_transmission += np.sum(early_finish)
425 |         self.failed_transmission += np.sum(unqualified)
426 |         #if self.n_step % 1000 == 0 :
427 |         #    self.success_transmission = 0
428 |         #    self.failed_transmission = 0
429 |         failed_percentage = self.failed_transmission/(self.failed_transmission + self.success_transmission + 0.0001)
430 |         # print('Percentage of failed', np.sum(new_active), self.failed_transmission, self.failed_transmission + self.success_transmission, failed_percentage)
431 |         return V2I_Rate, failed_percentage
432 | 
433 | 
434 |     def Compute_Performance_Reward_fast_fading_with_power_asyn(self, actions_power):   # asynchronous variant of the rate computation
435 |         # ===================================================
436 |         # --------- Used for Testing -------
437 |         # ===================================================
438 |         actions = actions_power[:, :, 0]           # the channel (RB) selection part
439 |         power_selection = actions_power[:, :, 1]   # the power selection part
440 |         Interference = np.zeros(self.n_RB)   # interference from V2V to the V2I links
441 |         for i in range(len(self.vehicles)):
442 |             for j in range(len(actions[i, :])):
443 |                 if not self.activate_links[i, j]:
444 |                     continue
445 |                 Interference[actions[i][j]] += 10**((self.V2V_power_dB_List[power_selection[i, j]] - \
446 |                                                      self.V2I_channels_with_fastfading[i, actions[i, j]] + \
447 |                                                      self.vehAntGain + self.bsAntGain - self.bsNoiseFigure)/10)
448 |         self.V2I_Interference = Interference + self.sig2
449 |         V2V_Interference = np.zeros((len(self.vehicles), 3))
450 |         V2V_Signal = np.zeros((len(self.vehicles), 3))
451 |         Interference_times = np.zeros((len(self.vehicles), 3))
452 |         actions[(np.logical_not(self.activate_links))] = -1
453 |         for i in range(self.n_RB):
454 |             indexes = np.argwhere(actions == i)
455 |             for j in range(len(indexes)):
456 |                 #receiver_j = self.vehicles[indexes[j,0]].neighbors[indexes[j,1]]
457 |                 receiver_j = self.vehicles[indexes[j, 0]].destinations[indexes[j, 1]]
458 |                 V2V_Signal[indexes[j, 0], indexes[j, 1]] = 10**((self.V2V_power_dB_List[power_selection[indexes[j, 0], indexes[j, 1]]] -\
459 |                                                                  self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i] + 2*self.vehAntGain - self.vehNoiseFigure)/10)
460 |                 #V2V_Signal[indexes[j, 0],indexes[j, 1]] = 10**((self.V2V_power_dB_List[0] - self.V2V_channels_with_fastfading[indexes[j][0]][receiver_j][i])/10)
461 |                 if i= 0):
602 |                             continue
603 |                         V2V_Interference[k, m, channel_selection[i, j]] += 10**((self.V2V_power_dB_List[power_selection[i, j]] -\
604 |                                                                                  self.V2V_channels_with_fastfading[i][self.vehicles[k].destinations[m]][channel_selection[i, j]] + 2*self.vehAntGain - self.vehNoiseFigure)/10)
605 | 
606 |         self.V2V_Interference_all = 10 * np.log10(V2V_Interference)
607 | 
608 | 
609 |     def renew_demand(self):
610 |         # generate a new payload demand for each V2V link
611 |         self.demand = self.demand_amount*np.ones((self.n_RB, 3))
612 |         self.time_limit = 10
613 |     def act_for_training(self, actions, idx):
614 |         # =============================================
615 |         #  This function gives the reward used for training
616 |         # ===========================================
617 |         rewards_list = np.zeros(self.n_RB)
618 |         action_temp = actions.copy()
619 |         self.activate_links = np.ones((self.n_Veh, 3), dtype='bool')
620 |         V2I_rewardlist, V2V_rewardlist, time_left = self.Compute_Performance_Reward_Batch(action_temp, idx)
621 |         self.renew_positions()
622 |         self.renew_channels_fastfading()
623 |         self.Compute_Interference(actions)
624 |         rewards_list = rewards_list.T.reshape([-1])
625 |         V2I_rewardlist = V2I_rewardlist.T.reshape([-1])
626 |         V2V_rewardlist = V2V_rewardlist.T.reshape([-1])
627 |         V2I_reward = (V2I_rewardlist[actions[idx[0], idx[1], 0] + 20*actions[idx[0], idx[1], 1]] -\
628 |                       np.min(V2I_rewardlist))/(np.max(V2I_rewardlist) - np.min(V2I_rewardlist) + 0.000001)
629 |         V2V_reward = (V2V_rewardlist[actions[idx[0], idx[1], 0] + 20*actions[idx[0], idx[1], 1]] -\
630 |                       np.min(V2V_rewardlist))/(np.max(V2V_rewardlist) - np.min(V2V_rewardlist) + 0.000001)
631 |         lambdda = 0.1
632 |         #print ("Reward", V2I_reward, V2V_reward, time_left)
633 |         t = lambdda * V2I_reward + (1-lambdda) * V2V_reward   # weighted sum of the normalized V2I and V2V rewards
634 |         #print("time left", time_left)
635 |         #return t
636 |         return t - (self.V2V_limit - time_left)/self.V2V_limit   # penalize the time already spent on the V2V transmission
637 | 
638 |     def act_asyn(self, actions):
639 |         self.n_step += 1
640 |         if self.n_step % 10 == 0:
641 |             self.renew_positions()
642 |             self.renew_channels_fastfading()
643 |         reward = self.Compute_Performance_Reward_fast_fading_with_power_asyn(actions)
644 |         self.Compute_Interference(actions)
645 |         return reward
646 |     def act(self, actions):
647 |         # simulate the next state after the action is taken
648 |         self.n_step += 1
649 |         reward = self.Compute_Performance_Reward_fast_fading_with_power(actions)
650 |         self.renew_positions()
651 |         self.renew_channels_fastfading()
652 |         self.Compute_Interference(actions)
653 |         return reward
654 | 
655 |     def new_random_game(self, n_Veh=0):
656 |         # start a new game: fresh vehicles, channels, and V2V payload demands
657 |         self.n_step = 0
658 |         self.vehicles = []
659 |         if n_Veh > 0:
660 |             self.n_Veh = n_Veh
661 |         self.add_new_vehicles_by_number(int(self.n_Veh/4))
662 |         self.V2Vchannels = V2Vchannels(self.n_Veh, self.n_RB)
663 |         self.V2Ichannels = V2Ichannels(self.n_Veh, self.n_RB)
664 |         self.renew_channels_fastfading()
665 |         self.renew_neighbor()
666 |         self.demand_amount = 30
667 |         self.demand = self.demand_amount * np.ones((self.n_Veh, 3))
668 |         self.test_time_count = 10
669 |         self.V2V_limit = 0.1   # 100 ms tolerable V2V latency
670 |         self.individual_time_limit = self.V2V_limit * np.ones((self.n_Veh, 3))
671 |         self.individual_time_interval = np.random.exponential(0.05, (self.n_Veh, 3))
672 |         self.UnsuccessfulLink = np.zeros((self.n_Veh, 3))
673 |         self.success_transmission = 0
674 |         self.failed_transmission = 0
675 |         self.update_time_train = 0.01    # 10 ms update time for training
676 |         self.update_time_test = 0.002    # 2 ms update time for testing
677 |         self.update_time_asyn = 0.0002   # update one subset of the vehicles every 0.2 ms; each vehicle is updated every 2 ms
678 |         self.activate_links = np.zeros((self.n_Veh, 3), dtype='bool')
679 | 
680 | if __name__ == "__main__":
681 |     up_lanes = [3.5/2, 3.5/2 + 3.5, 250+3.5/2, 250+3.5+3.5/2, 500+3.5/2, 500+3.5+3.5/2]
682 |     down_lanes = [250-3.5-3.5/2, 250-3.5/2, 500-3.5-3.5/2, 500-3.5/2, 750-3.5-3.5/2, 750-3.5/2]
683 |     left_lanes = [3.5/2, 3.5/2 + 3.5, 433+3.5/2, 433+3.5+3.5/2, 866+3.5/2, 866+3.5+3.5/2]
684 |     right_lanes = [433-3.5-3.5/2, 433-3.5/2, 866-3.5-3.5/2, 866-3.5/2, 1299-3.5-3.5/2, 1299-3.5/2]
685 |     width = 750
686 |     height = 1299
687 |     Env = Environ(down_lanes, up_lanes, left_lanes, right_lanes, width, height)
688 |     Env.test_channel()
--------------------------------------------------------------------------------