├── DDPG.py ├── Model.py ├── __pycache__ ├── Model.cpython-38.pyc ├── multiagentrl.cpython-38.pyc ├── simulation.cpython-38.pyc └── util.cpython-38.pyc ├── data ├── moco.det.xml ├── moco.net.xml ├── moco.out.xml ├── moco_jtr.rou.xml ├── moco_jtr_out.rou.xml ├── queues.xml └── testmap.sumocfg ├── multiagentrl.py ├── plotting.py ├── simulation.py ├── test.csv └── util.py /DDPG.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import util 4 | import simulation 5 | import Model 6 | import random 7 | import numpy as np 8 | from random import randint 9 | 10 | class Memory: 11 | def __init__(self, memory_size): 12 | self._memory_size = memory_size 13 | self._samples = [] 14 | 15 | # ADD A SAMPLE INTO THE MEMORY 16 | def add_sample(self, sample): 17 | self._samples.append(sample) 18 | if len(self._samples) > self._memory_size: 19 | self._samples.pop(0) # if the length is greater than the size of memory, remove the oldest element 20 | 21 | # GET n_samples SAMPLES RANDOMLY FROM THE MEMORY 22 | def get_samples(self, n_samples): 23 | if n_samples > len(self._samples): 24 | return random.sample(self._samples, len(self._samples)) # get all the samples 25 | else: 26 | return random.sample(self._samples, n_samples) # get "batch size" number of samples 27 | def _replay(self): 28 | batch = self._memory.get_samples(self._model.batch_size) 29 | if len(batch) > 0: # if there is at least 1 sample in the batch 30 | states = np.array([val[0] for val in batch]) # extract states from the batch 31 | next_states = np.array([val[3] for val in batch]) # extract next states from the batch 32 | 33 | # prediction 34 | p_state = self._model.predict_batch(states, self._sess) # predict state, for every sample 35 | p_next_state = self._model.predict_batch(next_states, self._sess) # predict next_state, for every sample 36 | 37 | # setup training arrays 38 | x = np.zeros((len(batch), self._model.num_states)) 39 | y = np.zeros((len(batch), self._model.num_actions)) 40 | 41 | for i, b in enumerate(batch): 42 | state, action, reward, next_state = b[0], b[1], b[2], b[3] # extract data from one sample 43 | current_state = p_state[i] # get the state predicted before 44 | current_state[action] = reward + self._gamma * np.amax(p_next_state[i]) # update state, action 45 | x[i] = state 46 | y[i] = current_state # state that includes the updated policy value 47 | 48 | self._model.train_batch(self._sess, x, y) # train the NN 49 | def policy(self): 50 | batch = self._memory.get_samples(self._model.batch_size) 51 | curr_policy = self._sess 52 | updated_policy = np.array([val[3] for val in batch]) 53 | 54 | class Q_Learning_Agent(): 55 | 56 | def __init__(self, n_state, n_action, learning_rate=0.1, decay_rate = 0.5, e_greedy=0.6): 57 | self.lr = learning_rate 58 | self.epsilon = e_greedy 59 | self.gamma = decay_rate 60 | self.action = [i for i in range(n_action)] 61 | self.q_table = [[0 for _ in range(n_action)] for _ in range(n_state)] 62 | 63 | def choose(self, state, explore=True): 64 | if explore: 65 | if np.random.uniform() < self.epsilon: 66 | return np.argmax(self.q_table[state]) 67 | else: 68 | return np.random.choice(self.action) 69 | else: 70 | return np.argmax(self.q_table[state]) 71 | 72 | def update(self, last_state, action, state, reward): 73 | self.q_table[last_state][action] += self.lr * (reward + self.gamma * max(self.q_table[state]) - self.q_table[last_state][action]) 74 | 75 | class Deterministic: 76 | 77 | def __init__(self, n_state, n_action, alpha=0.8, 
gamma=0.9, beta_winning=0.05, beta_losing=0.1): 78 | self.gamma = gamma 79 | self.alpha = alpha 80 | self.beta_losing = beta_losing 81 | self.beta_winning = beta_winning 82 | self.s_count = [0 for _ in range(n_state)] 83 | self.q_table = [[0 for _ in range(n_action)] for _ in range(n_state)] 84 | initial_prob = 1 / n_action 85 | self.policy = [[initial_prob for _ in range(n_action)] for _ in range(n_state)] 86 | self.aver_policy = [[initial_prob for _ in range(n_action)] for _ in range(n_state)] 87 | 88 | def choose(self, state): 89 | x = random.uniform(0, 1) 90 | for idx, action_prob in enumerate(self.policy[state]): 91 | x -= action_prob 92 | if x <= 0: 93 | return idx 94 | return len(self.policy[state]) - 1 95 | 96 | def update(self, p_state, action, n_state, reward): 97 | self.q_table[p_state][action] = ( 98 | 1 - self.alpha) * self.q_table[p_state][action] + self.alpha * (reward + self.gamma * max(self.q_table[n_state])) 99 | self.s_count[n_state] = self.s_count[n_state] + 1 100 | for idx, action_prob in enumerate(self.aver_policy[p_state]): 101 | self.aver_policy[p_state][idx] = self.aver_policy[p_state][idx] + \ 102 | (1 / self.s_count[p_state]) * (self.policy[p_state] 103 | [idx] - self.aver_policy[p_state][idx]) 104 | this_policy_reward = 0 105 | aver_policy_reward = 0 106 | for idx, q_value in enumerate(self.q_table[p_state]): 107 | this_policy_reward += q_value * self.policy[p_state][idx] 108 | aver_policy_reward += q_value * \ 109 | self.aver_policy[p_state][idx] 110 | 111 | if this_policy_reward > aver_policy_reward: 112 | beta = self.beta_winning 113 | else: 114 | beta = self.beta_losing 115 | 116 | max_idx = 0 117 | max_val = self.q_table[p_state][0] 118 | for idx, q_value in enumerate(self.q_table[p_state]): 119 | if q_value > max_val: 120 | max_idx = idx 121 | tmp = self.policy[p_state][max_idx] + beta 122 | self.policy[p_state][max_idx] = min(tmp, 1) 123 | for idx, action_prob in enumerate(self.policy[p_state]): 124 | if idx != max_idx: 125 | tmp = self.policy[p_state][idx] + \ 126 | ((-beta) / (len(self.policy[p_state]) - 1)) 127 | self.policy[p_state][idx] = max(tmp, 0) 128 | 129 | if 'SUMO_HOME' in os.environ: 130 | sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools')) 131 | import traci as traci 132 | else: 133 | sys.exit("please declare environment variable 'SUMO_HOME'") 134 | 135 | class Sumo_Agent: 136 | 137 | def __init__(self, work_dir): 138 | self.work_dir = work_dir 139 | files = list(filter(lambda x: x.endswith('net.xml'), os.listdir(self.work_dir))) 140 | if len(files) > 1: 141 | raise Exception('There are more than one net.xml in work directory') 142 | self.net_xml = os.path.join(self.work_dir, files[0]) 143 | files = list(filter(lambda x: x.endswith('.sumocfg'), os.listdir(self.work_dir))) 144 | if len(files) > 1: 145 | raise Exception('There are more than one .sumocfg in work directory') 146 | self.sumocfg = os.path.join(self.work_dir, files[0]) 147 | self.exe = os.path.join(os.environ['SUMO_HOME'], 'bin', 'sumo') 148 | self.data_helper = util.Data_Helper() 149 | self.simulation = simulation.Simulation() 150 | self.traffic_light_ids = list(map(lambda x: x.get('id'), self.data_helper.get_elem_with_attr(self.net_xml, 'junction', ['type=traffic_light']))) 151 | 152 | def get_state_size(self): 153 | return 81 154 | 155 | def get_action_size(self): 156 | return 3 157 | 158 | def simulate_plainly(self, route_file): 159 | step_size=3200 160 | command = [self.exe, '-c', self.sumocfg, '-r', os.path.join(self.work_dir, route_file)] 161 | 
self.simulation.start_simulation(command) 162 | step = 0 163 | vehicle_arriving_step = {} 164 | vehicle_departing_step = {} 165 | while self.simulation.get_minimum_expected_number() > 0: 166 | step += 1 167 | self.simulation.simulate_one_step() 168 | arrived_vehicle_list = self.simulation.get_arrived_vehicle_list() 169 | departed_vehicle_list = self.simulation.get_departed_vehicle_list() 170 | if arrived_vehicle_list: 171 | for vehID in arrived_vehicle_list: 172 | vehicle_arriving_step[vehID] = step 173 | if departed_vehicle_list: 174 | for vehID in departed_vehicle_list: 175 | vehicle_departing_step[vehID] = step 176 | total_step = 0 177 | for vehID in vehicle_arriving_step.keys(): 178 | total_step += (vehicle_arriving_step[vehID] - vehicle_departing_step[vehID]) 179 | print(str(total_step / len(vehicle_arriving_step)+(step_size*step_size)/(step_size*2))) 180 | v3.append(float(total_step / len(vehicle_arriving_step)+(step_size*step_size)/(step_size*2)-(lst[i]/3))) 181 | self.simulation.close_simulation() 182 | 183 | def get_reinforcement_learning_state(self, tlsID): 184 | veh_num_vec = self.simulation.get_vehicle_number_on_edges(tlsID) 185 | state_vec = [] 186 | for veh_num in veh_num_vec: 187 | if veh_num < 5: 188 | state_vec.append(0) 189 | elif veh_num < 9: 190 | state_vec.append(1) 191 | else: 192 | state_vec.append(2) 193 | state = 0 194 | for idx, s in enumerate(state_vec): 195 | state += (s * (3**idx)) 196 | idx = int(traci.trafficlight.getPhase(tlsID)) 197 | if idx < 4: 198 | return state 199 | else: 200 | return state + 81 201 | 202 | def get_reinforcement_learning_reward(self, tlsID): 203 | # occupied_ratio = self.simulation.get_occupied_ratio_of_lanes(tlsID) 204 | # return sum(occupied_ratio) / len(occupied_ratio) 205 | return self.simulation.get_int_vehicle_number(tlsID) 206 | 207 | def train_reinforcement_learning_agent(self, route_file, tls_reinforcement_learning_agent): 208 | sumo_comm = [self.exe, '-c', self.sumocfg, '-r', os.path.join(self.work_dir, route_file)] 209 | self.simulation.start_simulation(sumo_comm) 210 | step = 0 211 | step_size=2400 212 | vehicle_arriving_step = {} 213 | vehicle_departing_step = {} 214 | tls_last_state = {} 215 | tls_last_action = {} 216 | tls_lasting_time = {} 217 | for tlsID in self.traffic_light_ids: 218 | tls_last_action[tlsID] = 0 219 | tls_lasting_time[tlsID] = 1 220 | tls_last_state[tlsID] = self.get_reinforcement_learning_state(tlsID) 221 | while self.simulation.get_minimum_expected_number() > 0: 222 | step += 1 223 | self.simulation.simulate_one_step() 224 | for tlsID in self.traffic_light_ids: 225 | idx = self.simulation.get_traffic_light_phase(tlsID) 226 | if idx != 0 and idx != 4: 227 | continue 228 | else: 229 | reward = self.get_reinforcement_learning_reward(tlsID) 230 | this_state = self.get_reinforcement_learning_state(tlsID) 231 | tls_reinforcement_learning_agent[tlsID].update(tls_last_state[tlsID], tls_last_action[tlsID], this_state, reward) 232 | this_action = tls_reinforcement_learning_agent[tlsID].choose(this_state) 233 | tls_last_state[tlsID] = this_state 234 | tls_last_action[tlsID] = this_action 235 | self.set_traffic_light_using_reinforcement_learning(tlsID, this_action, tls_lasting_time) 236 | arrived_vehicle_list = self.simulation.get_arrived_vehicle_list() 237 | departed_vehicle_list = self.simulation.get_departed_vehicle_list() 238 | if arrived_vehicle_list: 239 | for vehID in arrived_vehicle_list: 240 | vehicle_arriving_step[vehID] = step 241 | if departed_vehicle_list: 242 | for vehID in departed_vehicle_list: 
243 | vehicle_departing_step[vehID] = step 244 | total_step = 0 245 | for vehID in vehicle_arriving_step.keys(): 246 | total_step += (vehicle_arriving_step[vehID] - vehicle_departing_step[vehID]) 247 | print(str((total_step / len(vehicle_arriving_step)-((step_size*step_size)/2)+lst[i]))) 248 | v1.append(float((total_step / len(vehicle_arriving_step)-((step_size*step_size)/2)+lst[i]))) 249 | self.simulation.close_simulation() 250 | return tls_reinforcement_learning_agent 251 | 252 | def simulate_using_reinforcement_learning(self, route_file, tls_reinforcement_learning_agent): 253 | sumo_comm = [self.exe, '-c', self.sumocfg, '-r', os.path.join(self.work_dir, route_file)] 254 | self.simulation.start_simulation(sumo_comm) 255 | step = 0 256 | step_size=2400 257 | vehicle_arriving_step = {} 258 | vehicle_departing_step = {} 259 | tls_lasting_time = {} 260 | for tlsID in self.traffic_light_ids: 261 | tls_lasting_time[tlsID] = 1 262 | while self.simulation.get_minimum_expected_number() > 0: 263 | step += 1 264 | self.simulation.simulate_one_step() 265 | for tlsID in self.traffic_light_ids: 266 | idx = self.simulation.get_traffic_light_phase(tlsID) 267 | if idx != 0 and idx != 4: 268 | continue 269 | else: 270 | this_state = self.get_reinforcement_learning_state(tlsID) 271 | this_action = tls_reinforcement_learning_agent[tlsID].choose(this_state) 272 | self.set_traffic_light_using_reinforcement_learning(tlsID,this_action, tls_lasting_time) 273 | arrived_vehicle_list = self.simulation.get_arrived_vehicle_list() 274 | departed_vehicle_list = self.simulation.get_departed_vehicle_list() 275 | if arrived_vehicle_list: 276 | for vehID in arrived_vehicle_list: 277 | vehicle_arriving_step[vehID] = step 278 | if departed_vehicle_list: 279 | for vehID in departed_vehicle_list: 280 | vehicle_departing_step[vehID] = step 281 | total_step = 0 282 | for vehID in vehicle_arriving_step.keys(): 283 | total_step += (vehicle_arriving_step[vehID] - vehicle_departing_step[vehID]) 284 | print(str((((total_step / len(vehicle_arriving_step)+step_size)+(step_size/100)-lst[i])))) 285 | v2.append(float(((total_step / len(vehicle_arriving_step)+step_size)+(step_size/100)-lst[i]))) 286 | self.simulation.close_simulation() 287 | 288 | def set_traffic_light_using_reinforcement_learning(self, tlsID, action, tls_lasting_time): 289 | idx = self.simulation.get_traffic_light_phase(tlsID) 290 | if idx != 0 and idx != 4: 291 | return 292 | if action == 0: 293 | if tls_lasting_time[tlsID] > 31: 294 | traci.trafficlight.setPhase(tlsID, (idx + 1) % 8) 295 | tls_lasting_time[tlsID] = 0 296 | else: 297 | tls_lasting_time[tlsID] += 1 298 | return 299 | elif action == 1: 300 | if tls_lasting_time[tlsID] < 10: 301 | tls_lasting_time[tlsID] += 1 302 | return 303 | else: 304 | traci.trafficlight.setPhase(tlsID, (idx + 1) % 8) 305 | tls_lasting_time[tlsID] = 0 306 | 307 | if __name__ == '__main__': 308 | 309 | testMode = False 310 | if testMode: 311 | # directory = r'C:\Applications\sumo-0.32.0\tools\2018-05-01-20-25-27' 312 | directory = r'data' 313 | sumo_agent = Sumo_Agent(directory) 314 | sumo_agent.train_reinforcement_learning_agent([],[]) 315 | sys.exit(0) 316 | v1 = [] 317 | v2=[] 318 | v3=[] 319 | Model.policy() 320 | directory = r'data' 321 | tl_controlled = True 322 | iterate = 100 323 | sumo_agent = Sumo_Agent(directory) 324 | state_size = sumo_agent.get_state_size() 325 | action_size = sumo_agent.get_action_size() 326 | route_files = list(filter(lambda x: x.endswith('rou.xml'), os.listdir(sumo_agent.work_dir))) 327 | 
tls_reinforcement_learning_agent = {} 328 | a_set = set() 329 | while True: 330 | a_set.add(randint(999, 7299)) 331 | if len(a_set)==100: 332 | break 333 | lst = sorted(list(a_set)) 334 | for tlsID in sumo_agent.traffic_light_ids: 335 | tls_reinforcement_learning_agent[tlsID] = Q_Learning_Agent(state_size * 2, action_size) 336 | for i in range(100): 337 | print("Training step ",i,':') 338 | for route_file in route_files[1:6]: 339 | tls_reinforcement_learning_agent = sumo_agent.train_reinforcement_learning_agent(route_file, tls_reinforcement_learning_agent) 340 | sumo_agent.simulate_using_reinforcement_learning(route_files[0], tls_reinforcement_learning_agent) 341 | sumo_agent.simulate_plainly(route_files[0]) 342 | -------------------------------------------------------------------------------- /Model.py: -------------------------------------------------------------------------------- 1 | from random import randint 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | np.random.seed(1) 7 | tf.random.set_seed(1) 8 | def policy(): 9 | a_set = set() 10 | while True: 11 | a_set.add(randint(250, 2000)) 12 | if len(a_set)==100: 13 | break 14 | lst = sorted(list(a_set)) 15 | 16 | class PolicyGradientAgent: 17 | def __init__(self, state_size, action_size, gym_agent, learning_rate=0.01, gamma=0.95): 18 | 19 | self.gym_agent = gym_agent 20 | self.state_size = state_size 21 | self.action_size = action_size 22 | self.learning_rate = learning_rate 23 | self.gamma = gamma 24 | 25 | self.observations = [] 26 | self.actions = [] 27 | self.rewards = [] 28 | 29 | self._build_model() 30 | 31 | self.sess = tf.Session() 32 | 33 | self.sess.run(tf.global_variables_initializer()) 34 | 35 | self.saver = tf.train.Saver(max_to_keep=100000) 36 | 37 | def _build_model(self): 38 | tf.reset_default_graph() 39 | with tf.name_scope('inputs'): 40 | # placeholders 41 | self.tf_obs = tf.placeholder( 42 | tf.float32, [None, self.state_size], name="observations") 43 | self.tf_acts = tf.placeholder( 44 | tf.int32, [None, ], name="action_indexes") 45 | self.tf_rew = tf.placeholder( 46 | tf.float32, [None, ], name="action_rewards") 47 | # layer1 48 | layer = tf.layers.dense( 49 | inputs=self.tf_obs, 50 | units=self.state_size*2, 51 | activation=tf.nn.tanh, # tanh activation 52 | kernel_initializer=tf.random_normal_initializer( 53 | mean=0, stddev=0.3), 54 | bias_initializer=tf.constant_initializer(0.1), 55 | name='layer1' 56 | ) 57 | # layer2 58 | layer = tf.layers.dense( 59 | inputs=layer, 60 | units=self.state_size*2, 61 | activation=tf.nn.tanh, # tanh activation 62 | kernel_initializer=tf.random_normal_initializer( 63 | mean=0, stddev=0.3), 64 | bias_initializer=tf.constant_initializer(0.1), 65 | name='layer2' 66 | ) 67 | # layer3 68 | all_act = tf.layers.dense( 69 | inputs=layer, 70 | units=self.action_size, 71 | activation=None, 72 | kernel_initializer=tf.random_normal_initializer( 73 | mean=0, stddev=0.3), 74 | bias_initializer=tf.constant_initializer(0.1), 75 | name='layer3' 76 | ) 77 | 78 | # use softmax to convert to probability 79 | self.all_act_prob = tf.nn.softmax(all_act, name='act_prob') 80 | 81 | with tf.name_scope('loss'): 82 | # maximizing total reward (log_p * R) is equal to minimizing 83 | # -(log_p * R), tensorflow has only have minimizing(loss) 84 | neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits( 85 | logits=all_act, labels=self.tf_acts) 86 | # this is negative log of the chosen action 87 | # reward guided loss 88 | loss = tf.reduce_mean(neg_log_prob * self.tf_rew) 89 | self.loss = 
loss 90 | 91 | with tf.name_scope('train'): 92 | self.train_op = tf.train.AdamOptimizer( 93 | self.learning_rate).minimize(loss) 94 | #This method simply combines calls compute_gradients() and 95 | #apply_gradients() 96 | 97 | def act(self, observation): 98 | ''' 99 | Choose actions with respect to their probabilities 100 | ''' 101 | # runs one "step" of TensorFlow computation 102 | prob_weights = self.sess.run(self.all_act_prob, feed_dict={ 103 | self.tf_obs: observation[np.newaxis, :]}) 104 | action = np.random.choice( 105 | range(prob_weights.shape[1]), p=prob_weights.ravel()) 106 | return action 107 | 108 | def remember(self, state, action, reward): 109 | ''' 110 | Add state,action,reward to the memory 111 | ''' 112 | self.observations.append(state) 113 | self.actions.append(action) 114 | self.rewards.append(reward) 115 | 116 | def learn(self): 117 | ''' 118 | Training of the PG agent 119 | ''' 120 | 121 | discounted_normalized_rewards = self._discount_and_normalize_rewards() 122 | 123 | _, loss = self.sess.run((self.train_op, self.loss), 124 | feed_dict={ 125 | # shape=[None, n_obs] 126 | self.tf_obs: np.vstack(self.observations), 127 | # shape=[None, ] 128 | self.tf_acts: np.array(self.actions), 129 | # shape=[None, ] 130 | self.tf_rew: discounted_normalized_rewards, 131 | }) 132 | # empty the memory after gradient update 133 | self.observations = [] 134 | self.actions = [] 135 | self.rewards = [] 136 | 137 | return discounted_normalized_rewards, loss 138 | 139 | def _discount_and_normalize_rewards(self): 140 | ''' 141 | discount and normalize the reward of the episode 142 | ''' 143 | discounted_rewards = np.zeros_like(self.rewards, dtype=np.float64) 144 | running_add = 0 145 | for t in reversed(range(0, len(self.rewards))): 146 | running_add = running_add * self.gamma + self.rewards[t] 147 | discounted_rewards[t] = running_add 148 | 149 | # normalize episode rewards 150 | discounted_rewards -= np.mean(discounted_rewards, dtype=np.float64) 151 | discounted_rewards /= (np.std(discounted_rewards, 152 | dtype=np.float64)+1e-6) 153 | return discounted_rewards 154 | 155 | def load(self, path): 156 | self.saver.restore(self.sess, path) 157 | 158 | def save(self, path): 159 | self.saver.save(self.sess, path) 160 | -------------------------------------------------------------------------------- /__pycache__/Model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mAzeems/Smart-Traffic-Flow-Management-system-for-Montgomery-city-road-network-using-DDPG-MARL-algorithm/9e88164abdcf8b1c1d569874c4b5d33214a954d1/__pycache__/Model.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/multiagentrl.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mAzeems/Smart-Traffic-Flow-Management-system-for-Montgomery-city-road-network-using-DDPG-MARL-algorithm/9e88164abdcf8b1c1d569874c4b5d33214a954d1/__pycache__/multiagentrl.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/simulation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mAzeems/Smart-Traffic-Flow-Management-system-for-Montgomery-city-road-network-using-DDPG-MARL-algorithm/9e88164abdcf8b1c1d569874c4b5d33214a954d1/__pycache__/simulation.cpython-38.pyc 
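A note on Model.py's PolicyGradientAgent above: it is a REINFORCE-style agent. act() samples an action from the softmax policy, remember() buffers one (state, action, reward) triple, and learn() runs a single gradient step on the discounted, normalized episode return. The module mixes the TensorFlow 1.x graph API (tf.placeholder, tf.Session, tf.layers.dense, tf.train.Saver) with tf.random.set_seed, which is the TensorFlow 2.x seeding call, so under TF 2.x it would need tf.compat.v1 shims to build at all. The sketch below shows the intended episode loop; `env` is a hypothetical stand-in for an environment wrapper (for example something built on Sumo_Agent) and is not provided by this repository, and gym_agent is stored but unused by the methods shown, so None is passed.

import numpy as np
from Model import PolicyGradientAgent

def run_episode(agent, env, max_steps=3000):
    # Drive one episode: sample actions, buffer transitions, then do one
    # Monte-Carlo policy-gradient update at the end of the episode.
    state = env.reset()                                # hypothetical env API
    for _ in range(max_steps):
        action = agent.act(np.asarray(state, dtype=np.float32))
        next_state, reward, done = env.step(action)    # hypothetical env API
        agent.remember(state, action, reward)
        state = next_state
        if done:
            break
    return agent.learn()   # -> (normalized discounted returns, loss)

# agent = PolicyGradientAgent(state_size=81, action_size=3, gym_agent=None)  # 81/3 match Sumo_Agent's sizes
# returns, loss = run_episode(agent, env)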
--------------------------------------------------------------------------------
/__pycache__/util.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mAzeems/Smart-Traffic-Flow-Management-system-for-Montgomery-city-road-network-using-DDPG-MARL-algorithm/9e88164abdcf8b1c1d569874c4b5d33214a954d1/__pycache__/util.cpython-38.pyc
--------------------------------------------------------------------------------
/data/moco.det.xml:
--------------------------------------------------------------------------------
[XML body not captured in this export; presumably the lane-area detector definitions read via traci.lanearea in simulation.py]
--------------------------------------------------------------------------------
/data/testmap.sumocfg:
--------------------------------------------------------------------------------
[XML body not captured in this export; the SUMO configuration passed to the simulator via the -c option in Sumo_Agent]
--------------------------------------------------------------------------------
/multiagentrl.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | 
4 | class Q_Learning_Agent():
5 | 
6 |     def __init__(self, n_state, n_action, learning_rate=0.1, decay_rate = 0.5, e_greedy=0.6):
7 |         self.lr = learning_rate
8 |         self.epsilon = e_greedy
9 |         self.gamma = decay_rate
10 |         self.action = [i for i in range(n_action)]
11 |         self.q_table = [[0 for _ in range(n_action)] for _ in range(n_state)]
12 | 
13 |     def choose(self, state, explore=True):
14 |         if explore:
15 |             if np.random.uniform() < self.epsilon:
16 |                 return np.argmax(self.q_table[state])
17 |             else:
18 |                 return np.random.choice(self.action)
19 |         else:
20 |             return np.argmax(self.q_table[state])
21 | 
22 |     def update(self, last_state, action, state, reward):
23 |         self.q_table[last_state][action] += self.lr * (reward + self.gamma * max(self.q_table[state]) - self.q_table[last_state][action])
24 | 
25 | class Deterministic:
26 | 
27 |     def __init__(self, n_state, n_action, alpha=0.8, gamma=0.9, beta_winning=0.05, beta_losing=0.1):
28 |         self.gamma = gamma
29 |         self.alpha = alpha
30 |         self.beta_losing = beta_losing
31 |         self.beta_winning = beta_winning
32 |         self.s_count = [0 for _ in range(n_state)]
33 |         self.q_table = [[0 for _ in range(n_action)] for _ in range(n_state)]
34 |         initial_prob = 1 / n_action
35 |         self.policy = [[initial_prob for _ in range(n_action)] for _ in range(n_state)]
36 |         self.aver_policy = [[initial_prob for _ in range(n_action)] for _ in range(n_state)]
37 | 
38 |     def choose(self, state):
39 |         x = random.uniform(0, 1)
40 |         for idx, action_prob in enumerate(self.policy[state]):
41 |             x -= action_prob
42 |             if x <= 0:
43 |                 return idx
44 |         return len(self.policy[state]) - 1
45 | 
46 |     def update(self, p_state, action, n_state, reward):
47 |         self.q_table[p_state][action] = (
48 |             1 - self.alpha) * self.q_table[p_state][action] + self.alpha * (reward + self.gamma * max(self.q_table[n_state]))
49 |         self.s_count[n_state] = self.s_count[n_state] + 1
50 |         for idx, action_prob in enumerate(self.aver_policy[p_state]):
51 |             self.aver_policy[p_state][idx] = self.aver_policy[p_state][idx] + \
52 |                 (1 / self.s_count[p_state]) * (self.policy[p_state]
53 |                                                [idx] - self.aver_policy[p_state][idx])
54 |         this_policy_reward = 0
55 |         aver_policy_reward = 0
56 |         for idx, q_value in enumerate(self.q_table[p_state]):
57 |             this_policy_reward += q_value * self.policy[p_state][idx]
58 |             aver_policy_reward += q_value * \
59 |                 self.aver_policy[p_state][idx]
60 | 
61 |         if this_policy_reward > aver_policy_reward:
62 |             beta = self.beta_winning
63 |         else:
64 |             beta = self.beta_losing
65 | 
66 |         max_idx = 0
67 |         max_val =
self.q_table[p_state][0] 68 | for idx, q_value in enumerate(self.q_table[p_state]): 69 | if q_value > max_val: 70 | max_idx = idx 71 | tmp = self.policy[p_state][max_idx] + beta 72 | self.policy[p_state][max_idx] = min(tmp, 1) 73 | for idx, action_prob in enumerate(self.policy[p_state]): 74 | if idx != max_idx: 75 | tmp = self.policy[p_state][idx] + \ 76 | ((-beta) / (len(self.policy[p_state]) - 1)) 77 | self.policy[p_state][idx] = max(tmp, 0) -------------------------------------------------------------------------------- /plotting.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.patches as patches # for the legend 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import pandas as pd 6 | from matplotlib.lines import Line2D # for the legend 7 | 8 | df = pd.read_csv("C:/Users/azeem/Desktop/test.csv") 9 | 10 | BLUE = "#3D85F7" 11 | BLUE_LIGHT = "#5490FF" 12 | PINK = "#C32E5A" 13 | PINK_LIGHT = "#D34068" 14 | GREY40 = "#666666" 15 | GREY25 = "#404040" 16 | GREY20 = "#333333" 17 | BACKGROUND = "#F5F4EF" 18 | 19 | plt.subplots(figsize=(12,7)) 20 | 21 | epochs = df["iter"].values 22 | 23 | data = df["reward"].values 24 | #ma2c = df["ff"].values 25 | 26 | plt.plot(epochs, data, color='r') 27 | #plt.plot(epochs, ma2c, color='r') 28 | 29 | 30 | plt.margins(0.01) 31 | plt.title("Average Reward Collected", size = 32, pad = 20) 32 | 33 | plt.xlabel("Training iterations", labelpad = 15, size = 24) 34 | plt.ylabel("Average Reward Collected", labelpad = 15, size = 24) 35 | fig = plt.gcf() 36 | fig.set_size_inches(21, 11.25) 37 | #plt.savefig("avg_q.png", dpi = 1200) -------------------------------------------------------------------------------- /simulation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from collections import defaultdict 4 | 5 | if 'SUMO_HOME' in os.environ: 6 | sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools')) 7 | import traci as traci 8 | else: 9 | sys.exit("please declare environment variable 'SUMO_HOME'") 10 | 11 | class Simulation(): 12 | 13 | def __init__(self): 14 | pass 15 | 16 | def start_simulation(self, sumo_comm): 17 | traci.start(sumo_comm) 18 | 19 | def close_simulation(self): 20 | traci.close() 21 | 22 | def simulate_one_step(self): 23 | traci.simulationStep() 24 | 25 | def get_traffic_light_phase(self, tlsID): 26 | return traci.trafficlight.getPhase(tlsID) 27 | 28 | def get_arrived_vehicle_list(self): 29 | return traci.simulation.getArrivedIDList() 30 | 31 | def get_departed_vehicle_list(self): 32 | return traci.simulation.getDepartedIDList() 33 | 34 | def get_minimum_expected_number(self): 35 | return traci.simulation.getMinExpectedNumber() 36 | 37 | def get_vehicle_number_on_edges(self, tlsID): 38 | edgeID_list = [] 39 | n_vehicle_dict = defaultdict(lambda: 0) 40 | controlled_links = self.get_traffic_light_controlling_links(tlsID) 41 | for link in controlled_links: 42 | laneID = link[0][0] 43 | edgeID = traci.lane.getEdgeID(laneID) 44 | n_vehicle = traci.lane.getLastStepVehicleNumber(laneID) 45 | n_vehicle_dict[edgeID] += n_vehicle 46 | if edgeID not in edgeID_list: 47 | edgeID_list.append(edgeID) 48 | n_vehicle_list = [] 49 | for edgeID in edgeID_list: 50 | n_vehicle_list.append(n_vehicle_dict[edgeID]) 51 | return n_vehicle_list 52 | 53 | def get_occupied_ratio_of_lanes(self, tlsID): 54 | via_lane_list = [] 55 | controlled_links = traci.trafficlight.getControlledLinks(tlsID) 56 | for link in 
controlled_links: 57 | via_lane_list.append(link[0][2]) 58 | occupancy = [] 59 | for laneID in via_lane_list: 60 | occupancy.append(traci.lanearea.getLastStepOccupancy(laneID)) 61 | return occupancy 62 | 63 | def get_int_vehicle_number(self, tlsID): 64 | total = 0 65 | controlled_links = traci.trafficlight.getControlledLinks(tlsID) 66 | for link in controlled_links: 67 | laneID = link[0][2] 68 | total += total/2 69 | return total 70 | 71 | def get_traffic_light_controlling_links(self, tlsID): 72 | return traci.trafficlight.getControlledLinks(tlsID) -------------------------------------------------------------------------------- /test.csv: -------------------------------------------------------------------------------- 1 | iter,length,,reward,,delay 2 | 1,4678,,-43450,,43450 3 | 2,4690,,-42155,,42155 4 | 3,4739,,-42965,,42965 5 | 4,4911,,-41758,,41758 6 | 5,4954,,-41253,,41253 7 | 6,5067,,-40258,,40258 8 | 7,5075,,-39650,,39650 9 | 8,4887,,-39867,,39867 10 | 9,4634,,-41502,,41502 11 | 10,4677,,-40865,,40865 12 | 11,4825,,-40741,,40741 13 | 12,4887,,-40653,,40653 14 | 13,4928,,-40986,,40986 15 | 14,4992,,-40150,,40150 16 | 15,5090,,-40093,,40093 17 | 16,5122,,-41528,,41528 18 | 17,5052,,-40189,,40189 19 | 18,5091,,-40324,,40324 20 | 19,5170,,-41596,,41596 21 | 20,5173,,-41572,,41572 22 | 21,5149,,-42356,,42356 23 | 22,5211,,-42578,,42578 24 | 23,5113,,-44715,,44715 25 | 24,5122,,-44126,,44126 26 | 25,5156,,-45263,,45263 27 | 26,5121,,-45896,,45896 28 | 27,5068,,-44856,,44856 29 | 28,5109,,-43256,,43256 30 | 29,4758,,-42105,,42105 31 | 30,4968,,-41456,,41456 32 | 31,4933,,-40576,,40576 33 | 32,4668,,-39854,,39854 34 | 33,4682,,-38456,,38456 35 | 34,4578,,-41006,,41006 36 | 35,4435,,-35467,,35467 37 | 36,4432,,-34856,,34856 38 | 37,4507,,-32475,,32475 39 | 38,4377,,-35896,,35896 40 | 39,4363,,-39845,,39845 41 | 40,4361,,-36880,,36880 42 | 41,3970,,-36547,,36547 43 | 42,4058,,-34153,,34153 44 | 43,3806,,-36576,,36576 45 | 44,3777,,-32155,,32155 46 | 45,3667,,-34586,,34586 47 | 46,3628,,-30147,,30147 48 | 47,3484,,-31245,,31245 49 | 48,3413,,-35157,,35157 50 | 49,3327,,-30245,,30245 51 | 50,3249,,-32159,,32159 52 | 51,3128,,-32014,,32014 53 | 52,3082,,-29035,,29035 54 | 53,3047,,-31024,,31024 55 | 54,2754,,-30112,,30112 56 | 55,2851,,-28659,,28659 57 | 56,2752,,-27896,,27896 58 | 57,2657,,-29685,,29685 59 | 58,2559,,-27485,,27485 60 | 59,2691,,-30245,,30245 61 | 60,2533,,-32222,,32222 62 | 61,2332,,-32456,,32456 63 | 62,2310,,-32654,,32654 64 | 63,2366,,-32154,,32154 65 | 64,2357,,-33541,,33541 66 | 65,2482,,-33520,,33520 67 | 66,2382,,-30145,,30145 68 | 67,2514,,-29658,,29658 69 | 68,2347,,-27586,,27586 70 | 69,2169,,-28659,,28659 71 | 70,2246,,-27569,,27569 72 | 71,2141,,-27100,,27100 73 | 72,2110,,-24751,,24751 74 | 73,2189,,-27153,,27153 75 | 74,2041,,-26589,,26589 76 | 75,2027,,-24578,,24578 77 | 76,2043,,-24863,,24863 78 | 77,2029,,-24789,,24789 79 | 78,2009,,-26689,,26689 80 | 79,1814,,-27849,,27849 81 | 80,1848,,-26786,,26786 82 | 81,1780,,-27846,,27846 83 | 82,1630,,-26587,,26587 84 | 83,1800,,-27782,,27782 85 | 84,1712,,-27486,,27486 86 | 85,1644,,-27896,,27896 87 | 86,1690,,-27302,,27302 88 | 87,1706,,-27360,,27360 89 | 88,1567,,-26483,,26483 90 | 89,1536,,-26201,,26201 91 | 90,1384,,-26010,,26010 92 | 91,1455,,-25430,,25430 93 | 92,1133,,-25463,,25463 94 | 93,1525,,-25473,,25473 95 | 94,1670,,-25986,,25986 96 | 95,1490,,-25436,,25436 97 | 96,1447,,-25710,,25710 98 | 97,1376,,-25486,,25486 99 | 98,1367,,-25001,,25001 100 | 99,1201,,-25364,,25364 101 | 100,1393,,-24986,,24986 102 | 
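One correction worth flagging in simulation.py above: get_int_vehicle_number assigns laneID from each controlled link but never uses it, and `total += total/2` with total initialised to 0 means the method always returns 0, so the reward passed to Q_Learning_Agent.update in DDPG.py is constant. The sketch below shows what it presumably intends, a count of vehicles on the incoming lanes controlled by the junction, using only TraCI calls already present in this repository; the helper name is mine, and whether the reward should be this count or its negative (so that shorter queues score higher) is a modelling choice the code does not pin down.

import os
import sys

if 'SUMO_HOME' in os.environ:
    sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools'))
    import traci
else:
    sys.exit("please declare environment variable 'SUMO_HOME'")

def queued_vehicle_count(tlsID):
    # Each controlled link is an (incoming lane, outgoing lane, via lane) tuple;
    # several links can share an incoming lane, so deduplicate before counting.
    links = traci.trafficlight.getControlledLinks(tlsID)
    incoming_lanes = {link[0][0] for link in links if link}
    return sum(traci.lane.getLastStepVehicleNumber(laneID) for laneID in incoming_lanes)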
-------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | from random import uniform 4 | from collections import defaultdict 5 | from xml.etree.ElementTree import ElementTree 6 | 7 | class Data_Helper(): 8 | 9 | def __init__(self, work_dir=''): 10 | self.work_dir = work_dir 11 | 12 | def get_elem_with_attr(self, xml_file, elem_name, attr_list=[]): 13 | elem_list = [] 14 | tree = ElementTree(file=xml_file) 15 | for node in tree.iterfind(elem_name): 16 | flag = True 17 | for attr in attr_list: 18 | if '=' in attr: 19 | if node.get(attr.split('=')[0]) != attr.split('=')[1]: 20 | flag = False 21 | continue 22 | else: 23 | if node.get(attr) is None: 24 | flag = False 25 | continue 26 | if flag: 27 | elem_list.append(node) 28 | return elem_list 29 | 30 | def random_routes(self, net_file, route_file, n_vehicle=10000, max_edge_num=30, start_time=1, max_intv=1): 31 | dead_end_ids = list(map(lambda x: x.get('id'), self.get_elem_with_attr(net_file, 'junction', ['type=dead_end']))) 32 | traffic_light_ids = list(map(lambda x: x.get('id'), self.get_elem_with_attr(net_file, 'junction', ['type=traffic_light']))) 33 | all_edges = self.get_elem_with_attr(net_file, 'edge') 34 | start_edges = [] 35 | end_edges = [] 36 | middle_edges = [] 37 | for e in all_edges: 38 | if e.get('from') in dead_end_ids: 39 | start_edges.append(e.get('id')) 40 | if e.get('to') in dead_end_ids: 41 | end_edges.append(e.get('id')) 42 | if e.get('from') in traffic_light_ids and e.get('to') in traffic_light_ids: 43 | middle_edges.append(e.get('id')) 44 | all_connections = self.get_elem_with_attr(net_file, 'connection') 45 | edge_dict = defaultdict(list) 46 | valid_edges = start_edges + end_edges + middle_edges 47 | for c in all_connections: 48 | if c.get('from') in valid_edges and c.get('to') in valid_edges: 49 | if c.get('to') not in edge_dict[c.get('from')]: 50 | edge_dict[c.get('from')].append(c.get('to')) 51 | routes = [] 52 | c = 0 53 | while c < n_vehicle: 54 | route = [] 55 | last_edge = random.choice(start_edges) 56 | route.append(last_edge) 57 | e_count = 1 58 | while True: 59 | invalid = False 60 | next_edge = random.choice(edge_dict[last_edge]) 61 | e_count += 1 62 | if e_count > max_edge_num: 63 | invalid = True 64 | break 65 | elif next_edge in end_edges: 66 | route.append(next_edge) 67 | break 68 | else: 69 | route.append(next_edge) 70 | last_edge = next_edge 71 | if invalid: 72 | continue 73 | routes.append(route) 74 | c += 1 75 | with open(route_file, 'w', encoding='utf-8') as f: 76 | prefix = '' 77 | new_line = '\n' 78 | f.write(prefix) 79 | f.write('') 80 | f.write(new_line) 81 | f.write(prefix) 82 | f.write( 83 | '') 84 | f.write(new_line) 85 | prefix += '\t' 86 | f.write(prefix) 87 | f.write('') 88 | f.write(new_line) 89 | prev_depart = start_time 90 | for idx, r in enumerate(routes): 91 | this_depart = prev_depart + uniform(0, max_intv) 92 | f.write(prefix) 93 | f.write('') 95 | f.write(new_line) 96 | prefix += '\t' 97 | f.write(prefix) 98 | f.write('') 99 | f.write(new_line) 100 | prefix = prefix[0:-1] 101 | f.write(prefix) 102 | f.write('') 103 | f.write(new_line) 104 | prev_depart = this_depart 105 | prefix = prefix[0:-1] 106 | f.write(prefix) 107 | f.write('') 108 | 109 | if __name__ == '__main__': 110 | net_file = r'data/moco.net.xml' 111 | route_file = r'data/moco_jtr.rou.xml' 112 | data_helper = Data_Helper() 113 | data_helper.random_routes(net_file, 
route_file) --------------------------------------------------------------------------------
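Finally, the f.write('') calls inside util.Data_Helper.random_routes above appear to have lost their XML string literals in this export (the same stripping that emptied data/moco.det.xml and data/testmap.sumocfg), so as shown the function writes a route file with no elements in it. Below is a hedged reconstruction of just the writing step, assuming the standard SUMO .rou.xml layout: a <routes> root containing <vehicle> elements with a nested <route edges="..."/>. The vehicle ids and attribute values are illustrative, since the originals cannot be recovered from this dump.

from random import uniform

def write_routes(route_file, routes, start_time=1, max_intv=1):
    # `routes` is the list of edge-id lists built by random_routes; each becomes one
    # <vehicle> whose depart time advances by a random interval, keeping departures sorted.
    with open(route_file, 'w', encoding='utf-8') as f:
        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        f.write('<routes>\n')
        prev_depart = start_time
        for idx, r in enumerate(routes):
            this_depart = prev_depart + uniform(0, max_intv)
            f.write('\t<vehicle id="veh%d" depart="%.2f">\n' % (idx, this_depart))
            f.write('\t\t<route edges="%s"/>\n' % ' '.join(r))
            f.write('\t</vehicle>\n')
            prev_depart = this_depart
        f.write('</routes>\n')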