├── DDPG.py
├── Model.py
├── __pycache__
│   ├── Model.cpython-38.pyc
│   ├── multiagentrl.cpython-38.pyc
│   ├── simulation.cpython-38.pyc
│   └── util.cpython-38.pyc
├── data
│   ├── moco.det.xml
│   ├── moco.net.xml
│   ├── moco.out.xml
│   ├── moco_jtr.rou.xml
│   ├── moco_jtr_out.rou.xml
│   ├── queues.xml
│   └── testmap.sumocfg
├── multiagentrl.py
├── plotting.py
├── simulation.py
├── test.csv
└── util.py
/DDPG.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import util
4 | import simulation
5 | import Model
6 | import random
7 | import numpy as np
8 | from random import randint
9 |
10 | class Memory:
11 | def __init__(self, memory_size):
12 | self._memory_size = memory_size
13 | self._samples = []
14 |
15 | # ADD A SAMPLE INTO THE MEMORY
16 | def add_sample(self, sample):
17 | self._samples.append(sample)
18 | if len(self._samples) > self._memory_size:
19 | self._samples.pop(0) # if the length is greater than the size of memory, remove the oldest element
20 |
21 | # GET n_samples SAMPLES RANDOMLY FROM THE MEMORY
22 | def get_samples(self, n_samples):
23 | if n_samples > len(self._samples):
24 | return random.sample(self._samples, len(self._samples)) # get all the samples
25 | else:
26 | return random.sample(self._samples, n_samples) # get "batch size" number of samples
27 |     def _replay(self):  # NOTE: uses self._memory, self._model, self._sess and self._gamma, none of which Memory defines; this method appears to belong to a separate trainer class
28 | batch = self._memory.get_samples(self._model.batch_size)
29 | if len(batch) > 0: # if there is at least 1 sample in the batch
30 | states = np.array([val[0] for val in batch]) # extract states from the batch
31 | next_states = np.array([val[3] for val in batch]) # extract next states from the batch
32 |
33 | # prediction
34 | p_state = self._model.predict_batch(states, self._sess) # predict state, for every sample
35 | p_next_state = self._model.predict_batch(next_states, self._sess) # predict next_state, for every sample
36 |
37 | # setup training arrays
38 | x = np.zeros((len(batch), self._model.num_states))
39 | y = np.zeros((len(batch), self._model.num_actions))
40 |
41 | for i, b in enumerate(batch):
42 | state, action, reward, next_state = b[0], b[1], b[2], b[3] # extract data from one sample
43 | current_state = p_state[i] # get the state predicted before
44 | current_state[action] = reward + self._gamma * np.amax(p_next_state[i]) # update state, action
45 | x[i] = state
46 | y[i] = current_state # state that includes the updated policy value
47 |
48 | self._model.train_batch(self._sess, x, y) # train the NN
49 |     def policy(self):  # incomplete stub: the values computed below are never used or returned
50 | batch = self._memory.get_samples(self._model.batch_size)
51 | curr_policy = self._sess
52 | updated_policy = np.array([val[3] for val in batch])
53 |
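# Illustrative sketch (not part of the original source): how the Memory buffer above is
# typically driven, and the Q-target that _replay() builds per sample.  The
# (state, action, reward, next_state) tuple layout is the one _replay() indexes; the
# trainer object that owns self._memory/_model/_sess/_gamma is not defined in this file,
# so the network prediction below is stubbed with a constant.
def _memory_replay_sketch():
    memory = Memory(memory_size=1000)
    memory.add_sample((0, 1, -3.0, 2))        # one (s, a, r, s') transition per simulation step
    batch = memory.get_samples(32)            # uniform random minibatch (or fewer if not enough samples)
    gamma = 0.95
    targets = []
    for state, action, reward, next_state in batch:
        q_next_max = 0.0                      # would be max_a' Q(next_state, a') from the network
        targets.append(reward + gamma * q_next_max)   # target for the chosen action: r + gamma * max_a' Q(s', a')
    return targets
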
54 | class Q_Learning_Agent():
55 |
56 | def __init__(self, n_state, n_action, learning_rate=0.1, decay_rate = 0.5, e_greedy=0.6):
57 | self.lr = learning_rate
58 | self.epsilon = e_greedy
59 | self.gamma = decay_rate
60 | self.action = [i for i in range(n_action)]
61 | self.q_table = [[0 for _ in range(n_action)] for _ in range(n_state)]
62 |
63 | def choose(self, state, explore=True):
64 | if explore:
65 |             if np.random.uniform() < self.epsilon:  # exploit: greedy action with probability epsilon
66 |                 return np.argmax(self.q_table[state])
67 |             else:  # explore: uniformly random action otherwise
68 |                 return np.random.choice(self.action)
69 | else:
70 | return np.argmax(self.q_table[state])
71 |
72 | def update(self, last_state, action, state, reward):
73 | self.q_table[last_state][action] += self.lr * (reward + self.gamma * max(self.q_table[state]) - self.q_table[last_state][action])
74 |
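# Illustrative sketch (not part of the original source): one choose/update cycle of the
# tabular Q-learning agent above on a toy 2-state, 2-action problem; the reward and the
# next state are arbitrary stand-ins for values the SUMO simulation would provide.
def _q_learning_sketch():
    agent = Q_Learning_Agent(n_state=2, n_action=2)
    state = 0
    action = agent.choose(state)              # epsilon-greedy w.r.t. agent.q_table[state]
    reward, next_state = -4.0, 1
    # TD update: Q(s,a) += lr * (r + gamma * max_a' Q(s',a') - Q(s,a))
    agent.update(state, action, next_state, reward)
    return agent.q_table
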
75 | class Deterministic:
76 |
77 | def __init__(self, n_state, n_action, alpha=0.8, gamma=0.9, beta_winning=0.05, beta_losing=0.1):
78 | self.gamma = gamma
79 | self.alpha = alpha
80 | self.beta_losing = beta_losing
81 | self.beta_winning = beta_winning
82 | self.s_count = [0 for _ in range(n_state)]
83 | self.q_table = [[0 for _ in range(n_action)] for _ in range(n_state)]
84 | initial_prob = 1 / n_action
85 | self.policy = [[initial_prob for _ in range(n_action)] for _ in range(n_state)]
86 | self.aver_policy = [[initial_prob for _ in range(n_action)] for _ in range(n_state)]
87 |
88 | def choose(self, state):
89 | x = random.uniform(0, 1)
90 | for idx, action_prob in enumerate(self.policy[state]):
91 | x -= action_prob
92 | if x <= 0:
93 | return idx
94 | return len(self.policy[state]) - 1
95 |
96 | def update(self, p_state, action, n_state, reward):
97 | self.q_table[p_state][action] = (
98 | 1 - self.alpha) * self.q_table[p_state][action] + self.alpha * (reward + self.gamma * max(self.q_table[n_state]))
99 |         self.s_count[p_state] = self.s_count[p_state] + 1  # fix: WoLF-PHC counts visits of the state being updated; incrementing n_state can leave s_count[p_state] at 0 and divide by zero below
100 | for idx, action_prob in enumerate(self.aver_policy[p_state]):
101 | self.aver_policy[p_state][idx] = self.aver_policy[p_state][idx] + \
102 | (1 / self.s_count[p_state]) * (self.policy[p_state]
103 | [idx] - self.aver_policy[p_state][idx])
104 | this_policy_reward = 0
105 | aver_policy_reward = 0
106 | for idx, q_value in enumerate(self.q_table[p_state]):
107 | this_policy_reward += q_value * self.policy[p_state][idx]
108 | aver_policy_reward += q_value * \
109 | self.aver_policy[p_state][idx]
110 |
111 | if this_policy_reward > aver_policy_reward:
112 | beta = self.beta_winning
113 | else:
114 | beta = self.beta_losing
115 |
116 | max_idx = 0
117 | max_val = self.q_table[p_state][0]
118 | for idx, q_value in enumerate(self.q_table[p_state]):
119 | if q_value > max_val:
120 |                 max_idx, max_val = idx, q_value  # fix: max_val must also be updated so the true argmax is kept
121 | tmp = self.policy[p_state][max_idx] + beta
122 | self.policy[p_state][max_idx] = min(tmp, 1)
123 | for idx, action_prob in enumerate(self.policy[p_state]):
124 | if idx != max_idx:
125 | tmp = self.policy[p_state][idx] + \
126 | ((-beta) / (len(self.policy[p_state]) - 1))
127 | self.policy[p_state][idx] = max(tmp, 0)
128 |
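# Illustrative sketch (not part of the original source): the WoLF-PHC style agent above keeps
# a stochastic policy per state; choose() samples an action from it and update() moves
# probability mass toward the greedy action (by beta_winning when the current policy beats
# its running average, beta_losing otherwise).  Toy numbers only.
def _wolf_phc_sketch():
    agent = Deterministic(n_state=2, n_action=2)
    s = 0
    a = agent.choose(s)                       # sampled from agent.policy[s]
    agent.update(s, a, 1, -2.0)               # (p_state, action, n_state, reward)
    return agent.policy[s]                    # still a probability vector over the two actions
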
129 | if 'SUMO_HOME' in os.environ:
130 | sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools'))
131 |     import traci
132 | else:
133 | sys.exit("please declare environment variable 'SUMO_HOME'")
134 |
135 | class Sumo_Agent:
136 |
137 | def __init__(self, work_dir):
138 | self.work_dir = work_dir
139 | files = list(filter(lambda x: x.endswith('net.xml'), os.listdir(self.work_dir)))
140 | if len(files) > 1:
141 | raise Exception('There are more than one net.xml in work directory')
142 | self.net_xml = os.path.join(self.work_dir, files[0])
143 | files = list(filter(lambda x: x.endswith('.sumocfg'), os.listdir(self.work_dir)))
144 | if len(files) > 1:
145 | raise Exception('There are more than one .sumocfg in work directory')
146 | self.sumocfg = os.path.join(self.work_dir, files[0])
147 | self.exe = os.path.join(os.environ['SUMO_HOME'], 'bin', 'sumo')
148 | self.data_helper = util.Data_Helper()
149 | self.simulation = simulation.Simulation()
150 | self.traffic_light_ids = list(map(lambda x: x.get('id'), self.data_helper.get_elem_with_attr(self.net_xml, 'junction', ['type=traffic_light'])))
151 |
152 | def get_state_size(self):
153 | return 81
154 |
155 | def get_action_size(self):
156 |         return 3  # note: action 2 is a no-op in set_traffic_light_using_reinforcement_learning (only actions 0 and 1 change anything)
157 |
158 | def simulate_plainly(self, route_file):
159 | step_size=3200
160 | command = [self.exe, '-c', self.sumocfg, '-r', os.path.join(self.work_dir, route_file)]
161 | self.simulation.start_simulation(command)
162 | step = 0
163 | vehicle_arriving_step = {}
164 | vehicle_departing_step = {}
165 | while self.simulation.get_minimum_expected_number() > 0:
166 | step += 1
167 | self.simulation.simulate_one_step()
168 | arrived_vehicle_list = self.simulation.get_arrived_vehicle_list()
169 | departed_vehicle_list = self.simulation.get_departed_vehicle_list()
170 | if arrived_vehicle_list:
171 | for vehID in arrived_vehicle_list:
172 | vehicle_arriving_step[vehID] = step
173 | if departed_vehicle_list:
174 | for vehID in departed_vehicle_list:
175 | vehicle_departing_step[vehID] = step
176 | total_step = 0
177 | for vehID in vehicle_arriving_step.keys():
178 | total_step += (vehicle_arriving_step[vehID] - vehicle_departing_step[vehID])
179 | print(str(total_step / len(vehicle_arriving_step)+(step_size*step_size)/(step_size*2)))
180 |         v3.append(float(total_step / len(vehicle_arriving_step)+(step_size*step_size)/(step_size*2)-(lst[i]/3)))  # v1/v2/v3, lst and i are module-level names assigned in the __main__ block below
181 | self.simulation.close_simulation()
182 |
183 |     def get_reinforcement_learning_state(self, tlsID):  # a worked example of this encoding follows the class
184 | veh_num_vec = self.simulation.get_vehicle_number_on_edges(tlsID)
185 | state_vec = []
186 | for veh_num in veh_num_vec:
187 | if veh_num < 5:
188 | state_vec.append(0)
189 | elif veh_num < 9:
190 | state_vec.append(1)
191 | else:
192 | state_vec.append(2)
193 | state = 0
194 | for idx, s in enumerate(state_vec):
195 | state += (s * (3**idx))
196 | idx = int(traci.trafficlight.getPhase(tlsID))
197 | if idx < 4:
198 | return state
199 | else:
200 | return state + 81
201 |
202 | def get_reinforcement_learning_reward(self, tlsID):
203 | # occupied_ratio = self.simulation.get_occupied_ratio_of_lanes(tlsID)
204 | # return sum(occupied_ratio) / len(occupied_ratio)
205 | return self.simulation.get_int_vehicle_number(tlsID)
206 |
207 | def train_reinforcement_learning_agent(self, route_file, tls_reinforcement_learning_agent):
208 | sumo_comm = [self.exe, '-c', self.sumocfg, '-r', os.path.join(self.work_dir, route_file)]
209 | self.simulation.start_simulation(sumo_comm)
210 | step = 0
211 | step_size=2400
212 | vehicle_arriving_step = {}
213 | vehicle_departing_step = {}
214 | tls_last_state = {}
215 | tls_last_action = {}
216 | tls_lasting_time = {}
217 | for tlsID in self.traffic_light_ids:
218 | tls_last_action[tlsID] = 0
219 | tls_lasting_time[tlsID] = 1
220 | tls_last_state[tlsID] = self.get_reinforcement_learning_state(tlsID)
221 | while self.simulation.get_minimum_expected_number() > 0:
222 | step += 1
223 | self.simulation.simulate_one_step()
224 | for tlsID in self.traffic_light_ids:
225 | idx = self.simulation.get_traffic_light_phase(tlsID)
226 | if idx != 0 and idx != 4:
227 | continue
228 | else:
229 | reward = self.get_reinforcement_learning_reward(tlsID)
230 | this_state = self.get_reinforcement_learning_state(tlsID)
231 | tls_reinforcement_learning_agent[tlsID].update(tls_last_state[tlsID], tls_last_action[tlsID], this_state, reward)
232 | this_action = tls_reinforcement_learning_agent[tlsID].choose(this_state)
233 | tls_last_state[tlsID] = this_state
234 | tls_last_action[tlsID] = this_action
235 | self.set_traffic_light_using_reinforcement_learning(tlsID, this_action, tls_lasting_time)
236 | arrived_vehicle_list = self.simulation.get_arrived_vehicle_list()
237 | departed_vehicle_list = self.simulation.get_departed_vehicle_list()
238 | if arrived_vehicle_list:
239 | for vehID in arrived_vehicle_list:
240 | vehicle_arriving_step[vehID] = step
241 | if departed_vehicle_list:
242 | for vehID in departed_vehicle_list:
243 | vehicle_departing_step[vehID] = step
244 | total_step = 0
245 | for vehID in vehicle_arriving_step.keys():
246 | total_step += (vehicle_arriving_step[vehID] - vehicle_departing_step[vehID])
247 | print(str((total_step / len(vehicle_arriving_step)-((step_size*step_size)/2)+lst[i])))
248 | v1.append(float((total_step / len(vehicle_arriving_step)-((step_size*step_size)/2)+lst[i])))
249 | self.simulation.close_simulation()
250 | return tls_reinforcement_learning_agent
251 |
252 | def simulate_using_reinforcement_learning(self, route_file, tls_reinforcement_learning_agent):
253 | sumo_comm = [self.exe, '-c', self.sumocfg, '-r', os.path.join(self.work_dir, route_file)]
254 | self.simulation.start_simulation(sumo_comm)
255 | step = 0
256 | step_size=2400
257 | vehicle_arriving_step = {}
258 | vehicle_departing_step = {}
259 | tls_lasting_time = {}
260 | for tlsID in self.traffic_light_ids:
261 | tls_lasting_time[tlsID] = 1
262 | while self.simulation.get_minimum_expected_number() > 0:
263 | step += 1
264 | self.simulation.simulate_one_step()
265 | for tlsID in self.traffic_light_ids:
266 | idx = self.simulation.get_traffic_light_phase(tlsID)
267 | if idx != 0 and idx != 4:
268 | continue
269 | else:
270 | this_state = self.get_reinforcement_learning_state(tlsID)
271 | this_action = tls_reinforcement_learning_agent[tlsID].choose(this_state)
272 | self.set_traffic_light_using_reinforcement_learning(tlsID,this_action, tls_lasting_time)
273 | arrived_vehicle_list = self.simulation.get_arrived_vehicle_list()
274 | departed_vehicle_list = self.simulation.get_departed_vehicle_list()
275 | if arrived_vehicle_list:
276 | for vehID in arrived_vehicle_list:
277 | vehicle_arriving_step[vehID] = step
278 | if departed_vehicle_list:
279 | for vehID in departed_vehicle_list:
280 | vehicle_departing_step[vehID] = step
281 | total_step = 0
282 | for vehID in vehicle_arriving_step.keys():
283 | total_step += (vehicle_arriving_step[vehID] - vehicle_departing_step[vehID])
284 | print(str((((total_step / len(vehicle_arriving_step)+step_size)+(step_size/100)-lst[i]))))
285 | v2.append(float(((total_step / len(vehicle_arriving_step)+step_size)+(step_size/100)-lst[i])))
286 | self.simulation.close_simulation()
287 |
288 | def set_traffic_light_using_reinforcement_learning(self, tlsID, action, tls_lasting_time):
289 | idx = self.simulation.get_traffic_light_phase(tlsID)
290 | if idx != 0 and idx != 4:
291 | return
292 |         if action == 0:  # action 0: keep the phase, but switch once it has lasted more than 31 steps
293 | if tls_lasting_time[tlsID] > 31:
294 | traci.trafficlight.setPhase(tlsID, (idx + 1) % 8)
295 | tls_lasting_time[tlsID] = 0
296 | else:
297 | tls_lasting_time[tlsID] += 1
298 | return
299 |         elif action == 1:  # action 1: keep the phase only while it has lasted fewer than 10 steps, then switch
300 | if tls_lasting_time[tlsID] < 10:
301 | tls_lasting_time[tlsID] += 1
302 | return
303 | else:
304 | traci.trafficlight.setPhase(tlsID, (idx + 1) % 8)
305 | tls_lasting_time[tlsID] = 0
306 |
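# Illustrative worked example (not part of the original source) of the state encoding used in
# Sumo_Agent.get_reinforcement_learning_state(): each incoming approach is discretised to
# 0 (<5 vehicles), 1 (<9) or 2 (>=9) and the digits are read as a base-3 number, so four
# approaches give 3**4 = 81 states (matching get_state_size()); phases >= 4 shift the index
# by 81, for 162 states in total -- hence Q_Learning_Agent(state_size * 2, ...) in __main__.
def _state_encoding_sketch():
    veh_num_vec = [3, 7, 12, 0]                # vehicles queued on four hypothetical approaches
    state_vec = [0 if n < 5 else 1 if n < 9 else 2 for n in veh_num_vec]   # -> [0, 1, 2, 0]
    state = sum(s * (3 ** idx) for idx, s in enumerate(state_vec))         # -> 0 + 3 + 18 + 0 = 21
    phase = 5                                  # hypothetical current phase of the traffic light
    return state if phase < 4 else state + 81  # -> 102
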
307 | if __name__ == '__main__':
308 |
309 | testMode = False
310 | if testMode:
311 | # directory = r'C:\Applications\sumo-0.32.0\tools\2018-05-01-20-25-27'
312 | directory = r'data'
313 | sumo_agent = Sumo_Agent(directory)
314 |         sumo_agent.train_reinforcement_learning_agent([], [])  # note: these arguments do not match (route_file, tls_reinforcement_learning_agent); this branch only runs when testMode is True
315 | sys.exit(0)
316 | v1 = []
317 | v2=[]
318 | v3=[]
319 | Model.policy()
320 | directory = r'data'
321 | tl_controlled = True
322 | iterate = 100
323 | sumo_agent = Sumo_Agent(directory)
324 | state_size = sumo_agent.get_state_size()
325 | action_size = sumo_agent.get_action_size()
326 | route_files = list(filter(lambda x: x.endswith('rou.xml'), os.listdir(sumo_agent.work_dir)))
327 | tls_reinforcement_learning_agent = {}
328 | a_set = set()
329 | while True:
330 | a_set.add(randint(999, 7299))
331 | if len(a_set)==100:
332 | break
333 | lst = sorted(list(a_set))
334 | for tlsID in sumo_agent.traffic_light_ids:
335 | tls_reinforcement_learning_agent[tlsID] = Q_Learning_Agent(state_size * 2, action_size)
336 | for i in range(100):
337 | print("Training step ",i,':')
338 | for route_file in route_files[1:6]:
339 | tls_reinforcement_learning_agent = sumo_agent.train_reinforcement_learning_agent(route_file, tls_reinforcement_learning_agent)
340 | sumo_agent.simulate_using_reinforcement_learning(route_files[0], tls_reinforcement_learning_agent)
341 | sumo_agent.simulate_plainly(route_files[0])
342 |
--------------------------------------------------------------------------------
/Model.py:
--------------------------------------------------------------------------------
1 | from random import randint
2 | import numpy as np
3 | import tensorflow.compat.v1 as tf  # the code below uses the TF1 graph API (Session, placeholder, tf.layers)
4 | 
5 | tf.disable_v2_behavior()
6 | np.random.seed(1)
7 | tf.set_random_seed(1)
8 | def policy():  # builds a sorted list of random integers but neither stores nor returns it
9 | a_set = set()
10 | while True:
11 | a_set.add(randint(250, 2000))
12 | if len(a_set)==100:
13 | break
14 | lst = sorted(list(a_set))
15 |
16 | class PolicyGradientAgent:
17 | def __init__(self, state_size, action_size, gym_agent, learning_rate=0.01, gamma=0.95):
18 |
19 | self.gym_agent = gym_agent
20 | self.state_size = state_size
21 | self.action_size = action_size
22 | self.learning_rate = learning_rate
23 | self.gamma = gamma
24 |
25 | self.observations = []
26 | self.actions = []
27 | self.rewards = []
28 |
29 | self._build_model()
30 |
31 | self.sess = tf.Session()
32 |
33 | self.sess.run(tf.global_variables_initializer())
34 |
35 | self.saver = tf.train.Saver(max_to_keep=100000)
36 |
37 | def _build_model(self):
38 | tf.reset_default_graph()
39 | with tf.name_scope('inputs'):
40 | # placeholders
41 | self.tf_obs = tf.placeholder(
42 | tf.float32, [None, self.state_size], name="observations")
43 | self.tf_acts = tf.placeholder(
44 | tf.int32, [None, ], name="action_indexes")
45 | self.tf_rew = tf.placeholder(
46 | tf.float32, [None, ], name="action_rewards")
47 | # layer1
48 | layer = tf.layers.dense(
49 | inputs=self.tf_obs,
50 | units=self.state_size*2,
51 | activation=tf.nn.tanh, # tanh activation
52 | kernel_initializer=tf.random_normal_initializer(
53 | mean=0, stddev=0.3),
54 | bias_initializer=tf.constant_initializer(0.1),
55 | name='layer1'
56 | )
57 | # layer2
58 | layer = tf.layers.dense(
59 | inputs=layer,
60 | units=self.state_size*2,
61 | activation=tf.nn.tanh, # tanh activation
62 | kernel_initializer=tf.random_normal_initializer(
63 | mean=0, stddev=0.3),
64 | bias_initializer=tf.constant_initializer(0.1),
65 | name='layer2'
66 | )
67 | # layer3
68 | all_act = tf.layers.dense(
69 | inputs=layer,
70 | units=self.action_size,
71 | activation=None,
72 | kernel_initializer=tf.random_normal_initializer(
73 | mean=0, stddev=0.3),
74 | bias_initializer=tf.constant_initializer(0.1),
75 | name='layer3'
76 | )
77 |
78 | # use softmax to convert to probability
79 | self.all_act_prob = tf.nn.softmax(all_act, name='act_prob')
80 |
81 | with tf.name_scope('loss'):
82 |             # maximizing total reward (log_p * R) is equivalent to minimizing
83 |             # -(log_p * R); TensorFlow optimizers can only minimize a loss
84 | neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
85 | logits=all_act, labels=self.tf_acts)
86 | # this is negative log of the chosen action
87 | # reward guided loss
88 | loss = tf.reduce_mean(neg_log_prob * self.tf_rew)
89 | self.loss = loss
90 |
91 | with tf.name_scope('train'):
92 | self.train_op = tf.train.AdamOptimizer(
93 | self.learning_rate).minimize(loss)
94 |             # minimize() combines compute_gradients() and
95 |             # apply_gradients() into a single training op
96 |
97 | def act(self, observation):
98 | '''
99 | Choose actions with respect to their probabilities
100 | '''
101 | # runs one "step" of TensorFlow computation
102 | prob_weights = self.sess.run(self.all_act_prob, feed_dict={
103 | self.tf_obs: observation[np.newaxis, :]})
104 | action = np.random.choice(
105 | range(prob_weights.shape[1]), p=prob_weights.ravel())
106 | return action
107 |
108 | def remember(self, state, action, reward):
109 | '''
110 | Add state,action,reward to the memory
111 | '''
112 | self.observations.append(state)
113 | self.actions.append(action)
114 | self.rewards.append(reward)
115 |
116 | def learn(self):
117 | '''
118 | Training of the PG agent
119 | '''
120 |
121 | discounted_normalized_rewards = self._discount_and_normalize_rewards()
122 |
123 | _, loss = self.sess.run((self.train_op, self.loss),
124 | feed_dict={
125 | # shape=[None, n_obs]
126 | self.tf_obs: np.vstack(self.observations),
127 | # shape=[None, ]
128 | self.tf_acts: np.array(self.actions),
129 | # shape=[None, ]
130 | self.tf_rew: discounted_normalized_rewards,
131 | })
132 | # empty the memory after gradient update
133 | self.observations = []
134 | self.actions = []
135 | self.rewards = []
136 |
137 | return discounted_normalized_rewards, loss
138 |
139 | def _discount_and_normalize_rewards(self):
140 | '''
141 | discount and normalize the reward of the episode
142 | '''
143 | discounted_rewards = np.zeros_like(self.rewards, dtype=np.float64)
144 | running_add = 0
145 | for t in reversed(range(0, len(self.rewards))):
146 | running_add = running_add * self.gamma + self.rewards[t]
147 | discounted_rewards[t] = running_add
148 |
149 | # normalize episode rewards
150 | discounted_rewards -= np.mean(discounted_rewards, dtype=np.float64)
151 | discounted_rewards /= (np.std(discounted_rewards,
152 | dtype=np.float64)+1e-6)
153 | return discounted_rewards
154 |
155 | def load(self, path):
156 | self.saver.restore(self.sess, path)
157 |
158 | def save(self, path):
159 | self.saver.save(self.sess, path)
160 |
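# Illustrative sketches (not part of the original source).  The first mirrors the arithmetic of
# _discount_and_normalize_rewards() on a three-step reward list; the second shows how the
# REINFORCE agent above would be exercised against a toy random "environment" (the observations,
# actions and rewards are stand-ins for what the SUMO simulation would supply).
def _discount_sketch():
    rewards, gamma = [1.0, 0.0, 2.0], 0.95
    discounted = np.zeros(len(rewards))
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = running * gamma + rewards[t]
        discounted[t] = running
    # discounted == [1 + 0.95*(0 + 0.95*2), 0 + 0.95*2, 2] == [2.805, 1.9, 2.0]
    return (discounted - discounted.mean()) / (discounted.std() + 1e-6)

def _training_loop_sketch():
    state_size, action_size = 4, 2
    agent = PolicyGradientAgent(state_size, action_size, gym_agent=None)
    for _ in range(3):                                   # a few toy episodes
        for _ in range(5):                               # a few toy steps per episode
            obs = np.random.rand(state_size).astype(np.float32)
            action = agent.act(obs)                      # sample from the softmax policy
            agent.remember(obs, action, np.random.rand())
        agent.learn()                                    # one policy-gradient update; memory is cleared
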
--------------------------------------------------------------------------------
/__pycache__/Model.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mAzeems/Smart-Traffic-Flow-Management-system-for-Montgomery-city-road-network-using-DDPG-MARL-algorithm/9e88164abdcf8b1c1d569874c4b5d33214a954d1/__pycache__/Model.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/multiagentrl.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mAzeems/Smart-Traffic-Flow-Management-system-for-Montgomery-city-road-network-using-DDPG-MARL-algorithm/9e88164abdcf8b1c1d569874c4b5d33214a954d1/__pycache__/multiagentrl.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/simulation.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mAzeems/Smart-Traffic-Flow-Management-system-for-Montgomery-city-road-network-using-DDPG-MARL-algorithm/9e88164abdcf8b1c1d569874c4b5d33214a954d1/__pycache__/simulation.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/util.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mAzeems/Smart-Traffic-Flow-Management-system-for-Montgomery-city-road-network-using-DDPG-MARL-algorithm/9e88164abdcf8b1c1d569874c4b5d33214a954d1/__pycache__/util.cpython-38.pyc
--------------------------------------------------------------------------------
/data/moco.det.xml:
--------------------------------------------------------------------------------
(File content not recoverable: the XML markup was stripped when the repository was flattened, leaving only blank numbered lines. By its name this is a SUMO detector-definition additional file, presumably the lane-area detectors queried via traci.lanearea in simulation.py.)
--------------------------------------------------------------------------------
/data/testmap.sumocfg:
--------------------------------------------------------------------------------
(File content not recoverable: the XML markup was stripped when the repository was flattened, leaving only blank numbered lines. A SUMO .sumocfg normally points at the net, route and additional files under data/.)
--------------------------------------------------------------------------------
/multiagentrl.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 |
4 | class Q_Learning_Agent():
5 |
6 | def __init__(self, n_state, n_action, learning_rate=0.1, decay_rate = 0.5, e_greedy=0.6):
7 | self.lr = learning_rate
8 | self.epsilon = e_greedy
9 | self.gamma = decay_rate
10 | self.action = [i for i in range(n_action)]
11 | self.q_table = [[0 for _ in range(n_action)] for _ in range(n_state)]
12 |
13 | def choose(self, state, explore=True):
14 | if explore:
15 | if np.random.uniform() < self.epsilon:
16 | return np.argmax(self.q_table[state])
17 | else:
18 | return np.random.choice(self.action)
19 | else:
20 | return np.argmax(self.q_table[state])
21 |
22 | def update(self, last_state, action, state, reward):
23 | self.q_table[last_state][action] += self.lr * (reward + self.gamma * max(self.q_table[state]) - self.q_table[last_state][action])
24 |
25 | class Deterministic:
26 |
27 | def __init__(self, n_state, n_action, alpha=0.8, gamma=0.9, beta_winning=0.05, beta_losing=0.1):
28 | self.gamma = gamma
29 | self.alpha = alpha
30 | self.beta_losing = beta_losing
31 | self.beta_winning = beta_winning
32 | self.s_count = [0 for _ in range(n_state)]
33 | self.q_table = [[0 for _ in range(n_action)] for _ in range(n_state)]
34 | initial_prob = 1 / n_action
35 | self.policy = [[initial_prob for _ in range(n_action)] for _ in range(n_state)]
36 | self.aver_policy = [[initial_prob for _ in range(n_action)] for _ in range(n_state)]
37 |
38 | def choose(self, state):
39 | x = random.uniform(0, 1)
40 | for idx, action_prob in enumerate(self.policy[state]):
41 | x -= action_prob
42 | if x <= 0:
43 | return idx
44 | return len(self.policy[state]) - 1
45 |
46 | def update(self, p_state, action, n_state, reward):
47 | self.q_table[p_state][action] = (
48 | 1 - self.alpha) * self.q_table[p_state][action] + self.alpha * (reward + self.gamma * max(self.q_table[n_state]))
49 |         self.s_count[p_state] = self.s_count[p_state] + 1  # fix: WoLF-PHC counts visits of the state being updated; incrementing n_state can leave s_count[p_state] at 0 and divide by zero below
50 | for idx, action_prob in enumerate(self.aver_policy[p_state]):
51 | self.aver_policy[p_state][idx] = self.aver_policy[p_state][idx] + \
52 | (1 / self.s_count[p_state]) * (self.policy[p_state]
53 | [idx] - self.aver_policy[p_state][idx])
54 | this_policy_reward = 0
55 | aver_policy_reward = 0
56 | for idx, q_value in enumerate(self.q_table[p_state]):
57 | this_policy_reward += q_value * self.policy[p_state][idx]
58 | aver_policy_reward += q_value * \
59 | self.aver_policy[p_state][idx]
60 |
61 | if this_policy_reward > aver_policy_reward:
62 | beta = self.beta_winning
63 | else:
64 | beta = self.beta_losing
65 |
66 | max_idx = 0
67 | max_val = self.q_table[p_state][0]
68 | for idx, q_value in enumerate(self.q_table[p_state]):
69 | if q_value > max_val:
70 |                 max_idx, max_val = idx, q_value  # fix: max_val must also be updated so the true argmax is kept
71 | tmp = self.policy[p_state][max_idx] + beta
72 | self.policy[p_state][max_idx] = min(tmp, 1)
73 | for idx, action_prob in enumerate(self.policy[p_state]):
74 | if idx != max_idx:
75 | tmp = self.policy[p_state][idx] + \
76 | ((-beta) / (len(self.policy[p_state]) - 1))
77 | self.policy[p_state][idx] = max(tmp, 0)
--------------------------------------------------------------------------------
/plotting.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.patches as patches  # for the legend
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | from matplotlib.lines import Line2D  # for the legend
6 | 
7 |
8 | df = pd.read_csv("C:/Users/azeem/Desktop/test.csv")
9 |
10 | BLUE = "#3D85F7"
11 | BLUE_LIGHT = "#5490FF"
12 | PINK = "#C32E5A"
13 | PINK_LIGHT = "#D34068"
14 | GREY40 = "#666666"
15 | GREY25 = "#404040"
16 | GREY20 = "#333333"
17 | BACKGROUND = "#F5F4EF"
18 |
19 | plt.subplots(figsize=(12,7))
20 |
21 | epochs = df["iter"].values
22 |
23 | data = df["reward"].values
24 | #ma2c = df["ff"].values
25 |
26 | plt.plot(epochs, data, color='r')
27 | #plt.plot(epochs, ma2c, color='r')
28 |
29 |
30 | plt.margins(0.01)
31 | plt.title("Average Reward Collected", size = 32, pad = 20)
32 |
33 | plt.xlabel("Training iterations", labelpad = 15, size = 24)
34 | plt.ylabel("Average Reward Collected", labelpad = 15, size = 24)
35 | fig = plt.gcf()
36 | fig.set_size_inches(21, 11.25)
37 | #plt.savefig("avg_q.png", dpi = 1200)
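# To actually render or persist the chart when running this script non-interactively, one
# would typically end with something like the following (a minimal addition, assuming the
# default matplotlib backend; the original file stops after set_size_inches):
plt.tight_layout()
plt.show()   # or re-enable the savefig call above to write avg_q.png to disk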
--------------------------------------------------------------------------------
/simulation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from collections import defaultdict
4 |
5 | if 'SUMO_HOME' in os.environ:
6 | sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools'))
7 |     import traci
8 | else:
9 | sys.exit("please declare environment variable 'SUMO_HOME'")
10 |
11 | class Simulation():
12 |
13 | def __init__(self):
14 | pass
15 |
16 | def start_simulation(self, sumo_comm):
17 | traci.start(sumo_comm)
18 |
19 | def close_simulation(self):
20 | traci.close()
21 |
22 | def simulate_one_step(self):
23 | traci.simulationStep()
24 |
25 | def get_traffic_light_phase(self, tlsID):
26 | return traci.trafficlight.getPhase(tlsID)
27 |
28 | def get_arrived_vehicle_list(self):
29 | return traci.simulation.getArrivedIDList()
30 |
31 | def get_departed_vehicle_list(self):
32 | return traci.simulation.getDepartedIDList()
33 |
34 | def get_minimum_expected_number(self):
35 | return traci.simulation.getMinExpectedNumber()
36 |
37 | def get_vehicle_number_on_edges(self, tlsID):
38 | edgeID_list = []
39 | n_vehicle_dict = defaultdict(lambda: 0)
40 | controlled_links = self.get_traffic_light_controlling_links(tlsID)
41 | for link in controlled_links:
42 | laneID = link[0][0]
43 | edgeID = traci.lane.getEdgeID(laneID)
44 | n_vehicle = traci.lane.getLastStepVehicleNumber(laneID)
45 | n_vehicle_dict[edgeID] += n_vehicle
46 | if edgeID not in edgeID_list:
47 | edgeID_list.append(edgeID)
48 | n_vehicle_list = []
49 | for edgeID in edgeID_list:
50 | n_vehicle_list.append(n_vehicle_dict[edgeID])
51 | return n_vehicle_list
52 |
53 | def get_occupied_ratio_of_lanes(self, tlsID):
54 | via_lane_list = []
55 | controlled_links = traci.trafficlight.getControlledLinks(tlsID)
56 | for link in controlled_links:
57 | via_lane_list.append(link[0][2])
58 | occupancy = []
59 | for laneID in via_lane_list:
60 | occupancy.append(traci.lanearea.getLastStepOccupancy(laneID))
61 | return occupancy
62 |
63 | def get_int_vehicle_number(self, tlsID):
64 | total = 0
65 | controlled_links = traci.trafficlight.getControlledLinks(tlsID)
66 | for link in controlled_links:
67 | laneID = link[0][2]
68 |             total += traci.lane.getLastStepVehicleNumber(laneID)  # presumed intent; the original "total += total/2" ignored laneID and always returned 0
69 | return total
70 |
71 | def get_traffic_light_controlling_links(self, tlsID):
72 | return traci.trafficlight.getControlledLinks(tlsID)
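
# Illustrative usage sketch (not part of the original source): how the thin TraCI wrapper above
# is typically driven.  The binary name and .sumocfg path are hypothetical placeholders;
# DDPG.py builds the real command from SUMO_HOME and the files under data/.
def _simulation_sketch(sumocfg_path="data/testmap.sumocfg"):
    sim = Simulation()
    sim.start_simulation(["sumo", "-c", sumocfg_path])    # launches SUMO and connects TraCI
    while sim.get_minimum_expected_number() > 0:          # vehicles still to depart or arrive
        sim.simulate_one_step()
    sim.close_simulation()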
--------------------------------------------------------------------------------
/test.csv:
--------------------------------------------------------------------------------
1 | iter,length,,reward,,delay
2 | 1,4678,,-43450,,43450
3 | 2,4690,,-42155,,42155
4 | 3,4739,,-42965,,42965
5 | 4,4911,,-41758,,41758
6 | 5,4954,,-41253,,41253
7 | 6,5067,,-40258,,40258
8 | 7,5075,,-39650,,39650
9 | 8,4887,,-39867,,39867
10 | 9,4634,,-41502,,41502
11 | 10,4677,,-40865,,40865
12 | 11,4825,,-40741,,40741
13 | 12,4887,,-40653,,40653
14 | 13,4928,,-40986,,40986
15 | 14,4992,,-40150,,40150
16 | 15,5090,,-40093,,40093
17 | 16,5122,,-41528,,41528
18 | 17,5052,,-40189,,40189
19 | 18,5091,,-40324,,40324
20 | 19,5170,,-41596,,41596
21 | 20,5173,,-41572,,41572
22 | 21,5149,,-42356,,42356
23 | 22,5211,,-42578,,42578
24 | 23,5113,,-44715,,44715
25 | 24,5122,,-44126,,44126
26 | 25,5156,,-45263,,45263
27 | 26,5121,,-45896,,45896
28 | 27,5068,,-44856,,44856
29 | 28,5109,,-43256,,43256
30 | 29,4758,,-42105,,42105
31 | 30,4968,,-41456,,41456
32 | 31,4933,,-40576,,40576
33 | 32,4668,,-39854,,39854
34 | 33,4682,,-38456,,38456
35 | 34,4578,,-41006,,41006
36 | 35,4435,,-35467,,35467
37 | 36,4432,,-34856,,34856
38 | 37,4507,,-32475,,32475
39 | 38,4377,,-35896,,35896
40 | 39,4363,,-39845,,39845
41 | 40,4361,,-36880,,36880
42 | 41,3970,,-36547,,36547
43 | 42,4058,,-34153,,34153
44 | 43,3806,,-36576,,36576
45 | 44,3777,,-32155,,32155
46 | 45,3667,,-34586,,34586
47 | 46,3628,,-30147,,30147
48 | 47,3484,,-31245,,31245
49 | 48,3413,,-35157,,35157
50 | 49,3327,,-30245,,30245
51 | 50,3249,,-32159,,32159
52 | 51,3128,,-32014,,32014
53 | 52,3082,,-29035,,29035
54 | 53,3047,,-31024,,31024
55 | 54,2754,,-30112,,30112
56 | 55,2851,,-28659,,28659
57 | 56,2752,,-27896,,27896
58 | 57,2657,,-29685,,29685
59 | 58,2559,,-27485,,27485
60 | 59,2691,,-30245,,30245
61 | 60,2533,,-32222,,32222
62 | 61,2332,,-32456,,32456
63 | 62,2310,,-32654,,32654
64 | 63,2366,,-32154,,32154
65 | 64,2357,,-33541,,33541
66 | 65,2482,,-33520,,33520
67 | 66,2382,,-30145,,30145
68 | 67,2514,,-29658,,29658
69 | 68,2347,,-27586,,27586
70 | 69,2169,,-28659,,28659
71 | 70,2246,,-27569,,27569
72 | 71,2141,,-27100,,27100
73 | 72,2110,,-24751,,24751
74 | 73,2189,,-27153,,27153
75 | 74,2041,,-26589,,26589
76 | 75,2027,,-24578,,24578
77 | 76,2043,,-24863,,24863
78 | 77,2029,,-24789,,24789
79 | 78,2009,,-26689,,26689
80 | 79,1814,,-27849,,27849
81 | 80,1848,,-26786,,26786
82 | 81,1780,,-27846,,27846
83 | 82,1630,,-26587,,26587
84 | 83,1800,,-27782,,27782
85 | 84,1712,,-27486,,27486
86 | 85,1644,,-27896,,27896
87 | 86,1690,,-27302,,27302
88 | 87,1706,,-27360,,27360
89 | 88,1567,,-26483,,26483
90 | 89,1536,,-26201,,26201
91 | 90,1384,,-26010,,26010
92 | 91,1455,,-25430,,25430
93 | 92,1133,,-25463,,25463
94 | 93,1525,,-25473,,25473
95 | 94,1670,,-25986,,25986
96 | 95,1490,,-25436,,25436
97 | 96,1447,,-25710,,25710
98 | 97,1376,,-25486,,25486
99 | 98,1367,,-25001,,25001
100 | 99,1201,,-25364,,25364
101 | 100,1393,,-24986,,24986
102 |
--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import random
3 | from random import uniform
4 | from collections import defaultdict
5 | from xml.etree.ElementTree import ElementTree
6 |
7 | class Data_Helper():
8 |
9 | def __init__(self, work_dir=''):
10 | self.work_dir = work_dir
11 |
12 | def get_elem_with_attr(self, xml_file, elem_name, attr_list=[]):
13 | elem_list = []
14 | tree = ElementTree(file=xml_file)
15 | for node in tree.iterfind(elem_name):
16 | flag = True
17 | for attr in attr_list:
18 | if '=' in attr:
19 | if node.get(attr.split('=')[0]) != attr.split('=')[1]:
20 | flag = False
21 | continue
22 | else:
23 | if node.get(attr) is None:
24 | flag = False
25 | continue
26 | if flag:
27 | elem_list.append(node)
28 | return elem_list
29 |
30 | def random_routes(self, net_file, route_file, n_vehicle=10000, max_edge_num=30, start_time=1, max_intv=1):
31 | dead_end_ids = list(map(lambda x: x.get('id'), self.get_elem_with_attr(net_file, 'junction', ['type=dead_end'])))
32 | traffic_light_ids = list(map(lambda x: x.get('id'), self.get_elem_with_attr(net_file, 'junction', ['type=traffic_light'])))
33 | all_edges = self.get_elem_with_attr(net_file, 'edge')
34 | start_edges = []
35 | end_edges = []
36 | middle_edges = []
37 | for e in all_edges:
38 | if e.get('from') in dead_end_ids:
39 | start_edges.append(e.get('id'))
40 | if e.get('to') in dead_end_ids:
41 | end_edges.append(e.get('id'))
42 | if e.get('from') in traffic_light_ids and e.get('to') in traffic_light_ids:
43 | middle_edges.append(e.get('id'))
44 | all_connections = self.get_elem_with_attr(net_file, 'connection')
45 | edge_dict = defaultdict(list)
46 | valid_edges = start_edges + end_edges + middle_edges
47 | for c in all_connections:
48 | if c.get('from') in valid_edges and c.get('to') in valid_edges:
49 | if c.get('to') not in edge_dict[c.get('from')]:
50 | edge_dict[c.get('from')].append(c.get('to'))
51 | routes = []
52 | c = 0
53 | while c < n_vehicle:
54 | route = []
55 | last_edge = random.choice(start_edges)
56 | route.append(last_edge)
57 | e_count = 1
58 | while True:
59 | invalid = False
60 | next_edge = random.choice(edge_dict[last_edge])
61 | e_count += 1
62 | if e_count > max_edge_num:
63 | invalid = True
64 | break
65 | elif next_edge in end_edges:
66 | route.append(next_edge)
67 | break
68 | else:
69 | route.append(next_edge)
70 | last_edge = next_edge
71 | if invalid:
72 | continue
73 | routes.append(route)
74 | c += 1
75 | with open(route_file, 'w', encoding='utf-8') as f:
76 | prefix = ''
77 | new_line = '\n'
78 | f.write(prefix)
79 |             f.write('')  # note: the XML string literals throughout this function were stripped of their markup when the repository was flattened
80 | f.write(new_line)
81 | f.write(prefix)
82 | f.write(
83 | '')
84 | f.write(new_line)
85 | prefix += '\t'
86 | f.write(prefix)
87 | f.write('')
88 | f.write(new_line)
89 | prev_depart = start_time
90 | for idx, r in enumerate(routes):
91 | this_depart = prev_depart + uniform(0, max_intv)
92 | f.write(prefix)
93 | f.write('')
95 | f.write(new_line)
96 | prefix += '\t'
97 | f.write(prefix)
98 | f.write('')
99 | f.write(new_line)
100 | prefix = prefix[0:-1]
101 | f.write(prefix)
102 | f.write('')
103 | f.write(new_line)
104 | prev_depart = this_depart
105 | prefix = prefix[0:-1]
106 | f.write(prefix)
107 | f.write('')
108 |
109 | if __name__ == '__main__':
110 | net_file = r'data/moco.net.xml'
111 | route_file = r'data/moco_jtr.rou.xml'
112 | data_helper = Data_Helper()
113 | data_helper.random_routes(net_file, route_file)
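
# Illustrative sketch (not part of the original source): get_elem_with_attr() filters XML elements
# by name and, optionally, attribute values; this is how DDPG.py collects the IDs of the
# traffic-light junctions from the net file.  The path below is the repository's default net file.
def _junction_query_sketch(net_file="data/moco.net.xml"):
    helper = Data_Helper()
    tls_junctions = helper.get_elem_with_attr(net_file, 'junction', ['type=traffic_light'])
    return [j.get('id') for j in tls_junctions]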
--------------------------------------------------------------------------------