├── .gitignore
├── .gitmodules
├── Aimsun
│   ├── AAPI.py
│   ├── BusInPOZ.py
│   ├── RunSeveralReplications.py
│   ├── corridor.py
│   ├── intersection.py
│   ├── prePOZ.py
│   ├── script.py
│   └── util.py
├── DQN
│   └── ddqn_framework.py
├── Env
│   ├── __init__.py
│   ├── aimsun_env.py
│   └── env.py
├── README.md
├── agent.ipynb
├── config.py
├── demo
│   ├── dynamic_senario.png
│   ├── navBar.png
│   ├── prePOZ.png
│   └── tsp_flow.png
└── train.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | ### git ignore files ###
2 |
3 | # mac folder attribute file
4 | **/.DS_Store
5 | # compiled python files
6 | *.pyc
7 | # cache folder
8 | **/__pycache__
9 | # keep the log_files folder but ignore its contents
10 | log_files/*
11 | !log_files/.keep
12 | # ipynb
13 | **/.ipynb_checkpoints
14 | # IPython
15 | profile_default/
16 | ipython_config.py
17 | .idea/
18 | Aimsun/detector_original.py
19 | agent.py
20 | Aimsun/.~intersection.py
21 | Aimsun/detector.py
22 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "DQN-model"]
2 | path = DQN-model
3 | url = https://github.com/JerryIshihara/DQN-model.git
4 |
--------------------------------------------------------------------------------
/Aimsun/BusInPOZ.py:
--------------------------------------------------------------------------------
1 | class BusInPOZ:
2 |
3 | def __init__(self, intersection, check_in_bus_info, check_in_phase, check_in_phasetime, check_in_time, last_check_in):
4 | self.intersection_of_interest = intersection
5 | self.bus_id = check_in_bus_info.idVeh
6 | self.check_in_time = check_in_time
7 | self.check_in_phase = check_in_phase
8 | self.check_in_phasetime = check_in_phasetime
9 | self.last_check_in = last_check_in # previous bus check in time
10 |
11 | self.check_in_headway = check_in_time - last_check_in
12 |
13 | self.check_out_time = -1
14 | self.check_out_headway = -1
15 |
16 | self.last_update_time = check_in_time
17 | self.original_action = None
18 | self.original_state = None # state generated at check in
19 |
20 | def check_out(self, check_out_time, last_check_out=0):
21 | self.check_out_time = check_out_time
22 | self.check_out_headway = check_out_time - last_check_out
23 | self.last_update_time = check_out_time
24 |
25 | def set_action(self, action):
26 | if self.original_action is None:
27 | self.original_action = action
28 | else:
29 | print("duplicate set original action, check to make sure implementation is correct")
30 |
31 | def set_state(self, state):
32 | if self.original_state is None:
33 | self.original_state = state
34 | else:
35 | print("duplicate set original state, check to make sure implementation is correct")
36 |
--------------------------------------------------------------------------------
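
Note: a minimal illustration of how BusInPOZ derives headways (the vehicle-info stub and all ids/times below are made up for the example; Aimsun normally supplies the info object):

    class FakeVehInfo:  # stand-in for Aimsun's vehicle info struct, which exposes idVeh
        def __init__(self, idVeh):
            self.idVeh = idVeh

    bus = BusInPOZ(intersection=1001, check_in_bus_info=FakeVehInfo(42),
                   check_in_phase=2, check_in_phasetime=7,
                   check_in_time=300, last_check_in=220)
    print(bus.check_in_headway)   # 80  (= 300 - 220)
    bus.check_out(check_out_time=360, last_check_out=275)
    print(bus.check_out_headway)  # 85  (= 360 - 275)
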
/Aimsun/RunSeveralReplications.py:
--------------------------------------------------------------------------------
1 | '''
2 | Script to run several replications via Aconsole; each replication runs after the previous one finishes.
3 | 
4 | Usage:
5 | In a command prompt, go to where your Python 2.7 is located and type: python **PathToThisScript** -aconsolePath **PATHTO_aconsole.exe** -modelPath **PATHTOMODEL** -targets **Target1** **Target2** ... **TargetN**
6 | where Target1, Target2 ... TargetN are replications or macroexperiments.
7 | 
8 | You can also run replications from several models:
9 | 
10 | python **PathToThisScript** -aconsolePath **PATHTO_aconsole.exe** -modelPath **PATHTOMODEL1** -targets **Target1** **Target2** ... **TargetN** -modelPath **PATHTOMODEL2** -targets **Target1** **Target2** ...
11 | 
12 | '''
13 |
14 |
15 |
16 | import sys
17 | import os.path
18 | import locale
19 | from datetime import datetime
20 | import subprocess  # used to launch aconsole.exe as a child process
21 |
22 | def RunSimulation(replicationID, modelPath):
23 |     # Runs one aconsole execution, equivalent to:
24 |     #   aconsole.exe -v -log -project **PROJECT** -cmd execute -target 1060
25 |     print "modelPath: " + modelPath
26 |     print "replication id: " + str(replicationID)
27 |     args = [execmd, '-v', '-log', '-project', modelPath, '-cmd', 'execute', '-target', replicationID]
28 |     popen = subprocess.Popen(args)
29 |     # Wait until aconsole finishes so memory consumption won't skyrocket
30 |     # (only one replication runs at a time).
31 |     popen.wait()
32 | argv = sys.argv  # the arguments provided via the command prompt
33 |
34 | if argv[1] == '-aconsolePath':
35 |
36 | execmd = argv[2]
37 | print "\n Aconsole: " + execmd + "\n"
38 |
39 | if argv[3] == '-modelPath':
40 | modelPath = argv[4]
41 | print "------------\n"
42 | print "Model: " + modelPath + "\n"
43 |
44 | else:
45 | print "no -modelPath parameter"
46 | raw_input("Press enter to exit ;)")
47 | sys.exit()
48 | else:
49 | print "No -aconsolePath parameter"
50 | raw_input("Press enter to exit ;)")
51 | sys.exit()
52 |
53 | if argv[5] == '-targets':
54 | print "targets: \n "
55 | for i in range(len(argv[6:])):
56 | j = i +6
57 | if argv[j].isdigit():
58 | print argv[j] + "\n "
59 | else:
60 | if argv[j] =='-modelPath':
61 | print "------------\n"
62 | print "Model: " + argv[j+1] + "\n"
63 |
64 | if argv[j] == '-targets':
65 | print "targets: \n"
66 | print '===== NOW ===== \n'
67 | print datetime.now()
68 | else:
69 | print "no -targets parameter"
70 | raw_input("Press enter to exit ;)")
71 | sys.exit()
72 |
73 |
74 | # answer = raw_input("Continue? [y/n] \n")
75 | answer = 'y'
76 | if answer == 'y':
77 | for j in range(len(argv[6:])):
78 | i = j+6
79 | if argv[i].isdigit():
80 | print "Running simulation: " + argv[i] + " in model: " + modelPath
81 | RunSimulation(argv[i],modelPath)
82 | elif argv[i] == '-modelPath':
83 | modelPath = argv[i+1]
84 |
85 | else:
86 | print "execution canceled "
87 | raw_input("Press enter to exit ;)")
88 | sys.exit()
89 | print "Done"
90 | # raw_input("Press enter to exit ;)")
91 |
92 |
--------------------------------------------------------------------------------
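
For reference, a typical invocation of the script above looks like this (all paths and target ids are illustrative, not taken from the repo):

    python RunSeveralReplications.py -aconsolePath "C:\Program Files\Aimsun\Aimsun Next 8.2\aconsole.exe" -modelPath "C:\models\corridor.ang" -targets 1060 1061 1062
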
/Aimsun/corridor.py:
--------------------------------------------------------------------------------
1 | """Aimsun Corridor
2 | """
3 | from uuid import uuid4
4 | import os, sys, inspect
5 | # import numpy as np
6 | current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
7 | parent_dir = os.path.dirname(current_dir)
8 | sys.path.insert(0, parent_dir)
9 | from config import *
10 | from intersection import *
11 | from prePOZ import *
12 |
13 | class Corridor:
14 |
15 |     """Corridor of two intersections that exchanges state, reward and action with the DQN through files
16 | 
17 |     Attributes
18 |     ----------
19 |     action_flag : int
20 |         flag of the last action read from the action file
21 |     intx_1 : Intersection
22 |         first intersection (with its prePOZ)
23 |     intx_2 : Intersection
24 |         second intersection (with its prePOZ)
25 |     joint_state : tuple
26 |         ([joint state], flag) written to the DQN at each bus check-in
27 |     """
28 |
29 | def __init__(self, intersections):
30 | """Initialize Corridor object
31 |
32 | Parameters
33 | ----------
34 | intersections : list
35 | a list of intersection configurations
36 | """
37 | # first prePOZ + POZ
38 | self.intx_1 = Intersection(intersections[0])
39 | self.prePOZ_1 = PrePOZ(intersections[0]['prePOZ'])
40 |
41 | # second prePOZ + POZ
42 | self.intx_2 = Intersection(intersections[1])
43 | self.prePOZ_2 = PrePOZ(intersections[1]['prePOZ'])
44 |
45 | self.joint_state = ([], uuid4().int) # ([joint state], flag)
46 | self.action_flag = 0
47 | self.counter = 0
48 |
49 | def write_last_reward(self):
50 | r_1 = self.intx_1.get_reward()
51 | r_2 = self.intx_2.get_reward()
52 | self.counter = 0
53 | # cumulative reward between time step t and t + 1
54 | total_reward = r_1 + r_2
55 | self._write_state_reward(total_reward, last_reward=True)
56 | return
57 |
58 | def _write_state_reward(self, reward, last_reward=False):
59 | """Send joint state and reward to DQN
60 | """
61 | uuid = uuid4().int
62 | if self.counter == 0 or last_reward:
63 | uuid = 0
64 | self.counter += 1
65 |         # flag 0 marks the first write after a reset and the final reward
66 | is_reward_written = False
67 | while not is_reward_written:
68 | try:
69 | f = open(REWARD, "w+")
70 | f.write("{} {}".format(reward, uuid))
71 | f.close()
72 | is_reward_written = True
73 | with open(REWARD_CSV, "a+") as out: # Log key parameters
74 | out.write("{},{}\n".format(reward, uuid))
75 | except:
76 |                 continue  # the file may be locked by the DQN process; retry
77 |
78 | joint_state = self.joint_state
79 | joint_state_str = ' '.join(str(n) for n in joint_state[0])
80 | is_state_written = False
81 | while not is_state_written:
82 | try:
83 | f = open(STATE, "w+")
84 | f.write("{} {}".format(joint_state_str, joint_state[1]))
85 | f.close()
86 | is_state_written = True
87 | except:
88 | continue
89 |
90 |
91 |
92 | def _read_action(self):
93 | """Read and return the actions from DQN
94 |
95 | Returns
96 | -------
97 | int, int
98 | action1, action2 from DQN
99 | """
100 | flag = self.action_flag
101 | while flag == self.action_flag:
102 | try:
103 | f = open(ACTION, "r")
104 | data = f.read()
105 | f.close()
106 | data = data.split()
107 | if len(data) != 3:
108 | continue
109 | action1 = int(data[0])
110 | action2 = int(data[1])
111 | self.action_flag = int(data[2]) # new flag read from file
112 | except:
113 | continue
114 | return action1, action2
115 |
116 | def aapi_post_manage(self, time, timeSta, timeTrans, acycle):
117 |         """Aimsun callback executed every simulation step while the replication runs
118 |
119 | Parameters
120 | ----------
121 | time : int
122 | current replication time in Aimsun
123 | timeSta : int
124 | defaul Aimsun input
125 | timeTrans : int
126 | defaul Aimsun input
127 | acycle : int
128 | defaul Aimsun input
129 |
130 | Returns
131 | -------
132 | int
133 | 0 indicates successful function call to Aimsun Next
134 | """
135 | # prePOZ update
136 | self.prePOZ_1.update(time, timeSta)
137 | self.prePOZ_2.update(time, timeSta)
138 | # check-out event
139 | self.intx_1._bus_out_handler(time, timeSta)
140 | self.intx_2._bus_out_handler(time, timeSta)
141 | # check-in event
142 | intx1_bus_checkin = self.intx_1._bus_enter_handler(time, timeSta)
143 | intx2_bus_checkin = self.intx_2._bus_enter_handler(time, timeSta)
144 |         if intx1_bus_checkin or intx2_bus_checkin:
145 | # update states based on each intersection
146 | pre1 = self.prePOZ_1.get_state()
147 | pre2 = self.prePOZ_2.get_state()
148 | poz1 = self.intx_1.get_state()
149 | poz2 = self.intx_2.get_state()
150 | self.joint_state = (pre1 + poz1 + pre2 + poz2, uuid4().int)
151 |             # - send the new state and the previous reward to the DQN
152 |             #   (no need to clear the state: get_state() is synchronous
153 |             #   with Aimsun)
154 |             # - get_reward() fetches the cumulative reward of each intersection
155 |             #   since the last time step and clears it internally
156 | r_1 = self.intx_1.get_reward()
157 | r_2 = self.intx_2.get_reward()
158 | # cumulative reward between time step t and t + 1
159 | total_reward = r_1 + r_2
160 | # total_reward = 1 / (1 + np.exp(-total_reward))
161 | self._write_state_reward(total_reward)
162 | # apply action
163 | action1, action2 = self._read_action()
164 | # record the action decided to the checked in bus
165 | if intx1_bus_checkin:
166 | self.intx_1.set_bus_actions_and_state([action1, action2], pre1 + poz1 + pre2 + poz2)
167 | if intx2_bus_checkin:
168 | self.intx_2.set_bus_actions_and_state([action1, action2], pre1 + poz1 + pre2 + poz2)
169 | # apply action to each intersection
170 | if self.intx_1.numbus == 0:
171 | action1 = 0 # if there is no bus in intx 1, no action can be applied
172 | self.intx_1.apply_action(action1, time, timeSta)
173 | self.intx_2.apply_action(action2, time, timeSta)
174 |
175 |
176 | return 0
177 |
178 |
179 |
--------------------------------------------------------------------------------
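
The file protocol corridor.py implements is: write "<reward> <uuid>" to the REWARD file and "<state...> <flag>" to the STATE file, then poll the ACTION file for "<action1> <action2> <flag>" until the flag changes. A minimal sketch of the peer (DQN-side) loop, assuming the same STATE/REWARD/ACTION path constants from config.py; the function names here are illustrative, not part of the repo:

    from uuid import uuid4
    from config import STATE, REWARD, ACTION

    def read_state_reward(last_flag):
        # Poll until corridor.py writes a state with a new flag.
        while True:
            try:
                with open(STATE) as f:
                    parts = f.read().split()
                flag = int(parts[-1])
                if flag == last_flag:
                    continue  # no new state yet
                state = [float(x) for x in parts[:-1]]
                with open(REWARD) as f:
                    reward = float(f.read().split()[0])
                return state, reward, flag
            except (IOError, ValueError, IndexError):
                continue  # file is mid-write; retry

    def write_action(action1, action2):
        # A fresh flag tells corridor._read_action that a new action is ready.
        with open(ACTION, "w") as f:
            f.write("{} {} {}".format(action1, action2, uuid4().int))
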
/Aimsun/intersection.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Intersection: tracks buses through the POZ and applies TSP signal actions
3 | """
4 | from AAPI import *
5 | from BusInPOZ import BusInPOZ
6 | from util import *
7 | import util
8 | import csv
9 |
10 | class Intersection:
11 |
12 | """Summary
13 |
14 | Attributes
15 | ----------
16 | CONFIG : dict
17 | configuration of the intersection, see config.py
18 |     cycled_bus : int
19 |         number of buses that checked in during the previous cycle (a cycle starts at the phase of interest and ends before the next phase of interest)
20 |     total_bus : int
21 |         total number of buses that entered this intersection (used to determine the first & last checked-in bus in training)
22 | bus_list : list
23 | list of bus object CURRENTLY in the intersection (has not checked out yet)
24 | arranged by check in time, with the newest bus at the end of the list
25 | last_checkout_bus : BusInPOZ object
26 | the latest bus that checked out of the intersection, initialized as None before the first bus checkout event
27 | extended : int
28 | Registered action that is applied on this intersection
29 | extendedalready : int
30 | 0 or 1, 0 means no change has been made to the intersection signal length
31 |     numbus : int
32 |         number of buses CURRENTLY in the intersection POZ
33 |     allnumvel : int
34 |         number of buses + cars currently in the intersection POZ
35 | last_in_info : int
36 | The bus id of the bus which initiated the last check in event (used to avoid repeated check in event)
37 | last_out_info : int
38 | The bus id of the bus which initiated the last check out event (used to avoid repeated check out event)
39 | markedbus : int
40 | not used yet
41 | markedbusgone : int
42 | not used yet
43 |     reward : float
44 |         cumulative reward collected in check-out events since the last check-in event
45 | replicationID : int
46 | Aimsun replication ID for the current simulation
47 | downstream_intersection : Intersection object
48 | pointer to the downstream intersection
49 |     prePOZ_numbus : int
50 |         number of buses in the prePOZ of this intersection (initialized to
51 |         None when no upstream intersection exists); this number should only
52 |         be increased by the upstream intersection
53 | prePOZ_bus_checkout_time_dict : dict
54 | dict with bus id as keys and bus checkout time as values. This shows all bus in prePOZ of the current intersection
55 | This dict is only valid IF there is a detector/intersection before the current intersection
56 | state : list with 6 elements
57 |         information about the buses and vehicles INSIDE the POZ (not including prePOZ)
58 | [last_available_checkout_time, last_check_in_time, check_in_headway,
59 | number of buses in POZ, number of cars + buses in POZ, time To Nearest Green]
60 |
61 | """
62 |
63 | def __init__(self, CONFIG):
64 |         """Initialize the intersection from its configuration
65 | 
66 |         Parameters
67 |         ----------
68 |         CONFIG : dict
69 |             configuration of the intersection, see config.py
70 |         """
71 | self.CONFIG = CONFIG
72 | self.cycled_bus = 0
73 | self.total_bus = 0
74 | self.bus_list = [] # list of bus in POZ
75 | self.last_checkout_bus = None
76 | self.extended = 0 # registered action
77 | self.numbus = 0
78 | self.allnumvel = 0
79 | self.last_in_info = -99 # bus id for last checked in bus
80 | self.last_out_info = -99
81 | self.extendedalready = 0 # extended or not in current cycle
82 | # self.markedbus = 0 # if a bus is marked as indicator
83 | # self.markedbusgone = 0 # if a marked bus had exited
84 | self.reward = 0 # cumulative reward
85 | self.replicationID = None
86 | # self.extend_record = {}
87 |
88 | # number of bus in prePOZ will produce error if a bus enters POZ without being recorded in prePOZ (aka checkout from last intersection)
89 | self.prePOZ_numbus = None
90 | self.prePOZ_bus_checkout_time_dict = None # this dict is only valid if there is a detector/intersection before this intersection
91 | self.reset_intersection = 0
92 | self.state = [0, 0, 0, 0, 0, 0]
93 |
94 | def empty_intersection(self):
95 | self.cycled_bus = 0
96 | self.total_bus = 0
97 | self.bus_list = [] # list of bus in POZ
98 | self.last_checkout_bus = None
99 | self.extended = 0 # registered action
100 | self.numbus = 0
101 | self.allnumvel = 0
102 | self.last_in_info = -99 # bus id for last checked in bus
103 | self.last_out_info = -99
104 | self.extendedalready = 0 # extended or not in current cycle
105 |
106 | self.state = [0, 0, 0, 0, 0, 0]
107 |
108 |
109 | def _find_first_checkout_time_in_prePOZ(self):
110 | """
111 |         Find the earliest checkout time among the buses currently in prePOZ
112 |
113 | Returns
114 | -------
115 | first_checkout_time : int
116 | the checkout time of the first bus in prePOZ
117 | """
118 | if self.prePOZ_numbus is None or self.prePOZ_numbus == 0:
119 | return 0
120 | prePOZ_busdict = self.prePOZ_bus_checkout_time_dict
121 |         first_checkout_time = min(prePOZ_busdict.values())  # earliest time, not the bus id
122 | return first_checkout_time
123 |
124 | def _checkout_bus_from_POZ(self, checkout_id, checkout_time):
125 | # remove bus with corresponding id from POZ and return the bus
126 | for bus in self.bus_list:
127 | if bus.bus_id == checkout_id:
128 | if self.last_checkout_bus is not None:
129 | last_checkout_time = self.last_checkout_bus.check_out_time
130 | else:
131 | # if there is no previous bus, assume checkout headway is perfect
132 | last_checkout_time = checkout_time - self.CONFIG['target_headway']
133 | bus.check_out(checkout_time, last_checkout_time)
134 | self.bus_list.remove(bus)
135 | return bus
136 | return False
137 |
138 | def get_state(self):
139 |         """Return the POZ state of this intersection (6 slots; see the
140 |         `state` attribute for the layout).
141 | 
142 |         Note: the prePOZ values computed below are currently unused;
143 |         corridor.py obtains the prePOZ state from the PrePOZ objects
144 |         rather than from this method.
145 | 
146 |         Returns
147 |         -------
148 |         list
149 |             the 6-element POZ state
150 |         """
151 |
152 | if self.prePOZ_numbus is None:
153 | prePOZ = [0, 0]
154 | else:
155 | prePOZ = [len(self.prePOZ_bus_checkout_time_dict.keys()), self._find_first_checkout_time_in_prePOZ()]
156 |
157 | return self.state
158 |
159 | def set_bus_actions_and_state(self, actions, joint_state):
160 | """
161 | save the action decided by DQN at bus checkin onto the bus object so it can be compared with the actual action applied
162 | Parameters
163 | ----------
164 | actions : list of int
165 | action to all intersections
166 | joint_state : list of int
167 | joint state of the intersections
168 | """
169 | self.bus_list[-1].set_action(actions)
170 | self.bus_list[-1].set_state(joint_state)
171 | return
172 |
173 | def apply_action(self, action, time, timeSta):
174 | """Apply the action to the intersection according to different
175 | situations (hard code for upstream)
176 |
177 | Parameters
178 | ----------
179 | action : int
180 | action to the intersection (extend/shorten this amount)
181 | time : int
182 | Absolute time of simulation in seconds
183 | timeSta : int
184 | Time of simulation in stationary period, in sec
185 | """
186 | intersection = self.CONFIG['intersection']
187 | phase_of_interest = self.CONFIG['phase_of_interest']
188 | total_phases = len(self.CONFIG['phase_duration'])
189 | green_duration = self.CONFIG['phase_duration'][phase_of_interest - 1]
190 |
191 | # check the time duration is correct
192 | pdur = doublep()
193 | pcmax = doublep()
194 | pcmin = doublep()
195 | ECIGetDurationsPhase(self.CONFIG['intersection'], self.CONFIG['phase_of_interest'], timeSta, pdur, pcmax, pcmin)
196 | poi_duration = int(pdur.value())
197 |
198 |         if self.extendedalready:
199 |             print("int {} already extended by {} seconds, applying {} extension on top of it".format(self.CONFIG['intersection'], self.extended, action))
200 |             action = action + self.extended
201 |             # clip the cumulative action to the legal limit
202 |             if action > 20:
203 |                 action = 20
204 |             if action < -20:
205 |                 action = -20
206 |         else:
207 |             print("int {} has no assigned extension; the phase of interest is {} sec, applying a {} sec extension".format(self.CONFIG['intersection'], poi_duration, action))
208 |
209 |
210 | if poi_duration != green_duration + self.extended:
211 | print("\n\n ERROR: phase duration already changed from {} to {} and self.extended value is {}\n\n".format(green_duration, poi_duration, self.extended))
212 |
213 | phasetime = time - ECIGetStartingTimePhase(intersection)
214 | currentPhase = ECIGetCurrentPhase(intersection)
215 | if currentPhase == phase_of_interest:
216 | # check if the action is legal
217 | remaining_green = self._get_toNearGreenPhase(currentPhase, phasetime, 0)
218 | if remaining_green>=0 and action + remaining_green < 0:
219 | action = -remaining_green
220 | ECIChangeTimingPhase(intersection, phase_of_interest, green_duration + action, timeSta)
221 | if action != 0:
222 | self.extendedalready = 1
223 | else:
224 | self.extendedalready = 0
225 | self.extended = action
226 |
227 | print("------- {} Extend start here ----------".format(intersection))
228 | print("Extended at time: {}".format(time))
229 | print("Extended length: " + str(action) + " sec")
230 |
231 |
232 | def log_state_for_check_in(self, phasetime, checked_in_bus):
233 | replicationID = ANGConnGetReplicationId()
234 | vehicleID = checked_in_bus.bus_id
235 | target_headway = self.CONFIG['target_headway']
236 | parameter_log_file = self.CONFIG['log']
237 | corridor_log_file = self.CONFIG['corridor_log']
238 | reward = self.reward
239 | check_in_headway = checked_in_bus.check_in_headway
240 | check_in_time = checked_in_bus.check_in_time
241 | travelTime = '-'
242 |         # state and action are not known yet at check-in; log placeholders
243 |         state = ['-'] * 16
244 |         action = ['-'] * 2
245 | 
246 |
247 | # list of things in log by index
248 | # 0: replication ID
249 | # 1: vehicle ID
250 | # 2: check in time
251 | # 3: checkout time
252 | # 4: check in phase number
253 | # 5: check in phase time
254 | # 6: checkout phase time
255 | # 7: check in headway
256 | # 8: checkout headway
257 | # 9 - 10: action 1, action 2 as decided at the bus check in
258 | # 11: registered action at bus check out
259 | # 12: Travel time
260 | # 13: reward
261 | # 14+: states
262 |
263 | # the same cycle
264 | output = [replicationID, vehicleID, check_in_time, '-', checked_in_bus.check_in_phase,
265 | checked_in_bus.check_in_phasetime, '-', check_in_headway, '-'] + action + [self.extended,
266 | travelTime, reward] + state
267 |
268 | with open(corridor_log_file, 'a+') as out:
269 | csv_write = csv.writer(out, dialect='excel')
270 | corridor_log_output = ['int_{}_checkin'.format(self.CONFIG['intersection'])] + output
271 | csv_write.writerow(corridor_log_output)
272 |
273 |
274 | def log_parameter_file(self, phasetime, checked_out_bus):
275 | replicationID = ANGConnGetReplicationId()
276 | vehicleID = checked_out_bus.bus_id
277 | target_headway = self.CONFIG['target_headway']
278 | parameter_log_file = self.CONFIG['log']
279 | corridor_log_file = self.CONFIG['corridor_log']
280 | reward = self.reward
281 | check_in_time = checked_out_bus.check_in_time
282 | check_in_hdy = checked_out_bus.check_in_headway
283 | check_out_hdy = checked_out_bus.check_out_headway
284 | travelTime = checked_out_bus.check_out_time - checked_out_bus.check_in_time
285 | state = checked_out_bus.original_state
286 | if state is None:
287 | state = [-99]*16
288 | action = checked_out_bus.original_action
289 | if action is None:
290 | action = [-99]*2
291 |
292 | # list of things in log by index
293 | # 0: replication ID
294 | # 1: vehicle ID
295 | # 2: check in time
296 | # 3: checkout time
297 | # 4: check in phase number
298 | # 5: check in phase time
299 | # 6: checkout phase time
300 | # 7: check in headway
301 | # 8: checkout headway
302 | # 9 - 10: action 1, action 2 as decided at the bus check in
303 | # 11: registered action at bus check out
304 | # 12: Travel time
305 | # 13: reward
306 | # 14+: states
307 |
308 | # the same cycle
309 | output = [replicationID, vehicleID, check_in_time, checked_out_bus.check_out_time, checked_out_bus.check_in_phase,
310 | checked_out_bus.check_in_phasetime, phasetime, check_in_hdy, check_out_hdy] + list(action) + [self.extended, travelTime, reward] + state
311 |
312 | with open(parameter_log_file, "a+") as out: # Log key parameters
313 | csv_write = csv.writer(out, dialect='excel')
314 | csv_write.writerow(output)
315 | with open(corridor_log_file, 'a+') as out:
316 | csv_write = csv.writer(out, dialect='excel')
317 | corridor_log_output = ['int_{}_checkout'.format(self.CONFIG['intersection'])] + output
318 | csv_write.writerow(corridor_log_output)
319 | return
320 |
321 |
322 | def get_reward(self):
323 |         """Return the cumulative reward collected since the last call, and
324 |         CLEAR the reward attribute
325 | 
326 |         Returns
327 |         -------
328 |         float
329 |             the cumulative reward collected since the last call
330 | """
331 | reward, self.reward = self.reward, 0
332 | return reward
333 |
334 | def _compute_reward(self, travelTime, bus_object):
335 | """Compute reward gained by a newly checked out bus
336 |
337 | Parameters
338 | ----------
339 |         travelTime : int
340 |             POZ travel time of the checked-out bus, in seconds
341 |         bus_object : BusInPOZ
342 |             the bus that just checked out
343 | """
344 | d_out = abs(bus_object.check_out_headway -
345 | self.CONFIG['target_headway'])
346 | d_in = abs(bus_object.check_in_headway - self.CONFIG['target_headway'])
347 | improve = d_in - d_out
348 |         reward = 1 * improve - 0 * travelTime  # overridden below; kept from an earlier reward design
349 | max_TT = self.CONFIG['maxTT']
350 | # reward = sigmoid((max_TT - travelTime)/max_TT-0.5)
351 | reward = (max_TT - travelTime)/max_TT-0.5
352 | return reward
353 |
354 | def _get_toNearGreenPhase(self, currentPhase, phasetime, extended):
355 | """Calculate the time to the nearest focus phase green signal.
356 |
357 | Parameters
358 | ----------
359 | currentPhase : int
360 | current intersection phase
361 | phasetime : int
362 | passed time from start of the current phase
363 | extended : int
364 | applied cumulated action on the intersection
365 |
366 | Returns
367 | -------
368 | int
369 | the time to the nearest focus phase green signal
370 | """
371 | if currentPhase <= self.CONFIG['phase_of_interest']:
372 | to_interest = util.time_to_phase_end(self.CONFIG['phase_duration'],
373 | self.CONFIG['phase_of_interest'])
374 | past_phase = util.time_to_phase_end(self.CONFIG['phase_duration'],
375 | currentPhase - 1)
376 | return to_interest - phasetime + extended - past_phase
377 | return sum(self.CONFIG['phase_duration']) - phasetime + extended
378 |
379 | def _find_last_check_in_time(self, bus_list):
380 | last_check_in= None
381 | for bus in bus_list:
382 | if last_check_in is not None:
383 | last_check_in = max(bus.check_in_time, last_check_in)
384 | else:
385 | last_check_in = bus.check_in_time
386 | return last_check_in
387 |
388 | def _bus_enter_handler(self, time, timeSta):
389 |         """Handle bus check-in events detected in the last simulation step
390 |
391 | Parameters
392 | ----------
393 | time : int
394 | Absolute time of simulation in seconds
395 | timeSta : int
396 | Time of simulation in stationary period, in sec
397 |
398 | Returns
399 | -------
400 | bool
401 | True if a bus has entered this intersection
402 | """
403 | # retrieve intersection info from CONFIG
404 | intersection = self.CONFIG['intersection']
405 | busCallDetector = self.CONFIG['busCallDetector']
406 | section = self.CONFIG['section']
407 | # get bus internal position
408 | busVehiclePosition = AKIVehGetVehTypeInternalPosition(1171922)
409 | target_headway = self.CONFIG['target_headway']
410 | current_replicationID = ANGConnGetReplicationId()
411 | if current_replicationID != self.replicationID:
412 | self.empty_intersection() # clean the bus list in new replication ID
413 | self.replicationID = current_replicationID
414 | # determine which phase is green in the bus's perspective
415 | phase_of_interest = self.CONFIG['phase_of_interest']
416 |         # assumes every phase has a duration defined
417 | total_phases = len(self.CONFIG['phase_duration'])
418 | # current phase time
419 | phasetime = time - ECIGetStartingTimePhase(intersection)
420 | # get current phase
421 | currentPhase = ECIGetCurrentPhase(intersection)
422 | # find phase before and after phase of interest
423 | phase_after_phase_of_interest = util.get_phase_number(total_phases, phase_of_interest + 1)
424 |         # green phase ended; buses still in the POZ become cycled buses
425 | if currentPhase == phase_after_phase_of_interest and int(phasetime) <=1:
426 | self.cycled_bus = self.numbus
427 | self.reset_intersection = 1
428 | if self.extendedalready:
429 | print("phase of interest passed, try to reset extension")
430 | self.extendedalready = 0 # clear the extended already flag
431 | if currentPhase != phase_of_interest and self.reset_intersection==1:
432 | if self.extended!=0:
433 | print("time extension reset at time {}".format(time))
434 | self.reset_intersection = 0
435 | self.extended = 0
436 | ECIChangeTimingPhase(
437 | intersection,
438 | phase_of_interest,
439 | self.CONFIG['phase_duration'][phase_of_interest - 1],
440 | timeSta)
441 |
442 | # Check number of all vehicles in and out
443 | self.allnumvel = AKIVehStateGetNbVehiclesSection(section, True)
444 | # bus enter check
445 | enterNum = AKIDetGetCounterCyclebyId(
446 | busCallDetector,
447 | busVehiclePosition) # Number of entering bus(es) in last step
448 |
449 | new_bus_entered = False
450 |
451 | if enterNum > 0:
452 | self.cycled_bus = 0
453 | self.total_bus += 1
454 | # First vehicle info
455 | busin_info = AKIDetGetInfVehInDetectionInfVehCyclebyId(
456 | busCallDetector, 0, busVehiclePosition)
457 | # Last vehicle info
458 | temp_info = AKIDetGetInfVehInDetectionInfVehCyclebyId(
459 | busCallDetector,
460 | AKIDetGetNbVehsEquippedInDetectionCyclebyId(busCallDetector, busVehiclePosition) - 1,
461 | busVehiclePosition)
462 |
463 | for i in range(enterNum):
464 | # If first vehicle equals last vehicle of last step
465 | if i == 0 and busin_info.idVeh == self.last_in_info:
466 | # Skip first vehicle and loop
467 | continue
468 | else:
469 | print("-------INTX:{} - No.{} Bus Checked -------".format(self.CONFIG['intersection'], self.total_bus))
470 | new_bus_entered = True
471 | last_check_in_time_in_intersection = 0
472 | last_check_in_time_checkedout_bus = 0
473 | if self.bus_list:
474 | # there is still bus in intx (need to double check if it is a missed bus)
475 | last_check_in_time_in_intersection = self._find_last_check_in_time(self.bus_list)
476 | if self.last_checkout_bus is not None:
477 | # there is a checked out bus
478 | last_check_in_time_checkedout_bus = self.last_checkout_bus.check_in_time
479 | if not self.bus_list and self.last_checkout_bus is None:
480 | last_check_in_time = time - target_headway
481 | else:
482 | last_check_in_time = max(last_check_in_time_in_intersection, last_check_in_time_checkedout_bus)
483 | checked_in_bus = BusInPOZ(intersection,
484 | busin_info,
485 | currentPhase,
486 | phasetime,
487 | time,
488 | last_check_in=last_check_in_time)
489 | self.bus_list.append(checked_in_bus)
490 | self.numbus += 1
491 | self.allnumvel += 1
492 | self.log_state_for_check_in(phasetime, checked_in_bus)
493 |
494 | self.last_in_info = temp_info.idVeh
495 |
496 | # update state
497 | self._update_state(currentPhase, phasetime, time)
498 |
499 |
500 | return new_bus_entered
501 |
502 | def _update_state(self, currentPhase, phasetime, time):
503 | """
504 | Update the state attribute of the intersection
505 |
506 | Parameters
507 | ----------
508 | currentPhase: int
509 | current traffic phase
510 | phasetime: int
511 | time (in sec) elapsed in the current traffic phase
512 | time: int
513 | Absolute time of simulation in seconds
514 |
515 | Returns
516 | -------
517 | None
518 |
519 | """
520 | # compute new state without registered action
521 | tToNearGreenPhase = self._get_toNearGreenPhase(currentPhase, phasetime, self.extended)
522 |
523 | if self.numbus > 0:
524 | # last available checkout for this intersection
525 | if self.numbus > 1:
526 | # bunch, use current time as last checkout
527 | last_available_checkout_time = time
528 | elif self.last_checkout_bus is None:
529 | # no checked out bus, assume perfect headway
530 | last_available_checkout_time = time - self.CONFIG['target_headway']
531 | else:
532 | last_available_checkout_time = self.last_checkout_bus.check_out_time
533 | # check in time of the last bus checked in
534 | last_check_in_time = self.bus_list[-1].check_in_time
535 | check_in_hdy = self.bus_list[-1].check_in_headway
536 | new_state = [last_available_checkout_time, last_check_in_time, check_in_hdy, self.numbus, self.allnumvel,
537 | tToNearGreenPhase]
538 | else:
539 | if self.last_checkout_bus:
540 | last_available_checkout_time = self.last_checkout_bus.check_out_time
541 | check_in_hdy = self.last_checkout_bus.check_in_headway
542 | last_check_in_time = self.last_checkout_bus.check_in_time
543 | new_state = [last_available_checkout_time, last_check_in_time, check_in_hdy, 0, self.allnumvel, tToNearGreenPhase]
544 | else:
545 | new_state = [0, 0, 0, 0, self.allnumvel, tToNearGreenPhase]
546 |
547 | self.state = new_state
548 | return
549 |
550 | def _bus_out_handler(self, time, timeSta):
551 |         """Handle bus check-out events detected in the last simulation step
552 |
553 | Parameters
554 | ----------
555 | time : int
556 | Absolute time of simulation in seconds
557 | timeSta : int
558 | Time of simulation in stationary period, in sec
559 | """
560 | # retrieve intersection info from CONFIG
561 | intersection = self.CONFIG['intersection']
562 | busExitDetector = self.CONFIG['busExitDetector']
563 | section = self.CONFIG['section']
564 | # get bus internal position
565 | busVehiclePosition = AKIVehGetVehTypeInternalPosition(1171922)
566 | target_headway = self.CONFIG['target_headway']
567 | self.replicationID = ANGConnGetReplicationId()
568 | # determine which phase is green in the bus's perspective
569 | phase_of_interest = self.CONFIG['phase_of_interest']
570 |         # assumes every phase has a duration defined
571 | total_phases = len(self.CONFIG['phase_duration'])
572 | # current phase time
573 | phasetime = time - ECIGetStartingTimePhase(intersection)
574 | # get current phase
575 | currentPhase = ECIGetCurrentPhase(intersection)
576 | # find phase before and after phase of interest
577 | phase_after_phase_of_interest = get_phase_number(
578 | total_phases, phase_of_interest + 1)
579 | phase_before_phase_of_interest = get_phase_number(
580 | total_phases, phase_of_interest - 1)
581 |         # green phase ended; buses still in the POZ become cycled buses
582 | if currentPhase == phase_after_phase_of_interest and int(phasetime) <=1:
583 | self.cycled_bus = self.numbus
584 | self.reset_intersection = 1
585 | if self.extendedalready:
586 | print("phase of interest passed, try to reset extension")
587 | self.extendedalready = 0 # clear the extended already flag
588 | if currentPhase != phase_of_interest and self.reset_intersection==1:
589 | if self.extended!=0:
590 | print("time extension reset at time {}".format(time))
591 | self.reset_intersection = 0
592 | self.extended = 0
593 | ECIChangeTimingPhase(
594 | intersection,
595 | phase_of_interest,
596 | self.CONFIG['phase_duration'][phase_of_interest - 1],
597 | timeSta)
598 |
599 | # Check number of all vehicles in and out
600 | self.allnumvel = AKIVehStateGetNbVehiclesSection(section, True)
601 |
602 | # bus exit check
603 | exitNum = AKIDetGetCounterCyclebyId(busExitDetector, busVehiclePosition) # Number of exit vehicle in last step
604 | if exitNum > 0:
605 | print("-------- Bus exited %d ---------" % exitNum)
606 | print("Exited at time: " + str(time))
607 | # First vehicle info
608 | busout_info = AKIDetGetInfVehInDetectionInfVehCyclebyId(
609 | busExitDetector, 0, busVehiclePosition)
610 | # Last vehicle info
611 | temp_info = AKIDetGetInfVehInDetectionInfVehCyclebyId(
612 | busExitDetector, AKIDetGetNbVehsEquippedInDetectionCyclebyId(
613 | busExitDetector, busVehiclePosition) - 1, busVehiclePosition)
614 | for i in range(exitNum):
615 | # If first vehicle equals last vehicle of last step
616 | if i == 0 and busout_info.idVeh == self.last_out_info:
617 | # Skip first vehicle and loop
618 | continue
619 | else:
620 | if self.numbus >=1:
621 | self.numbus -= 1
622 | self.allnumvel -= 1
623 | else:
624 | print("ERROR: try to reduce numbus to negative, checkout bus: {}".format(busout_info.idVeh))
625 |
626 |                     print("Bus bunching %d" % self.numbus)
627 | checkout_id = busout_info.idVeh
628 | successfully_checked_out_bus = self._checkout_bus_from_POZ(checkout_id, time)
629 |
630 | # update to keep track of the last checkout bus
631 | if successfully_checked_out_bus is False:
632 |                         raise Exception("Checkout detected for bus {}, but could not find this bus in POZ".format(checkout_id))
633 | self.last_checkout_bus = successfully_checked_out_bus
634 |
635 | travelTime = successfully_checked_out_bus.check_out_time - successfully_checked_out_bus.check_in_time
636 | # log parameters
637 | reward_gained = self._compute_reward(travelTime, successfully_checked_out_bus)
638 | self.reward += reward_gained
639 | self.log_parameter_file(phasetime, successfully_checked_out_bus)
640 | print("Reward gained at checked out: {}".format(reward_gained))
641 |
642 | self.last_out_info = temp_info.idVeh
643 |
644 | self._update_state(currentPhase, phasetime, time)
645 |
646 | return
647 |
648 |
649 |
--------------------------------------------------------------------------------
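
As implemented, _compute_reward first computes a headway-improvement term but then overwrites it, so the returned reward depends only on travel time. A worked example (the maxTT value is illustrative, not from the repo's config):

    max_TT, travelTime = 120.0, 90.0                # suppose CONFIG['maxTT'] = 120 s
    reward = (max_TT - travelTime) / max_TT - 0.5   # = 30/120 - 0.5 = -0.25
    # a bus that crosses the POZ in less than maxTT/2 earns a positive reward
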
/Aimsun/prePOZ.py:
--------------------------------------------------------------------------------
1 | from AAPI import *
2 |
3 | class PrePOZ:
4 |
5 | def __init__(self, config):
6 | self.CONFIG = config
7 | self.last_in_info = None
8 | self.last_out_info = None
9 | self.time_list = []
10 |
11 | def get_state(self):
12 | if len(self.time_list) == 0:
13 | return [0, 0]
14 |         return [self.time_list[0], len(self.time_list)]  # [oldest prePOZ entry time, number of buses]
15 |
16 | def update(self, time, timeSta):
17 | self._enter_prePOZ(time, timeSta)
18 | self._exit_prePOZ(time, timeSta)
19 |
20 | def _enter_prePOZ(self, time, timeSta):
21 |         # buses enter the prePOZ when they cross the upstream intersection's exit detector
22 | busExitDetector = self.CONFIG['busExitDetector']
23 | # get bus internal position
24 | busVehiclePosition = AKIVehGetVehTypeInternalPosition(1171922)
25 | # bus exit check
26 | exitNum = AKIDetGetCounterCyclebyId(busExitDetector, busVehiclePosition) # Number of exit vehicle in last step
27 | if exitNum > 0:
28 | # First vehicle info
29 | busout_info = AKIDetGetInfVehInDetectionInfVehCyclebyId(
30 | busExitDetector, 0, busVehiclePosition)
31 | # Last vehicle info
32 | temp_info = AKIDetGetInfVehInDetectionInfVehCyclebyId(
33 | busExitDetector, AKIDetGetNbVehsEquippedInDetectionCyclebyId(
34 | busExitDetector, busVehiclePosition) - 1, busVehiclePosition)
35 | for i in range(exitNum):
36 | # If first vehicle equals last vehicle of last step
37 | if i == 0 and busout_info.idVeh == self.last_out_info:
38 | # Skip first vehicle and loop
39 | continue
40 | else:
41 | print("prePOZ-{} enter-{}".format(busExitDetector, time))
42 | self.time_list.append(time)
43 | self.last_out_info = temp_info.idVeh
44 |
45 |
46 | def _exit_prePOZ(self, time, timeSta):
47 |         busCallDetector = self.CONFIG['busCallDetector']  # buses leave the prePOZ at this intersection's call detector
48 | # get bus internal position
49 | busVehiclePosition = AKIVehGetVehTypeInternalPosition(1171922)
50 | # bus enter check
51 | enterNum = AKIDetGetCounterCyclebyId(busCallDetector, busVehiclePosition)
52 | if enterNum > 0:
53 | # First vehicle info
54 | busin_info = AKIDetGetInfVehInDetectionInfVehCyclebyId(
55 | busCallDetector, 0, busVehiclePosition)
56 | # Last vehicle info
57 | temp_info = AKIDetGetInfVehInDetectionInfVehCyclebyId(
58 | busCallDetector,
59 | AKIDetGetNbVehsEquippedInDetectionCyclebyId(busCallDetector, busVehiclePosition) - 1,
60 | busVehiclePosition)
61 |
62 | for i in range(enterNum):
63 | # If first vehicle equals last vehicle of last step
64 | if i == 0 and busin_info.idVeh == self.last_in_info:
65 | # Skip first vehicle and loop
66 | continue
67 | else:
68 | print("prePOZ-{} exit-{}".format(busCallDetector, time))
69 | self.time_list.pop(0)
70 |
71 | self.last_in_info = temp_info.idVeh
72 |
73 |
74 |
--------------------------------------------------------------------------------
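
The prePOZ state is simply a FIFO of entry times. A small illustration of how it evolves (detector ids and times are made up; constructing PrePOZ normally requires Aimsun's AAPI module, so this only runs inside Aimsun):

    p = PrePOZ({'busCallDetector': 101, 'busExitDetector': 202})  # hypothetical ids
    p.time_list = [120.0, 185.0]  # two buses entered the prePOZ at t=120 s and t=185 s
    print(p.get_state())          # [120.0, 2]: oldest entry time, number of buses
    p.time_list.pop(0)            # the oldest bus reaches the downstream call detector
    print(p.get_state())          # [185.0, 1]
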
/Aimsun/script.py:
--------------------------------------------------------------------------------
1 | from AAPI import *
2 | import os, sys, inspect
3 | current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
4 | parent_dir = os.path.dirname(current_dir)
5 | sys.path.insert(0, parent_dir)
6 | from config import *
7 | from corridor import *
8 |
9 |
10 | def AAPILoad():
11 | """
12 |     Create the Corridor object (and its Intersections). Called when the module is loaded by Aimsun Next
13 | """
14 | global corridor
15 | corridor = Corridor(CORRIDOR)
16 | return 0
17 |
18 |
19 | def AAPIInit():
20 | """Summary
21 |     Initializes the module. Called when Aimsun Next starts the simulation
22 | """
23 | ANGConnEnableVehiclesInBatch(True)
24 | return 0
25 |
26 |
27 | def AAPIManage(time, timeSta, timeTrans, acycle):
28 | """Summary
29 |     Called at the beginning of every simulation step; can be used to update states,
30 |     output states to the DQN, and implement TSP strategies
31 |
32 | Parameters
33 | ----------
34 | time : double
35 | Absolute time of simulation in seconds
36 | timeSta : double
37 | Time of simulation in stationary period, in seconds
38 | timeTrans : double
39 | Duration of warm-up period, in seconds
40 | acycle : double
41 | Duration of each simulation step in seconds
42 | """
43 | return 0
44 |
45 |
46 | def AAPIPostManage(time, timeSta, timeTrans, acycle):
47 | """Summary
48 |     Called at the end of every simulation step; can be used to update states,
49 |     output states to the DQN, and implement TSP strategies
50 |
51 | Parameters
52 | ----------
53 | time : double
54 | Absolute time of simulation in seconds
55 | timeSta : double
56 | Time of simulation in stationary period, in seconds
57 | timeTrans : double
58 | Duration of warm-up period, in seconds
59 | acycle : double
60 | Duration of each simulation step in seconds
61 | """
62 | global corridor
63 | corridor.aapi_post_manage(time, timeSta, timeTrans, acycle)
64 | return 0
65 |
66 |
67 | def AAPIFinish():
68 | """Summary
69 | Called when Aimsun Next finishes the simulation and can be used to terminate the module operations,
70 | write summary information, close files, etc.
71 | """
72 | global corridor
73 | # write last reward to indicate that the replication is done
74 | corridor.write_last_reward()
75 | return 0
76 |
77 |
78 | def AAPIUnLoad():
79 | """Summary
80 | Called when the module is unloaded by Aimsun Next.
81 | """
82 | return 0
83 |
--------------------------------------------------------------------------------
/Aimsun/util.py:
--------------------------------------------------------------------------------
1 | """Utility functions
2 | """
3 |
4 |
5 |
6 | def get_phase_number(total_number_of_phases, phase_number):
7 |     """Wrap a phase index around so it stays within [1, total_number_of_phases]
8 | 
9 |     Parameters
10 |     ----------
11 |     total_number_of_phases : int
12 |         number of phases in the signal cycle
13 |     phase_number : int
14 |         phase index to wrap (may be 0, negative, or past the last phase)
15 | 
16 |     Returns
17 |     -------
18 |     int
19 |         the equivalent phase number in [1, total_number_of_phases]
20 |     """
21 | # wrap around the phases (use this to find phase after last phase or before phase 1)
22 | while phase_number <= 0:
23 | phase_number += total_number_of_phases
24 | while phase_number > total_number_of_phases:
25 | phase_number -= total_number_of_phases
26 | return phase_number
27 |
28 |
29 | def time_to_phase_end(phase_duration, phase):
30 |     """Cumulative time from the start of the cycle to the end of `phase`
31 | 
32 |     Parameters
33 |     ----------
34 |     phase_duration : list
35 |         duration of each phase, in seconds
36 |     phase : int
37 |         phase number (1-based)
38 | 
39 |     Returns
40 |     -------
41 |     int
42 |         time from cycle start to the end of the given phase, in seconds
43 |     """
44 | return sum(phase_duration[:phase]
45 | ) if phase != len(phase_duration) else sum(phase_duration)
--------------------------------------------------------------------------------
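
Both helpers are pure functions, so they are easy to sanity-check. With an illustrative 4-phase plan:

    phase_duration = [30, 10, 40, 20]        # cycle length 100 s
    get_phase_number(4, 5)                   # -> 1 (wraps past the last phase)
    get_phase_number(4, 0)                   # -> 4 (wraps before phase 1)
    time_to_phase_end(phase_duration, 3)     # -> 80 (cycle start to end of phase 3)
    time_to_phase_end(phase_duration, 4)     # -> 100 (full cycle)
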
/DQN/ddqn_framework.py:
--------------------------------------------------------------------------------
1 | #######################################################################################
2 | # Deep Q-Learning framework to play around with (dueling, dense and double Q-learning)
3 | # Author: Manuel Hass
4 | # 2017
5 | #
6 | # *uses mlp_framework.py as model framework
7 | # *examples in the end
8 | #######################################################################################
9 |
10 |
11 | ### imports
12 | import numpy as np
13 | from numpy import linalg as LA
14 | import time
15 | import csv
16 | import pickle
17 | import os, sys, inspect
18 | current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
19 | parent_dir = os.path.dirname(current_dir)
20 | sys.path.insert(0, parent_dir)
21 | from config import *
22 |
23 | Q_target_log = LOG_PATH + 'Q_target.csv'
24 | Q_online_log = LOG_PATH + 'Q_online.csv'
25 | Rt_log = LOG_PATH + 'Rt.csv'
26 | Loss = LOG_PATH + 'loss.csv'
27 |
28 |
29 |
30 | def write_csv(path, data):
31 | with open(path, "a+") as out:
32 | csv_write = csv.writer(out, dialect='excel')
33 | csv_write.writerow(data)
34 |
35 | # helper functions
36 | def train_bellman(onlineDQN, targetDQN, batch, GAMMA):
37 | '''
38 | updates the onlineDQN with target Q values for the greedy action(chosen by onlineDQN)
39 | '''
40 |
41 | state, action, reward, next_state, done = batch
42 | Q = onlineDQN.infer(state)
43 | t = targetDQN.infer(next_state)
44 | a = np.argmax(onlineDQN.infer(next_state), axis=1)
45 | Q[range(Q.shape[0]), action.astype(int)] = reward + np.logical_not(done) * GAMMA * t[range(t.shape[0]), a]
46 | state_batch_ = state
47 | target_batch_ = Q
48 |
49 | onlineDQN.train(state_batch_, target_batch_)
50 |
51 |
52 | def update_target(onlineDQN, targetDQN, duel=False):
53 | '''
54 | copies weights from onlineDQN to targetDQN
55 | '''
56 | if duel:
57 | for i in range(len(targetDQN.LL0)):
58 | targetDQN.LL0[i].w = np.copy(onlineDQN.LL0[i].w)
59 | for i in range(len(targetDQN.LLA)):
60 | targetDQN.LLA[i].w = np.copy(onlineDQN.LLA[i].w)
61 | for i in range(len(targetDQN.LLV)):
62 | targetDQN.LLV[i].w = np.copy(onlineDQN.LLV[i].w)
63 | else:
64 | for i in range(len(targetDQN.Layerlist)):
65 | targetDQN.Layerlist[i].w = np.copy(onlineDQN.Layerlist[i].w)
66 |
67 |     beta = 0.9  # soft-update blend (redundant after the full copy above when duel=False)
68 | for i in range(len(targetDQN.Layerlist)):
69 | targetDQN.Layerlist[i].w = beta * np.copy(onlineDQN.Layerlist[i].w) + (1 - beta) * targetDQN.Layerlist[i].w
70 |
71 |
72 | class ringbuffer:
73 | '''
74 | fast ringbuffer for the experience replay (numpy)
75 | '''
76 |
77 | def __init__(self, SIZE):
78 | self.buffer_size = 0
79 | self.SIZE = SIZE
80 | # buffers
81 | self.state_buffer = None
82 | self.action_buffer = None
83 | self.reward_buffer = None
84 | self.next_state_buffer = None
85 | self.done_buffer = None
86 | self.priorities = None
87 |
88 | def add(self, sample):
89 | if self.state_buffer is None:
90 | self.state_buffer = np.empty((0, sample[0].shape[1])) # [1:]
91 | self.action_buffer = np.empty((0, sample[1].shape[1])) # [1:]
92 | self.reward_buffer = np.empty((0, sample[2].shape[1])) # [1:]
93 | self.next_state_buffer = np.empty((0, sample[3].shape[1])) # [1:]
94 | self.done_buffer = np.empty((0, 1)) # [1:]
95 | self.priorities = np.empty((0, 1)) # [1:]
96 | # self.state_buffer = np.append(self.state_buffer, sample[0][True, :], axis=0)
97 | self.state_buffer = np.append(self.state_buffer, sample[0], axis=0)
98 | self.action_buffer = np.append(self.action_buffer, sample[1], axis=0)
99 | self.reward_buffer = np.append(self.reward_buffer, sample[2], axis=0)
100 | self.next_state_buffer = np.append(self.next_state_buffer, sample[3], axis=0)
101 | self.done_buffer = np.append(self.done_buffer, sample[4], axis=0)
102 | new_sample_prio = np.max(self.priorities) if self.priorities.shape[0] > 0 and np.max(
103 | np.abs(self.priorities)) < 1e10 else 1.
104 | self.priorities = np.append(self.priorities, np.array([new_sample_prio]).reshape(1, 1), axis=0)
105 | self.priorities /= np.sum(self.priorities)
106 | self.buffer_size += 1.
107 | if self.buffer_size > self.SIZE:
108 | self.state_buffer = self.state_buffer[1:]
109 | self.action_buffer = self.action_buffer[1:]
110 | self.reward_buffer = self.reward_buffer[1:]
111 | self.next_state_buffer = self.next_state_buffer[1:]
112 | self.done_buffer = self.done_buffer[1:]
113 | self.priorities = self.priorities[1:]
114 |
115 | def delete(self):
116 | if self.buffer_size > 0:
117 | # self.state_buffer = np.append(self.state_buffer, sample[0][True, :], axis=0)
118 | self.state_buffer = np.delete(self.state_buffer, -1, axis=0)
119 | self.action_buffer = np.delete(self.action_buffer, -1, axis=0)
120 | self.reward_buffer = np.delete(self.reward_buffer, -1, axis=0)
121 | self.next_state_buffer = np.delete(self.next_state_buffer, -1, axis=0)
122 | self.done_buffer = np.delete(self.done_buffer, -1, axis=0)
123 | self.priorities = np.delete(self.priorities, -1, axis=0)
124 | self.priorities /= np.sum(self.priorities)
125 | self.buffer_size -= 1.
126 |
127 | def get(self):
128 | return [self.state_buffer,
129 | self.action_buffer,
130 | self.reward_buffer,
131 | self.next_state_buffer,
132 | self.done_buffer]
133 |
134 | def sample(self, BATCHSIZE, prio=False):
135 | if prio:
136 | a = self.done_buffer.shape[0]
137 | c = self.priorities.reshape((a))
138 | b = c / np.sum(c)
139 | ind = np.random.choice(np.arange(a), BATCHSIZE, replace=False, p=b).astype(int)
140 | else:
141 | ind = np.random.choice(np.arange(self.done_buffer.shape[0]), BATCHSIZE, replace=False).astype(int)
142 |
143 | return [self.state_buffer[ind],
144 | self.action_buffer[ind].reshape(-1),
145 | self.reward_buffer[ind].reshape(-1),
146 | self.next_state_buffer[ind],
147 | self.done_buffer[ind].reshape(-1)]
148 |
149 | def prio_update(self, onlineDQN, targetDQN, epsilon=0.01, alpha=0.6, GAMMA=0.99, CHUNK=5000.):
150 |
151 | # state,action,reward,next_state,done = self.get()
152 | getbuffer = self.get()
153 | # CHUNK = 5000. # max number of states used for inference at once
154 | loops = int(getbuffer[0].shape[0] / CHUNK) # number of loops needed to update all prios
155 | priobuffer = np.empty((0))
156 | j = -1
157 |
158 | for j in range(loops): # if replaybuffer size bigger than CHUNK size
159 | state, action, reward, next_state, done = [x[int(j * CHUNK):int((j + 1) * CHUNK)] for x in getbuffer]
160 | Q = onlineDQN.infer(state)
161 | Q_ = np.copy(Q)
162 | t = targetDQN.infer(next_state)
163 | a = np.argmax(onlineDQN.infer(next_state), axis=1)
164 | Q[range(Q.shape[0]), action.astype(int)] = reward + np.logical_not(done) * GAMMA * t[range(t.shape[0]), a]
165 | TD_loss = np.abs((Q_ - Q))
166 | TD_loss = TD_loss[range(TD_loss.shape[0]), a]
167 | prio = np.power((TD_loss + epsilon), alpha)
168 | prio /= np.sum(prio)
169 | priobuffer = np.append(priobuffer, prio)
170 |
171 | state, action, reward, next_state, done = [x[int((j + 1) * CHUNK):] for x in getbuffer]
172 | Q = onlineDQN.infer(state)
173 | Q_ = np.copy(Q)
174 | t = targetDQN.infer(next_state)
175 | a = np.argmax(onlineDQN.infer(next_state), axis=1)
176 | Q[range(Q.shape[0]), action.astype(int)] = reward + np.logical_not(done) * GAMMA * t[range(t.shape[0]), a]
177 | TD_loss = np.abs((Q_ - Q))
178 | TD_loss = TD_loss[range(TD_loss.shape[0]), a]
179 | prio = np.power((TD_loss + epsilon), alpha)
180 | prio /= np.sum(prio)
181 | priobuffer = np.append(priobuffer, prio)
182 |         self.priorities = priobuffer[:, None]  # reshape to an (N, 1) column
183 |
184 |
185 | class trainer_config:
186 | '''
187 | configuration for the Q learner (trainer) for easy reuse
188 | everything not model related goes here. maybe
189 | '''
190 |
191 | def __init__(self,
192 | app_name,
193 | BUFFER_SIZE=50e3,
194 | STEPS_PER_EPISODE=500,
195 | MAX_STEPS=100000,
196 | UPDATE_TARGET_STEPS=1000,
197 | BATCH_SIZE=32,
198 | GAMMA=0.99,
199 | EXPLORATION=100,
200 | E_MIN=0.01,
201 | priority=False,
202 | alpha=0.6,
203 | epsilon=0.01
204 |
205 | ):
206 | ### game environment
207 | self.app_name = app_name
208 | # env.close()
209 | ### training variables
210 | self.BUFFER_SIZE = BUFFER_SIZE
211 | self.STEPS_PER_EPISODE = STEPS_PER_EPISODE
212 | self.MAX_STEPS = MAX_STEPS
213 | self.UPDATE_TARGET_STEPS = UPDATE_TARGET_STEPS
214 | self.BATCH_SIZE = BATCH_SIZE
215 | self.GAMMA = GAMMA
216 | self.EXPLORATION = EXPLORATION
217 | self.E_MIN = E_MIN
218 | #### PRIO MODULE ( default := alpha= 0.,epsilon=0.01)
219 | self.priority = priority
220 | self.alpha = alpha
221 | self.epsilon = epsilon
222 |
223 |
224 | class trainer:
225 | '''
226 |     the actual DDQN: 2 models, 1 config
227 | train here, get your models and plots
228 | '''
229 |
230 | def __init__(self, onlineModel, targetModel, trainer_config, env):
231 | ### load config
232 | self.app_name = trainer_config.app_name
233 | self.env = env
234 |
235 | ### training variables
236 | self.BUFFER_SIZE = trainer_config.BUFFER_SIZE
237 | self.STEPS_PER_EPISODE = trainer_config.STEPS_PER_EPISODE
238 | self.MAX_STEPS = trainer_config.MAX_STEPS
239 | self.UPDATE_TARGET_STEPS = trainer_config.UPDATE_TARGET_STEPS
240 | self.BATCH_SIZE = trainer_config.BATCH_SIZE
241 | self.GAMMA = trainer_config.GAMMA
242 | self.EXPLORATION = trainer_config.EXPLORATION
243 | self.E_MIN = trainer_config.E_MIN
244 | self.priority = trainer_config.priority
245 | self.alpha = trainer_config.alpha
246 | self.epsilon = trainer_config.epsilon
247 |
248 | ### models
249 | self.onlineNet = onlineModel
250 | self.targetNet = targetModel
251 |
252 | ### logs
253 | self.reward_plot = []
254 | self.loss_plot = []
255 | self.online_q_plot = []
256 | self.target_q_plot = []
257 |
258 | ### ringbuffer
259 | self.REPLAY_BUFFER = ringbuffer(self.BUFFER_SIZE)
260 |
261 | # def load_config(self, config):
262 | # '''
263 | # loads new config
264 | # '''
265 | # ### env
266 | # self.app_name = config.app_name
267 | # self.env = AimsunEnv()
268 | # ### training variables
269 | # self.BUFFER_SIZE = config.BUFFER_SIZE
270 | # self.STEPS_PER_EPISODE = config.STEPS_PER_EPISODE
271 | # self.MAX_STEPS = config.MAX_STEPS
272 | # self.UPDATE_TARGET_STEPS = config.UPDATE_TARGET_STEPS
273 | # self.BATCH_SIZE = config.BATCH_SIZE
274 | # self.GAMMA = config.GAMMA
275 | # self.EXPLORATION = config.EXPLORATION
276 | # self.E_MIN = config.E_MIN
277 | # self.priority = config.priority
278 | # self.alpha = config.alpha
279 | # self.epsilon = config.epsilon
280 |
281 | def save_config(self):
282 | '''
283 | returns current config
284 | '''
285 | return trainer_config(self.app_name,
286 | self.BUFFER_SIZE,
287 | self.STEPS_PER_EPISODE,
288 | self.MAX_STEPS,
289 | self.UPDATE_TARGET_STEPS,
290 | self.BATCH_SIZE,
291 | self.GAMMA,
292 | self.EXPLORATION,
293 | self.E_MIN,
294 | self.priority,
295 | self.alpha,
296 | self.epsilon
297 | )
298 |
299 | def normalize_state(self, state):
300 | state_buffer = self.REPLAY_BUFFER.state_buffer
301 | state = np.array(state).reshape(1, len(state))
302 | if not (state_buffer is None or state_buffer.shape[0] <= 1):
303 | mean = np.mean(state_buffer, axis=0)
304 | std = np.std(state_buffer, axis=0)
305 | state = np.divide((state - mean), std, out=(state - mean), where=(std!=0))
306 | return state
307 |
308 | def save_model(self):
309 | all_attribute = [self.save_config(),
310 | self.env,
311 | self.onlineNet,
312 | self.targetNet,
313 | self.reward_plot,
314 | self.loss_plot,
315 | self.REPLAY_BUFFER,
316 | self.target_q_plot,
317 | self.online_q_plot]
318 | is_written = False
319 | while not is_written:
320 | try:
321 | with open(LOG_PATH + 'Model', 'wb') as fout:
322 | pickle.dump(all_attribute, fout)
323 | is_written = True
324 | except:
325 | print("Save model failed.")
326 | return
327 |
328 | def load_model(self, flag=False):
329 | if flag:
330 | try:
331 | # all_attribute = [self.save_config(), self.env, self.onlineNet, self.targetNet,
332 | # self.reward_plot, self.loss_plot, self.REPLAY_BUFFER]
333 | with open(LOG_PATH + 'Model', 'rb') as fin:
334 | all_attribute = pickle.load(fin)
335 |
336 |
337 |             if len(all_attribute) < 7:
338 |                 print("Model file incomplete...")
339 |                 return
340 |
341 | trainer_config = all_attribute[0]
342 | env = all_attribute[1]
343 | onlineNet = all_attribute[2]
344 | targetNet = all_attribute[3]
345 | reward_plot = all_attribute[4]
346 | loss_plot = all_attribute[5]
347 | REPLAY_BUFFER = all_attribute[6]
348 |
349 |
350 | self.app_name = trainer_config.app_name
351 | self.env = env
352 | ### training variables
353 | self.BUFFER_SIZE = trainer_config.BUFFER_SIZE
354 | self.STEPS_PER_EPISODE = trainer_config.STEPS_PER_EPISODE
355 | self.MAX_STEPS = trainer_config.MAX_STEPS
356 | self.UPDATE_TARGET_STEPS = trainer_config.UPDATE_TARGET_STEPS
357 | self.BATCH_SIZE = trainer_config.BATCH_SIZE
358 | self.GAMMA = trainer_config.GAMMA
359 | self.EXPLORATION = trainer_config.EXPLORATION
360 | self.E_MIN = trainer_config.E_MIN
361 | self.priority = trainer_config.priority
362 | self.alpha = trainer_config.alpha
363 | self.epsilon = trainer_config.epsilon
364 |
365 | ### models
366 | self.onlineNet = onlineNet
367 | self.targetNet = targetNet
368 |
369 | ### logs
370 | self.reward_plot = reward_plot
371 | self.loss_plot = loss_plot
372 |             self.target_q_plot, self.online_q_plot = target_q_plot, online_q_plot
373 | ### ringbuffer
374 | self.REPLAY_BUFFER = REPLAY_BUFFER
375 |         except Exception:  # missing or corrupted model file
376 | print("Model not found...")
377 | pass
378 | else:
379 | pass
380 |
381 | # def log_weight(self):
382 | # onlineFrob = []
383 | # targetFrob = []
384 | # online = self.onlineNet
385 | # target = self.targetNet
386 | # for L in range(len(online.Layerlist)):
387 | # onlineFrob.append(LA.norm(online.Layerlist[L].w))
388 | # targetFrob.append(LA.norm(target.Layerlist[L].w))
389 |
390 | # with open(online_w, "a+") as online:
391 | # csv_write = csv.writer(online, dialect='excel')
392 | # csv_write.writerow(onlineFrob)
393 | # with open(target_w, "a+") as target:
394 | # csv_write = csv.writer(target, dialect='excel')
395 | # csv_write.writerow(targetFrob)
396 |
397 | def train(self, flag=False, log=False):
398 |
399 | EPOCH = 9999
400 | step_counter = 0.
401 | eps_rew = 0.
402 |
403 | for epoch in range(EPOCH):
404 | current_state = self.normalize_state(self.env.reset())
405 |
406 | for STEP in range(self.MAX_STEPS):
407 |                 e = 1. / ((len(self.loss_plot) / self.EXPLORATION) + 1)  # epsilon decays as training updates accumulate
408 | if np.random.uniform(0, 1) < max(self.E_MIN, e):
409 | # random action
410 | action = self.env.rand_action()
411 |
412 | else:
413 | Q = (self.onlineNet.infer(current_state))[0]
414 | action = np.argmax(Q)
415 | # apply action
416 | next_state, reward, done = self.env.step(action)
417 | next_state = self.normalize_state(next_state)
418 |
419 | # end training when simulation ends
420 | if done: break
421 | if not self.env.exclude():
422 | eps_rew += reward
423 | self.REPLAY_BUFFER.add(
424 | [current_state,
425 | np.array(action).reshape(1, 1),
426 | np.array(reward).reshape(1, 1),
427 | next_state,
428 | np.array(done).reshape(1, 1)])
429 | step_counter += 1.
430 |
431 |
432 |                 if STEP > 2000 or flag:  # start learning after a 2000-step warm-up (within the episode)
433 | BATCH = self.REPLAY_BUFFER.sample(self.BATCH_SIZE, prio=self.priority)
434 | train_bellman(self.onlineNet, self.targetNet, BATCH, self.GAMMA)
435 | write_csv(Q_target_log, (self.targetNet.infer(current_state))[0])
436 | write_csv(Q_online_log, (self.onlineNet.infer(current_state))[0])
437 | write_csv(Rt_log, [reward])
438 | write_csv(Loss, [self.onlineNet.loss])
439 | self.loss_plot += [self.onlineNet.loss]
440 | self.reward_plot += [eps_rew]
441 |
442 | current_state = next_state
443 |
444 | if (STEP + 1) % self.UPDATE_TARGET_STEPS == 0:
445 | if self.priority: self.REPLAY_BUFFER.prio_update(self.onlineNet, self.targetNet, GAMMA=self.GAMMA,
446 | alpha=self.alpha, epsilon=self.epsilon)
447 | if log: print('update: ', len(self.reward_plot), ' episodes ---- 2 eps average reward: ',
448 | np.array(self.reward_plot)[-2:].mean())
449 | update_target(self.onlineNet, self.targetNet, duel=False)
450 |
451 | # self.log_weight()
452 | if STEP % 10 == 0: self.save_model()
453 |
454 |
455 |
456 |
--------------------------------------------------------------------------------
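A note on `trainer.normalize_state` above: each incoming state is z-scored against all states currently in the replay buffer, and zero-variance features are left centered but unscaled. A minimal standalone sketch of the same idea (here `state_buffer` is a stand-in for `ringbuffer.state_buffer`):

```python
import numpy as np

def normalize_against_buffer(state, state_buffer):
    """Z-score a state against the states observed so far (cf. trainer.normalize_state)."""
    state = np.asarray(state, dtype=float).reshape(1, -1)
    if state_buffer is None or state_buffer.shape[0] <= 1:
        return state  # not enough history yet; return the raw state
    mean = state_buffer.mean(axis=0)
    std = state_buffer.std(axis=0)
    centered = state - mean
    # where a feature has zero variance, keep it centered but unscaled
    return np.divide(centered, std, out=centered, where=(std != 0))

# toy buffer of three 2-feature states; the second feature is constant
buffer = np.array([[1.0, 5.0], [3.0, 5.0], [2.0, 5.0]])
print(normalize_against_buffer([2.5, 5.0], buffer))  # ~[[0.61, 0.0]]
```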
/Env/__init__.py:
--------------------------------------------------------------------------------
1 | """Env module init file
2 | """
3 | # __init__.py
4 | from .aimsun_env import *
--------------------------------------------------------------------------------
/Env/aimsun_env.py:
--------------------------------------------------------------------------------
1 | """AimsunEnv
2 | """
3 | import os, sys, inspect
4 | current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
5 | parent_dir = os.path.dirname(current_dir)
6 | sys.path.insert(0, parent_dir)
7 | from config import *
8 | import csv
9 | import numpy as np
10 | from uuid import uuid4
11 | from .env import Environment
12 |
13 |
14 | REWARD_INPUT_LEN = 2
15 | STATE_INPUT_LEN = 17
16 |
17 | class AimsunEnv(Environment):
18 | """Aimsun Next environment
19 |
20 | Attributes
21 | ----------
22 | num_step : int
23 | total time steps simulated
24 | reward_flag : int
25 | check if received reward is at the new time step
26 | state_flag : int
27 | check if received state is at the new time step
28 | """
29 |
30 | def __init__(self, action_space):
31 | """Initialize Aimsun Next environment object
32 |
33 | Parameters
34 | ----------
35 | action_space : list
36 | list of available actions
37 | """
38 | Environment.__init__(self, name='Aimsun', action_space=action_space)
39 | self.reward_flag = 0
40 | self.state_flag = 0
41 | self.num_step = 0
42 | self.check_in_time = []
43 |
44 | def get_state_size(self):
45 | """Return the state size
46 |
47 | Returns
48 | -------
49 | int
50 | state size
51 | """
52 | return STATE_INPUT_LEN - 1
53 |
54 | def get_action_size(self):
55 | """Return the action space size
56 |
57 | Returns
58 | -------
59 | int
60 | action space size
61 | """
62 | return len(self.action_space)
63 |
64 | def _receive_and_log_reward(self):
65 | """Receive, log and return the new reward
66 |
67 | Returns
68 | -------
69 | float
70 | newly received reward
71 | """
72 | # receive from REWARD_LOG
73 | is_read = False
74 | while not is_read:
75 | try:
76 | f = open(self.REWARD_LOG, "r")
77 | data = f.read()
78 | f.close()
79 | data = data.split()
80 | if len(data) != REWARD_INPUT_LEN: continue
81 | reward, new_flag = float(data[0]), int(data[1])
82 | if new_flag != self.reward_flag:
83 | is_read = True
84 | self.reward_flag = new_flag
85 | if new_flag == 0:
86 | return reward, True
87 | except:
88 | continue
89 | return reward, False
90 |
91 | def _write_action(self, index):
92 | """write the newly received action to Aimsun
93 |
94 | Parameters
95 | ----------
96 | index : int
97 | the index of the new action
98 | """
99 | is_written = False
100 | while not is_written:
101 | try:
102 | f = open(self.ACTION_LOG, "w+")
103 | f.write("{} {} {}".format(self.action_space[index][0], self.action_space[index][1], uuid4().int))
104 | f.close()
105 | is_written = True
106 | except:
107 | continue
108 |
109 | def _get_state(self):
110 | """Receive and return the new state
111 |
112 | Returns
113 | -------
114 | list
115 | received state
116 | """
117 | is_read = False
118 | while not is_read:
119 | try:
120 | f = open(self.STATE_LOG, "r")
121 | data = f.read()
122 | f.close()
123 | data = data.split()
124 | if len(data) != STATE_INPUT_LEN: continue
125 | new_flag = int(data[-1])
126 | if new_flag != self.state_flag:
127 | S_ = np.array(list(map(lambda x: float(x), data[:-1])))
128 | self.check_in_time.append(max(S_[3], S_[11]))
129 | is_read = True
130 | self.state_flag = new_flag
131 | except:
132 | continue
133 | self.num_step += 1
134 | return S_
135 |
136 | def step(self, action_index):
137 |         """Write the chosen action to Aimsun and wait for the new
138 |         state and reward
139 |
140 | Parameters
141 | ----------
142 | action_index : int
143 | the index of the action space
144 |
145 | Returns
146 | -------
147 | list, float, bool
148 | new state, new reward, and simulation finish
149 | """
150 | self._write_action(action_index)
151 | S_ = self._get_state()
152 | reward, done = self._receive_and_log_reward()
153 | # print log
154 | if self.num_step < 50 or self.num_step % 1000 == 0:
155 | print("="*20 + " Step: {} ".format(self.num_step) + "="*20)
156 | return S_, reward, done
157 |
158 | def reset(self):
159 | """Reset the Aimsun environment and receive the first state
160 | """
161 | print('Reset Aimsun Environment')
162 | print('Waiting for the first bus...')
163 | return self._get_state()
164 |
165 | def exclude(self):
166 |         """Decide whether the latest transition should be excluded from training
167 | 
168 |         Returns:
169 |             bool: True if fewer than 3 check-ins are recorded or the latest check-in is out of order
170 | """
171 | if len(self.check_in_time) > 10: self.check_in_time.pop(0)
172 | if len(self.check_in_time) <= 2: return True
173 | if self.check_in_time[-1] < self.check_in_time[-2]:
174 | return True
175 | return False
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
--------------------------------------------------------------------------------
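`AimsunEnv` above talks to the running simulation through plain-text files in `log_files/`: every write carries a trailing change flag, and the reader polls until the flag differs from the one it last saw. A minimal sketch of the writer side of that handshake, assuming the Aimsun-side script publishes 16 state floats plus a flag the way `_get_state` expects (file names follow `config.py`):

```python
import os

LOG_DIR = 'log_files'                       # LOG_PATH in config.py
os.makedirs(LOG_DIR, exist_ok=True)
STATE_LOG = os.path.join(LOG_DIR, 'state.txt')

def publish_state(values, flag):
    """Write 16 state floats followed by a change flag (STATE_INPUT_LEN = 17 tokens)."""
    with open(STATE_LOG, 'w') as f:
        f.write(' '.join(str(v) for v in values) + ' {}'.format(flag))

# the reader (AimsunEnv._get_state) loops until the trailing flag differs from
# self.state_flag, then parses the 16 floats in front of it
publish_state([0.0] * 16, flag=1)
```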
/Env/env.py:
--------------------------------------------------------------------------------
1 | """
2 | A base class that defines the functionality an environment must provide
3 | """
4 | import numpy as np
5 | import random
6 | from config import *
7 |
8 |
9 |
10 | class Environment():
11 | """
12 | Attributes:
13 | env_name (str): name of this environment
14 | action_space (List[(int, int)]): a set of possible actions
15 | STATE_LOG (str): file path to the textfile that records the states (collected when time steps are renewed)
16 | ACTION_LOG (str): file path to the textfile that records the chosen action at every time step
17 |         REWARD_LOG (str): file path to the textfile that records the reward at every time step (REWARD_CSV is the csv of all rewards)
18 | """
19 |
20 | def __init__(self, name, action_space):
21 | """
22 | Initialize an environment object
23 |
24 | Args:
25 | name (str): name of the initialized environment
26 | action_space (List[(int, int)]): a set of possible actions
27 | """
28 | self.env_name = name
29 | self.action_space = action_space
30 | self.STATE_LOG = STATE # temp state
31 | self.ACTION_LOG = ACTION # temp action
32 | self.REWARD_LOG = REWARD # temp reward
33 | self.REWARD_CSV = REWARD_CSV # rewards of all steps
34 |
35 | def step(self, action_index):
36 | """
37 |         Advance one time step by applying an action. Implementations return the next
38 |         state, the reward, and a boolean indicating whether the simulation has ended
39 | 
40 |         Args:
41 |             action_index (int): the action index (equal to argmax Q) used to
42 |                 obtain the action from the action space
43 |
44 | Raises:
45 | NotImplementedError
46 | """
47 | raise NotImplementedError
48 |
49 | def reset(self):
50 | """
51 |         Begin a new episode and return its first state
52 |
53 | Raises:
54 | NotImplementedError
55 | """
56 | raise NotImplementedError
57 |
58 | def rand_action(self):
59 | """
60 | Choose an action randomly (exploring)
61 |
62 | Returns:
63 |             int: index of a random action drawn from the action space
64 | """
65 | return np.random.randint(0, len(self.action_space))
66 |
67 |
68 |
--------------------------------------------------------------------------------
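`Environment` above is the abstract contract; `AimsunEnv` is the real implementation. A toy subclass, just to show what `step`/`reset` must return (the action space, transition, and reward here are made up; it runs from the repo root because `Environment.__init__` reads file paths from `config.py`):

```python
import numpy as np
from Env.env import Environment

class ToyEnv(Environment):
    """Minimal Environment subclass illustrating the step/reset contract."""

    def __init__(self):
        Environment.__init__(self, name='Toy', action_space=[(0, 0), (5, -5)])
        self.t = 0

    def reset(self):
        self.t = 0
        return np.zeros(4)               # fake 4-feature initial state

    def step(self, action_index):
        self.t += 1
        next_state = np.random.rand(4)   # fake transition
        reward = float(self.action_space[action_index][0])  # made-up reward
        done = self.t >= 10              # end the episode after 10 steps
        return next_state, reward, done
```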
/README.md:
--------------------------------------------------------------------------------
1 | # Coordinated Transit Signal Priority (cTSP)
2 |
3 |
4 | ## Introduction
5 |
6 | Coordinated Transit Signal Priority across two traffic intersections in the Toronto area, improving bus speed and reliability with a single deep reinforcement learning agent.
7 |
8 |
35 |
36 | ## Model Set-up
37 | ### 1. Time step
38 | Time steps are renewed upon bus check-in events (simultaneous check-ins must be handled without error; for example, Bus A checks in at time x at Intx 1 while Bus B checks in at time x at Intx 2).
39 | A time step is a time point at which a bus is detected by a loop detector. Every check-in event at any check-in loop detector in the system (a system includes all intersections and the road segments connecting them) initiates a new time step.
40 | > Example: Bus 1, 2 and 3 are in the system at time step t. When Bus 4 checks in, time step t+1 is initiated.
41 |
42 | At each time step, the RL model (see the sketch after this list)
43 | - reads the state of the current environment,
44 | - chooses an action, and
45 | - calculates the reward of the last time step.
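A minimal sketch of that loop, mirroring how `trainer.train` in `DQN/ddqn_framework.py` drives `AimsunEnv` (the `policy` callable and the transition list are placeholders for the epsilon-greedy choice and the replay ringbuffer):

```python
import numpy as np

def run_episode(env, policy):
    """Schematic episode loop: state -> action -> (next state, reward, done)."""
    state = env.reset()                  # blocks until the first bus checks in
    done, transitions = False, []
    while not done:
        action = policy(state)           # epsilon-greedy over Q in the real trainer
        next_state, reward, done = env.step(action)  # reward is for the previous step
        transitions.append((state, action, reward, next_state, done))
        state = next_state
    return transitions

# e.g. run_episode(AimsunEnv(action_space), lambda s: np.random.randint(81))
```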
46 |
47 | ### 2. State
48 | States are collected when time steps are renewed. A state includes observations at all intersections in the system, covering bus-, traffic-, and signal-related information. Each intersection contributes the following observations (a layout sketch follows this list):
49 |
50 | - Upstream of the POZ, downstream of the upstream intersection (prePOZ)
51 | - Check-out time of the bus closest to the downstream POZ
52 | 	- Number of buses
53 | - In the POZ
54 | - Last available check-out time
55 | 	> If buses bunch (the POZ holds more than one bus, i.e., Number of buses > 1): use the current time as the check-out time
56 | - Check-in time of the current bus (current time) that initiated this time step
57 | - Check-in headway
58 | 	- Number of buses in the POZ
59 | - Number of cars in the POZ
60 | - Time to the end of EW green: exclude any registered action
61 | > Registered action: any action that is planned but not yet executed, or that is being executed, at the time of check-in
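Concretely, each intersection contributes eight numbers, so a full state is a 16-dimensional vector plus a trailing change flag, matching `STATE_INPUT_LEN = 17` in `Env/aimsun_env.py`. The field order below follows the column names in the `PARAMETER_LOG` header in `config.py` and is otherwise an assumption:

```python
# per-intersection observation block (8 features), repeated for both intersections
PER_INTX = [
    'prePOZ_bus_checkout_time',      # prePOZ: check-out time of the bus closest to the POZ
    'prePOZ_numbus',                 # prePOZ: number of buses
    'last_available_checkout_time',  # POZ: last available check-out time
    'last_check_in_time',            # POZ: check-in time of the bus that started this step
    'check_in_hdy',                  # POZ: check-in headway
    'numbus',                        # POZ: number of buses
    'allnumvel',                     # POZ: number of cars
    'tToNearGreenPhase',             # time to the end of EW green, excluding registered actions
]
STATE_FIELDS = PER_INTX * 2          # 16 features for the two intersections
assert len(STATE_FIELDS) + 1 == 17   # plus one change flag = STATE_INPUT_LEN
```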
62 | ### 3. Action
63 | An action is chosen at every time step as soon as the state is received by the RL model. Actions adjust the duration of the first available EW green at each intersection at time step t.
64 | - If a bus checked in during EW red, adjustment is made to the first available EW green following the red
65 | - If a bus checked in during EW green, adjustment is made to the current EW green
66 | > Example:
67 | A bus checks in at intersection 1 at time step t. If, at time step t, the phase at Intx 1 is red in the direction of bus movement, the adjustment is made to the EW green following the red. If, at time step t, the phase at Intx 2 is EW green, the adjustment is made to this EW green.
68 |
69 | To ensure consistency with iTSP, the per-intersection actions are EW green truncations of -20, -15, -10, or -5 s, do-nothing, or green extensions of +5, +10, +15, or +20 s (the snippet below builds the joint action space).
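This is the joint action space built in `agent.ipynb`: nine adjustments per intersection, crossed over the two intersections for 81 joint actions:

```python
import itertools
import numpy as np

intx_action = np.arange(-20, 21, 5)                       # [-20, -15, ..., +20]
action_space = list(itertools.product(intx_action, intx_action))
print(len(action_space))                                  # 81 joint actions (9 x 9)
```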
70 |
71 |
72 |
73 | When a_t is selected, if there is a registered action (possibly decided at time step t-1) for an intersection, a_t overwrites a_{t-1} if possible.
74 | If a truncation action is selected and the truncation amount exceeds the remaining EW green, the EW green ends immediately.
75 | ### 4. Reward
76 | The reward associated with the state and action at time step t is calculated at time step t+1. Rewards are computed using data (headway and travel time in the POZ) from all check-out events that occurred between time steps t and t+1.
77 | > Example:
78 | If buses A and B checked out of two different intersections (or the same intersection) between time steps t and t+1, r_t = r_A + r_B = 0.6*(headway improvement of bus A) - 0.4*(travel time of bus A in the POZ) + 0.6*(headway improvement of bus B) - 0.4*(travel time of bus B in the POZ)
79 |
80 | If no bus checked out between time steps t and t+1, r_t = 0 (a worked example follows).
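A worked example of this reward with illustrative numbers (the 0.6/0.4 weights are from the formula above; the headway-improvement and travel-time values are invented):

```python
def bus_reward(headway_improvement, poz_travel_time):
    # r_bus = 0.6 * (headway improvement) - 0.4 * (travel time in the POZ)
    return 0.6 * headway_improvement - 0.4 * poz_travel_time

# two buses checked out between time steps t and t+1
r_t = (bus_reward(headway_improvement=12.0, poz_travel_time=45.0)
       + bus_reward(headway_improvement=-3.0, poz_travel_time=50.0))
print(r_t)  # 0.6*12 - 0.4*45 + 0.6*(-3) - 0.4*50 = -32.6
```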
81 |
82 |
83 |
--------------------------------------------------------------------------------
/agent.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "colab": {},
8 | "colab_type": "code",
9 | "id": "oxGS_XGxlaYb",
10 | "scrolled": true
11 | },
12 | "outputs": [
13 | {
14 | "name": "stdout",
15 | "output_type": "stream",
16 | "text": [
17 | "Reset Aimsun Environment\n",
18 | "Waiting for the first bus...\n"
19 | ]
20 | }
21 | ],
22 | "source": [
23 | "from Env import AimsunEnv\n",
24 | "from DQN import ddqn_framework as ddqn\n",
25 | "from DQN import mlp_framework as nn\n",
26 | "import numpy as np\n",
27 | "import pickle\n",
28 | "import itertools\n",
29 | "\n",
30 | "# env setup\n",
31 | "intx_action = np.arange(-20, 21, 5)\n",
32 | "action_space = list(itertools.product(intx_action, intx_action))\n",
33 | "env = AimsunEnv(action_space)\n",
34 | "\n",
35 | "clean_folder = True\n",
36 | "if clean_folder:\n",
37 | " import config\n",
38 | " config.clean_folder_and_initialize()\n",
39 | "\n",
40 | "# model config\n",
41 | "configuration = ddqn.trainer_config(app_name='AimsunNext',\n",
42 | " # BUFFER_SIZE = 50e3,\n",
43 | " BUFFER_SIZE = 50000,\n",
44 | " # STEPS_PER_EPISODE = 500,\n",
45 | " STEPS_PER_EPISODE=500,\n",
46 | " MAX_STEPS = 9999999,\n",
47 | " # UPDATE_TARGET_STEPS = 1000,\n",
48 | " UPDATE_TARGET_STEPS=3000,\n",
49 | " BATCH_SIZE = 32,\n",
50 | " GAMMA = 0.9,\n",
51 | " EXPLORATION = 5000,\n",
52 | " E_MIN = 0.1,\n",
53 | " priority = True,\n",
54 | " # alpha = 0.001,\n",
55 | " alpha = 0.01,\n",
56 | " epsilon = 0.1\n",
57 | " )\n",
58 | "\n",
59 | "# online model\n",
60 | "A1 = nn.layer(env.get_state_size(), nodes=512)\n",
61 | "A2 = nn.layer(512, 512)\n",
62 | "A3 = nn.layer(512, 512)\n",
63 | "A4 = nn.layer(512, 512)\n",
64 | "AOUT = nn.layer(512, env.get_action_size())\n",
65 | "AOUT.f = nn.f_iden\n",
66 | "\n",
67 | "# target model\n",
68 | "L1 = nn.layer(env.get_state_size(), nodes=512)\n",
69 | "L2 = nn.layer(512, 512)\n",
70 | "L3 = nn.layer(512, 512)\n",
71 | "L4 = nn.layer(512, 512)\n",
72 | "LOUT = nn.layer(512, env.get_action_size())\n",
73 | "LOUT.f = nn.f_iden\n",
74 | "\n",
75 | "onlineNet = nn.mlp([A1, A2, A3, A4, AOUT])\n",
76 | "onlineNet.erf = nn.log_cosh # cost in prev aimsun_dqn\n",
77 | "targetNet = nn.mlp([L1, L2, L3, L4, LOUT])\n",
78 | "targetNet.erf = nn.log_cosh # cost in prev aimsun_dqn\n",
79 | "\n",
80 | "ddqn_model = ddqn.trainer(onlineNet,targetNet,configuration, env)\n",
81 | "ddqn_model.load_model(False)\n",
82 | "ddqn_model.train(log=True)"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "metadata": {},
89 | "outputs": [],
90 | "source": []
91 | }
92 | ],
93 | "metadata": {
94 | "colab": {
95 | "name": "Untitled0.ipynb",
96 | "provenance": [],
97 | "version": "0.3.2"
98 | },
99 | "kernelspec": {
100 | "display_name": "Python 3",
101 | "language": "python",
102 | "name": "python3"
103 | },
104 | "language_info": {
105 | "codemirror_mode": {
106 | "name": "ipython",
107 | "version": 3
108 | },
109 | "file_extension": ".py",
110 | "mimetype": "text/x-python",
111 | "name": "python",
112 | "nbconvert_exporter": "python",
113 | "pygments_lexer": "ipython3",
114 | "version": "3.7.6"
115 | }
116 | },
117 | "nbformat": 4,
118 | "nbformat_minor": 1
119 | }
120 |
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | """configuration for the project
2 |
3 | Attributes
4 | ----------
5 | CONFIG_1 : str
6 | the meta data of each intersection
7 | CONFIG_2 : str
8 | the meta data of each intersection
9 | CORRIDOR : list
10 | the corridor for training
11 | CWD : str
12 | the project directory
13 | LOG_PATH : str
14 | log files folder
15 | """
16 |
17 | import os, inspect, shutil
18 |
19 | CWD = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
20 | LOG_PATH = CWD + '/log_files/'
21 |
22 | # log files
23 | STATE = LOG_PATH + 'state.txt'
24 | ACTION = LOG_PATH + 'action.txt'
25 | REWARD = LOG_PATH + 'reward.txt'
26 | REWARD_CSV = LOG_PATH + 'reward_log.csv'
27 | PARAMETER_LOG = LOG_PATH + 'parameter_log.csv'
28 | Num_bus_in_rep = LOG_PATH + 'Num_bus_in_rep.txt'
29 |
30 |
31 | def clean_folder_and_initialize():
32 | folder = LOG_PATH
33 | for filename in os.listdir(folder):
34 | file_path = os.path.join(folder, filename)
35 | try:
36 | if os.path.isfile(file_path) or os.path.islink(file_path):
37 | os.unlink(file_path)
38 | elif os.path.isdir(file_path):
39 | shutil.rmtree(file_path)
40 | except Exception as e:
41 | print('Failed to delete %s. Reason: %s' % (file_path, e))
42 | os.makedirs(LOG_PATH, exist_ok=True)
43 | with open(PARAMETER_LOG, 'w+') as log_file:
44 | log_file.write('log, replication ID, vehicle ID, checkin time, checkout time, check in phase number, check in phase time, checkout phase time, checkin headway, checkout headway, action 1, action 2 as decided at the bus check in, registered action at bus check out, Travel time, reward, prePOZ bus checkout time, prePOZ numbus, last_available_checkout_time, last_check_in_time, check_in_hdy, numbus, allnumvel, tToNearGreenPhase, prePOZ bus checkout time, prePOZ numbus, last_available_checkout_time, last_check_in_time, check_in_hdy, numbus, allnumvel, tToNearGreenPhase\n')
45 |
46 |
47 | INTERSECTION_1 = {
48 | 'corridor_log': PARAMETER_LOG,
49 | 'intersection': 1171274,
50 | 'busCallDetector': 1171405,
51 | 'busExitDetector': 1171391,
52 | 'section': 6601,
53 | 'phase_duration': [16, 38, 7, 11, 32, 6],
54 | 'phase_of_interest': 5,
55 | 'AlgB_decision': 9,
56 | 'log': LOG_PATH + '1171274.csv',
57 | 'target_headway': 290,
58 | 'prePOZ': {
59 | 'busExitDetector': 1171393,
60 | 'busCallDetector': 1171405,
61 | },
62 | 'maxTT': 400
63 |
64 | }
65 | INTERSECTION_2 = {
66 | 'corridor_log': PARAMETER_LOG,
67 | 'intersection': 1171288,
68 | 'busCallDetector': 1171407,
69 | 'busExitDetector': 1171389,
70 | 'section': 6563,
71 | 'phase_duration': [38, 8, 13, 4, 40, 7],
72 | 'phase_of_interest': 5,
73 | 'AlgB_decision': 12,
74 | 'log': LOG_PATH + '1171288.csv',
75 | 'target_headway': 290,
76 | 'prePOZ': {
77 | 'busExitDetector': 1171391,
78 | 'busCallDetector': 1171407,
79 | },
80 | 'maxTT': 200
81 | }
82 |
83 |
84 | CORRIDOR = [INTERSECTION_1, INTERSECTION_2]
85 |
86 |
--------------------------------------------------------------------------------
/demo/dynamic_senario.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JerryIshihara/coordinated-transit-signal-priority/09fe74809e9aec0820d43300f1a087697ba6be1a/demo/dynamic_senario.png
--------------------------------------------------------------------------------
/demo/navBar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JerryIshihara/coordinated-transit-signal-priority/09fe74809e9aec0820d43300f1a087697ba6be1a/demo/navBar.png
--------------------------------------------------------------------------------
/demo/prePOZ.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JerryIshihara/coordinated-transit-signal-priority/09fe74809e9aec0820d43300f1a087697ba6be1a/demo/prePOZ.png
--------------------------------------------------------------------------------
/demo/tsp_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JerryIshihara/coordinated-transit-signal-priority/09fe74809e9aec0820d43300f1a087697ba6be1a/demo/tsp_flow.png
--------------------------------------------------------------------------------
/train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # default server python directory
4 | py_dir="C:\Python27"
5 | # aimsun controller path
6 | controller="$HOME/Aimsun/RunSeveralReplications.py"
7 | # default model path in the server
8 | model_path="C:\Users\Public\Documents\ShalabyGroup\finchTSPs_3 intx_west_Subnetwork 1171379_newInters.ang"
9 | # default aconsole path
10 | aconsole_path="C:\Program Files\Aimsun\Aimsun Next 8.3\aconsole.exe"
11 | # replication start and end
12 | start=1177671 end=1180580
13 |
14 | while [ "$1" != "" ]; do
15 | case $1 in
16 | -p | --pythonDir ) shift
17 | py_dir="$1"
18 | ;;
19 | -m | --modelPath ) shift
20 | model_path="$1"
21 | ;;
22 | -a | --aconsolePath ) shift
23 | aconsole_path="$1"
24 | ;;
25 | -s | --start ) shift
26 | start=$1
27 | ;;
28 | -e | --end ) shift
29 | end=$1
30 | ;;
31 | esac
32 | shift
33 | done
34 |
35 | echo ${end}
36 |
37 | cd ${py_dir}
38 |
39 | for ((x=${start}; x<=${end}; x++)); do python "${controller}" -aconsolePath "${aconsole_path}" -modelPath "${model_path}" -targets ${x}; done
--------------------------------------------------------------------------------