├── .gitignore ├── .gitmodules ├── Aimsun ├── AAPI.py ├── BusInPOZ.py ├── RunSeveralReplications.py ├── corridor.py ├── intersection.py ├── prePOZ.py ├── script.py └── util.py ├── DQN └── ddqn_framework.py ├── Env ├── __init__.py ├── aimsun_env.py └── env.py ├── README.md ├── agent.ipynb ├── config.py ├── demo ├── dynamic_senario.png ├── navBar.png ├── prePOZ.png └── tsp_flow.png └── train.sh /.gitignore: -------------------------------------------------------------------------------- 1 | ### git ignore files ### 2 | 3 | # mac folder attribute file 4 | **/.DS_Store 5 | # python compiles 6 | *.pyc 7 | # cache folder 8 | **/__pycache__ 9 | # keep log files not files 10 | log_files/* 11 | !log_files/.keep 12 | # ipynb 13 | **/.ipynb_checkpoints 14 | # IPython 15 | profile_default/ 16 | ipython_config.py 17 | .idea/ 18 | Aimsun/detector_original.py 19 | agent.py 20 | Aimsun/.~intersection.py 21 | Aimsun/detector.py 22 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "DQN-model"] 2 | path = DQN-model 3 | url = https://github.com/JerryIshihara/DQN-model.git 4 | -------------------------------------------------------------------------------- /Aimsun/BusInPOZ.py: -------------------------------------------------------------------------------- 1 | class BusInPOZ: 2 | 3 | def __init__(self, intersection, check_in_bus_info, check_in_phase, check_in_phasetime, check_in_time, last_check_in): 4 | self.intersection_of_interest = intersection 5 | self.bus_id = check_in_bus_info.idVeh 6 | self.check_in_time = check_in_time 7 | self.check_in_phase = check_in_phase 8 | self.check_in_phasetime = check_in_phasetime 9 | self.last_check_in = last_check_in # previous bus check in time 10 | 11 | self.check_in_headway = check_in_time - last_check_in 12 | 13 | self.check_out_time = -1 14 | self.check_out_headway = -1 15 | 16 | self.last_update_time = check_in_time 17 | self.original_action = None 18 | self.original_state = None # state generated at check in 19 | 20 | def check_out(self, check_out_time, last_check_out=0): 21 | self.check_out_time = check_out_time 22 | self.check_out_headway = check_out_time - last_check_out 23 | self.last_update_time = check_out_time 24 | 25 | def set_action(self, action): 26 | if self.original_action is None: 27 | self.original_action = action 28 | else: 29 | print("duplicate set original action, check to make sure implementation is correct") 30 | 31 | def set_state(self, state): 32 | if self.original_state is None: 33 | self.original_state = state 34 | else: 35 | print("duplicate set original state, check to make sure implementation is correct") 36 | -------------------------------------------------------------------------------- /Aimsun/RunSeveralReplications.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Script to run several replications via Aconsole, one replication gets to run after the previous one finishes. 3 | 4 | Usage of the script: 5 | In command prompt go to where your Python 2.7 is located and type: python **PathToThisScript** -aconsolePath **PATHTO_aconsole.exe** -modelPath **PATHTOMODEL** -targets **Target1** **Target2** **...TargetN*** 6 | Where Target1, Target2 ... TargetN are replications or macroexperiments... 
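A concrete example (the aconsole path, model path and replication ids below are placeholders, not taken from this project):

    python C:\scripts\RunSeveralReplications.py -aconsolePath "C:\Program Files\Aimsun\Aimsun Next 8.2\aconsole.exe" -modelPath C:\models\corridor.ang -targets 1060 1061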
7 | 8 | You can also run several replications from several models by doing 9 | 10 | python **PathToThisScript** -aconsolePath **PATHTO_aconsole.exe** -modelPath **PATHTOMODEL1** -targets **Target1** **Target2** **...TargetN*** -modelPath **PATHTOMODEL2** -targets **Target1** **Target2** ... 11 | 12 | ''' 13 | 14 | 15 | 16 | import sys 17 | import os.path 18 | import locale 19 | from datetime import datetime 20 | import subprocess # This library allows you to open a command prompt with aconsole.exe 21 | 22 | def RunSimulation(replicationID,modelPath): # This calls a subprocess like C:>ProgramFiles>Aimsuns>Aimsun Next 8.2_R5233>aconsole.exe -v -log -project **PROJECT** -cmd execute -target 1060 23 | #So each of the subprocesses generated by this function is an aconsole execution 24 | print "modelPath: " + modelPath 25 | print "replication id: " + str(replicationID) 26 | args = [execmd, '-v', '-log', '-project', modelPath, '-cmd', 'execute', '-target', replicationID] 27 | for x in range(0, 1): 28 | print(x) 29 | popen = subprocess.Popen(args) 30 | popen.wait() # This makes the script wait until the subprocess (aconsole) has finished. This way the memory consumption wont skyrocket. (There will be only one replication running at a time. ) 31 | 32 | argv=sys.argv # The arguments this script will take are the ones provided via command prompt 33 | 34 | if argv[1] == '-aconsolePath': 35 | 36 | execmd = argv[2] 37 | print "\n Aconsole: " + execmd + "\n" 38 | 39 | if argv[3] == '-modelPath': 40 | modelPath = argv[4] 41 | print "------------\n" 42 | print "Model: " + modelPath + "\n" 43 | 44 | else: 45 | print "no -modelPath parameter" 46 | raw_input("Press enter to exit ;)") 47 | sys.exit() 48 | else: 49 | print "No -aconsolePath parameter" 50 | raw_input("Press enter to exit ;)") 51 | sys.exit() 52 | 53 | if argv[5] == '-targets': 54 | print "targets: \n " 55 | for i in range(len(argv[6:])): 56 | j = i +6 57 | if argv[j].isdigit(): 58 | print argv[j] + "\n " 59 | else: 60 | if argv[j] =='-modelPath': 61 | print "------------\n" 62 | print "Model: " + argv[j+1] + "\n" 63 | 64 | if argv[j] == '-targets': 65 | print "targets: \n" 66 | print '===== NOW ===== \n' 67 | print datetime.now() 68 | else: 69 | print "no -targets parameter" 70 | raw_input("Press enter to exit ;)") 71 | sys.exit() 72 | 73 | 74 | # answer = raw_input("Continue? 
[y/n] \n") 75 | answer = 'y' 76 | if answer == 'y': 77 | for j in range(len(argv[6:])): 78 | i = j+6 79 | if argv[i].isdigit(): 80 | print "Running simulation: " + argv[i] + " in model: " + modelPath 81 | RunSimulation(argv[i],modelPath) 82 | elif argv[i] == '-modelPath': 83 | modelPath = argv[i+1] 84 | 85 | else: 86 | print "execution canceled " 87 | raw_input("Press enter to exit ;)") 88 | sys.exit() 89 | print "Done" 90 | # raw_input("Press enter to exit ;)") 91 | 92 | -------------------------------------------------------------------------------- /Aimsun/corridor.py: -------------------------------------------------------------------------------- 1 | """Aimsun Corridor 2 | """ 3 | from uuid import uuid4 4 | import os, sys, inspect 5 | # import numpy as np 6 | current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 7 | parent_dir = os.path.dirname(current_dir) 8 | sys.path.insert(0, parent_dir) 9 | from config import * 10 | from intersection import * 11 | from prePOZ import * 12 | 13 | class Corridor: 14 | 15 | """Summary 16 | 17 | Attributes 18 | ---------- 19 | action_flag : int 20 | Description 21 | intx_1 : TYPE 22 | Description 23 | intx_2 : TYPE 24 | Description 25 | joint_state : TYPE 26 | Description 27 | """ 28 | 29 | def __init__(self, intersections): 30 | """Initialize Corridor object 31 | 32 | Parameters 33 | ---------- 34 | intersections : list 35 | a list of intersection configurations 36 | """ 37 | # first prePOZ + POZ 38 | self.intx_1 = Intersection(intersections[0]) 39 | self.prePOZ_1 = PrePOZ(intersections[0]['prePOZ']) 40 | 41 | # second prePOZ + POZ 42 | self.intx_2 = Intersection(intersections[1]) 43 | self.prePOZ_2 = PrePOZ(intersections[1]['prePOZ']) 44 | 45 | self.joint_state = ([], uuid4().int) # ([joint state], flag) 46 | self.action_flag = 0 47 | self.counter = 0 48 | 49 | def write_last_reward(self): 50 | r_1 = self.intx_1.get_reward() 51 | r_2 = self.intx_2.get_reward() 52 | self.counter = 0 53 | # cumulative reward between time step t and t + 1 54 | total_reward = r_1 + r_2 55 | self._write_state_reward(total_reward, last_reward=True) 56 | return 57 | 58 | def _write_state_reward(self, reward, last_reward=False): 59 | """Send joint state and reward to DQN 60 | """ 61 | uuid = uuid4().int 62 | if self.counter == 0 or last_reward: 63 | uuid = 0 64 | self.counter += 1 65 | # first reward is 0 66 | is_reward_written = False 67 | while not is_reward_written: 68 | try: 69 | f = open(REWARD, "w+") 70 | f.write("{} {}".format(reward, uuid)) 71 | f.close() 72 | is_reward_written = True 73 | with open(REWARD_CSV, "a+") as out: # Log key parameters 74 | out.write("{},{}\n".format(reward, uuid)) 75 | except: 76 | continue 77 | 78 | joint_state = self.joint_state 79 | joint_state_str = ' '.join(str(n) for n in joint_state[0]) 80 | is_state_written = False 81 | while not is_state_written: 82 | try: 83 | f = open(STATE, "w+") 84 | f.write("{} {}".format(joint_state_str, joint_state[1])) 85 | f.close() 86 | is_state_written = True 87 | except: 88 | continue 89 | 90 | 91 | 92 | def _read_action(self): 93 | """Read and return the actions from DQN 94 | 95 | Returns 96 | ------- 97 | int, int 98 | action1, action2 from DQN 99 | """ 100 | flag = self.action_flag 101 | while flag == self.action_flag: 102 | try: 103 | f = open(ACTION, "r") 104 | data = f.read() 105 | f.close() 106 | data = data.split() 107 | if len(data) != 3: 108 | continue 109 | action1 = int(data[0]) 110 | action2 = int(data[1]) 111 | self.action_flag = int(data[2]) # new flag 
read from file
112 |             except:
113 |                 continue
114 |         return action1, action2
115 | 
116 |     def aapi_post_manage(self, time, timeSta, timeTrans, acycle):
117 |         """Aimsun life-cycle hook, called every simulation step while the replication is running
118 | 
119 |         Parameters
120 |         ----------
121 |         time : int
122 |             current replication time in Aimsun
123 |         timeSta : int
124 |             default Aimsun input
125 |         timeTrans : int
126 |             default Aimsun input
127 |         acycle : int
128 |             default Aimsun input
129 | 
130 |         Returns
131 |         -------
132 |         int
133 |             0 indicates successful function call to Aimsun Next
134 |         """
135 |         # prePOZ update
136 |         self.prePOZ_1.update(time, timeSta)
137 |         self.prePOZ_2.update(time, timeSta)
138 |         # check-out event
139 |         self.intx_1._bus_out_handler(time, timeSta)
140 |         self.intx_2._bus_out_handler(time, timeSta)
141 |         # check-in event
142 |         intx1_bus_checkin = self.intx_1._bus_enter_handler(time, timeSta)
143 |         intx2_bus_checkin = self.intx_2._bus_enter_handler(time, timeSta)
144 |         if intx1_bus_checkin or intx2_bus_checkin:
145 |             # update states based on each intersection
146 |             pre1 = self.prePOZ_1.get_state()
147 |             pre2 = self.prePOZ_2.get_state()
148 |             poz1 = self.intx_1.get_state()
149 |             poz2 = self.intx_2.get_state()
150 |             self.joint_state = (pre1 + poz1 + pre2 + poz2, uuid4().int)
151 |             # - send the new state and the previous reward to the DQN and clear the reward
152 |             #   (no need to clear the state since get_state() is synchronous
153 |             #   with Aimsun)
154 |             # - get_reward() fetches the cumulative reward of each intersection
155 |             #   since the last timestep and clears the stored reward internally
156 |             r_1 = self.intx_1.get_reward()
157 |             r_2 = self.intx_2.get_reward()
158 |             # cumulative reward between time step t and t + 1
159 |             total_reward = r_1 + r_2
160 |             # total_reward = 1 / (1 + np.exp(-total_reward))
161 |             self._write_state_reward(total_reward)
162 |             # apply action
163 |             action1, action2 = self._read_action()
164 |             # record the decided action on the bus that just checked in
165 |             if intx1_bus_checkin:
166 |                 self.intx_1.set_bus_actions_and_state([action1, action2], pre1 + poz1 + pre2 + poz2)
167 |             if intx2_bus_checkin:
168 |                 self.intx_2.set_bus_actions_and_state([action1, action2], pre1 + poz1 + pre2 + poz2)
169 |             # apply action to each intersection
170 |             if self.intx_1.numbus == 0:
171 |                 action1 = 0  # if there is no bus in intx 1, no action can be applied
172 |             self.intx_1.apply_action(action1, time, timeSta)
173 |             self.intx_2.apply_action(action2, time, timeSta)
174 | 
175 | 
176 |         return 0
177 | 
178 | 
--------------------------------------------------------------------------------
/Aimsun/intersection.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Intersection logic for the corridor TSP (transit signal priority) controller
3 | """
4 | from AAPI import *
5 | from BusInPOZ import BusInPOZ
6 | from util import *
7 | import util
8 | import csv
9 | 
10 | class Intersection:
11 | 
12 |     """A single signalized intersection and the bookkeeping for its POZ
13 | 
14 |     Attributes
15 |     ----------
16 |     CONFIG : dict
17 |         configuration of the intersection, see config.py
18 |     cycled_bus : int
19 |         number of buses that checked in during a previous cycle and are still in POZ (a cycle starts at the phase of interest and ends before the next phase of interest)
20 |     total_bus : int
21 |         total number of buses that entered this intersection (used to determine the first & last checked-in bus in training)
22 |     bus_list : list
23 |         list of BusInPOZ objects CURRENTLY in the intersection (not checked out yet),
24 |         arranged by check-in time, with the newest bus at the end of the list
25 |     last_checkout_bus : BusInPOZ object
26 |         the latest bus that
checked out of the intersection, initialized as None before the first bus checkout event 27 | extended : int 28 | Registered action that is applied on this intersection 29 | extendedalready : int 30 | 0 or 1, 0 means no change has been made to the intersection signal length 31 | numbus : int 32 | Number of bus CURRENTLY in the intersection POZ 33 | allnumvel : int 34 | Number of bus + cars currently in the intersection POZ 35 | last_in_info : int 36 | The bus id of the bus which initiated the last check in event (used to avoid repeated check in event) 37 | last_out_info : int 38 | The bus id of the bus which initiated the last check out event (used to avoid repeated check out event) 39 | markedbus : int 40 | not used yet 41 | markedbusgone : int 42 | not used yet 43 | reward : int 44 | cumulative reward collected in check out events after the last check in event 45 | replicationID : int 46 | Aimsun replication ID for the current simulation 47 | downstream_intersection : Intersection object 48 | pointer to the downstream intersection 49 | prePOZ_numbus : int 50 | (initialized to None to indicate no upstream intersection exist to the current intersection) 51 | number of bus in prePOZ of the current intersection, this number should only be increased by the intersection before 52 | this intersection 53 | prePOZ_bus_checkout_time_dict : dict 54 | dict with bus id as keys and bus checkout time as values. This shows all bus in prePOZ of the current intersection 55 | This dict is only valid IF there is a detector/intersection before the current intersection 56 | state : list with 6 elements 57 | infomation about the bus and vehicles INSIDE POZ (not including prePOZ) 58 | [last_available_checkout_time, last_check_in_time, check_in_headway, 59 | number of buses in POZ, number of cars + buses in POZ, time To Nearest Green] 60 | 61 | """ 62 | 63 | def __init__(self, CONFIG): 64 | """Summary 65 | 66 | Parameters 67 | ---------- 68 | CONFIG : TYPE 69 | Description 70 | """ 71 | self.CONFIG = CONFIG 72 | self.cycled_bus = 0 73 | self.total_bus = 0 74 | self.bus_list = [] # list of bus in POZ 75 | self.last_checkout_bus = None 76 | self.extended = 0 # registered action 77 | self.numbus = 0 78 | self.allnumvel = 0 79 | self.last_in_info = -99 # bus id for last checked in bus 80 | self.last_out_info = -99 81 | self.extendedalready = 0 # extended or not in current cycle 82 | # self.markedbus = 0 # if a bus is marked as indicator 83 | # self.markedbusgone = 0 # if a marked bus had exited 84 | self.reward = 0 # cumulative reward 85 | self.replicationID = None 86 | # self.extend_record = {} 87 | 88 | # number of bus in prePOZ will produce error if a bus enters POZ without being recorded in prePOZ (aka checkout from last intersection) 89 | self.prePOZ_numbus = None 90 | self.prePOZ_bus_checkout_time_dict = None # this dict is only valid if there is a detector/intersection before this intersection 91 | self.reset_intersection = 0 92 | self.state = [0, 0, 0, 0, 0, 0] 93 | 94 | def empty_intersection(self): 95 | self.cycled_bus = 0 96 | self.total_bus = 0 97 | self.bus_list = [] # list of bus in POZ 98 | self.last_checkout_bus = None 99 | self.extended = 0 # registered action 100 | self.numbus = 0 101 | self.allnumvel = 0 102 | self.last_in_info = -99 # bus id for last checked in bus 103 | self.last_out_info = -99 104 | self.extendedalready = 0 # extended or not in current cycle 105 | 106 | self.state = [0, 0, 0, 0, 0, 0] 107 | 108 | 109 | def _find_first_checkout_time_in_prePOZ(self): 110 | """ 111 | find the last 
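checkout time from the upstream intersection for the earliest bus still in
        prePOZ (the minimum value stored in prePOZ_bus_checkout_time_dict)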
112 | 
113 |         Returns
114 |         -------
115 |         first_checkout_time : int
116 |             the checkout time of the first bus in prePOZ
117 |         """
118 |         if self.prePOZ_numbus is None or self.prePOZ_numbus == 0:
119 |             return 0
120 |         prePOZ_busdict = self.prePOZ_bus_checkout_time_dict
121 |         first_checkout_time = min(prePOZ_busdict.values())  # min over the values; min over the dict itself would return a bus id, not a checkout time
122 |         return first_checkout_time
123 | 
124 |     def _checkout_bus_from_POZ(self, checkout_id, checkout_time):
125 |         # remove the bus with the corresponding id from POZ and return the bus
126 |         for bus in self.bus_list:
127 |             if bus.bus_id == checkout_id:
128 |                 if self.last_checkout_bus is not None:
129 |                     last_checkout_time = self.last_checkout_bus.check_out_time
130 |                 else:
131 |                     # if there is no previous bus, assume the checkout headway is perfect
132 |                     last_checkout_time = checkout_time - self.CONFIG['target_headway']
133 |                 bus.check_out(checkout_time, last_checkout_time)
134 |                 self.bus_list.remove(bus)
135 |                 return bus
136 |         return False
137 | 
138 |     def get_state(self):
139 |         """Return the 6-slot POZ state of this intersection
140 | 
141 |         (A 2-slot prePOZ state -- covering the cases bus in prePOZ, bus in POZ,
142 |         and no bus -- is also computed below, but it is currently not returned:
143 |         the Corridor reads prePOZ state directly from its PrePOZ objects and
144 |         concatenates it with this POZ state.)
145 | 
146 |         Returns
147 |         -------
148 |         list
149 |             the POZ state (self.state, 6 elements)
150 |         """
151 | 
152 |         if self.prePOZ_numbus is None:
153 |             prePOZ = [0, 0]
154 |         else:
155 |             prePOZ = [len(self.prePOZ_bus_checkout_time_dict.keys()), self._find_first_checkout_time_in_prePOZ()]
156 | 
157 |         return self.state
158 | 
159 |     def set_bus_actions_and_state(self, actions, joint_state):
160 |         """
161 |         Save the action decided by the DQN at bus check-in onto the bus object so it can later be compared with the action actually applied
162 |         Parameters
163 |         ----------
164 |         actions : list of int
165 |             action to all intersections
166 |         joint_state : list of int
167 |             joint state of the intersections
168 |         """
169 |         self.bus_list[-1].set_action(actions)
170 |         self.bus_list[-1].set_state(joint_state)
171 |         return
172 | 
173 |     def apply_action(self, action, time, timeSta):
174 |         """Apply the action to the intersection according to different
175 |         situations (hard-coded for upstream)
176 | 
177 |         Parameters
178 |         ----------
179 |         action : int
180 |             action to the intersection (extend/shorten this amount)
181 |         time : int
182 |             Absolute time of simulation in seconds
183 |         timeSta : int
184 |             Time of simulation in stationary period, in sec
185 |         """
186 |         intersection = self.CONFIG['intersection']
187 |         phase_of_interest = self.CONFIG['phase_of_interest']
188 |         total_phases = len(self.CONFIG['phase_duration'])
189 |         green_duration = self.CONFIG['phase_duration'][phase_of_interest - 1]
190 | 
191 |         # check the time duration is correct
192 |         pdur = doublep()
193 |         pcmax = doublep()
194 |         pcmin = doublep()
195 |         ECIGetDurationsPhase(self.CONFIG['intersection'], self.CONFIG['phase_of_interest'], timeSta, pdur, pcmax, pcmin)
196 |         poi_duration = int(pdur.value())
197 | 
198 |         if self.extendedalready:
199 |             print("int {} already extended for {} seconds, applying {} extension on top of it".format(self.CONFIG['intersection'], self.extended, action))
200 |             action = action + self.extended
201 |             # clip the action to the legal limit
202 |             if action > 20:
203 |                 action = 20
204 |             if action < -20:
205 |                 action = -20
206 |         else:
207 |             print("int {} has no assigned extension, the phase of interest is {} sec, applying a {} sec extension".format(self.CONFIG['intersection'], poi_duration, action))
208 | 
209 | 
210 |         if poi_duration != green_duration + self.extended:
211 |             print("\n\n ERROR: phase
duration already changed from {} to {} and self.extended value is {}\n\n".format(green_duration, poi_duration, self.extended)) 212 | 213 | phasetime = time - ECIGetStartingTimePhase(intersection) 214 | currentPhase = ECIGetCurrentPhase(intersection) 215 | if currentPhase == phase_of_interest: 216 | # check if the action is legal 217 | remaining_green = self._get_toNearGreenPhase(currentPhase, phasetime, 0) 218 | if remaining_green>=0 and action + remaining_green < 0: 219 | action = -remaining_green 220 | ECIChangeTimingPhase(intersection, phase_of_interest, green_duration + action, timeSta) 221 | if action != 0: 222 | self.extendedalready = 1 223 | else: 224 | self.extendedalready = 0 225 | self.extended = action 226 | 227 | print("------- {} Extend start here ----------".format(intersection)) 228 | print("Extended at time: {}".format(time)) 229 | print("Extended length: " + str(action) + " sec") 230 | 231 | 232 | def log_state_for_check_in(self, phasetime, checked_in_bus): 233 | replicationID = ANGConnGetReplicationId() 234 | vehicleID = checked_in_bus.bus_id 235 | target_headway = self.CONFIG['target_headway'] 236 | parameter_log_file = self.CONFIG['log'] 237 | corridor_log_file = self.CONFIG['corridor_log'] 238 | reward = self.reward 239 | check_in_headway = checked_in_bus.check_in_headway 240 | check_in_time = checked_in_bus.check_in_time 241 | travelTime = '-' 242 | state = None 243 | if state is None: 244 | state = ['-'] * 16 245 | action = ['-'] * 2 246 | 247 | # list of things in log by index 248 | # 0: replication ID 249 | # 1: vehicle ID 250 | # 2: check in time 251 | # 3: checkout time 252 | # 4: check in phase number 253 | # 5: check in phase time 254 | # 6: checkout phase time 255 | # 7: check in headway 256 | # 8: checkout headway 257 | # 9 - 10: action 1, action 2 as decided at the bus check in 258 | # 11: registered action at bus check out 259 | # 12: Travel time 260 | # 13: reward 261 | # 14+: states 262 | 263 | # the same cycle 264 | output = [replicationID, vehicleID, check_in_time, '-', checked_in_bus.check_in_phase, 265 | checked_in_bus.check_in_phasetime, '-', check_in_headway, '-'] + action + [self.extended, 266 | travelTime, reward] + state 267 | 268 | with open(corridor_log_file, 'a+') as out: 269 | csv_write = csv.writer(out, dialect='excel') 270 | corridor_log_output = ['int_{}_checkin'.format(self.CONFIG['intersection'])] + output 271 | csv_write.writerow(corridor_log_output) 272 | 273 | 274 | def log_parameter_file(self, phasetime, checked_out_bus): 275 | replicationID = ANGConnGetReplicationId() 276 | vehicleID = checked_out_bus.bus_id 277 | target_headway = self.CONFIG['target_headway'] 278 | parameter_log_file = self.CONFIG['log'] 279 | corridor_log_file = self.CONFIG['corridor_log'] 280 | reward = self.reward 281 | check_in_time = checked_out_bus.check_in_time 282 | check_in_hdy = checked_out_bus.check_in_headway 283 | check_out_hdy = checked_out_bus.check_out_headway 284 | travelTime = checked_out_bus.check_out_time - checked_out_bus.check_in_time 285 | state = checked_out_bus.original_state 286 | if state is None: 287 | state = [-99]*16 288 | action = checked_out_bus.original_action 289 | if action is None: 290 | action = [-99]*2 291 | 292 | # list of things in log by index 293 | # 0: replication ID 294 | # 1: vehicle ID 295 | # 2: check in time 296 | # 3: checkout time 297 | # 4: check in phase number 298 | # 5: check in phase time 299 | # 6: checkout phase time 300 | # 7: check in headway 301 | # 8: checkout headway 302 | # 9 - 10: action 1, action 2 as 
decided at the bus check in 303 | # 11: registered action at bus check out 304 | # 12: Travel time 305 | # 13: reward 306 | # 14+: states 307 | 308 | # the same cycle 309 | output = [replicationID, vehicleID, check_in_time, checked_out_bus.check_out_time, checked_out_bus.check_in_phase, 310 | checked_out_bus.check_in_phasetime, phasetime, check_in_hdy, check_out_hdy] + list(action) + [self.extended, travelTime, reward] + state 311 | 312 | with open(parameter_log_file, "a+") as out: # Log key parameters 313 | csv_write = csv.writer(out, dialect='excel') 314 | csv_write.writerow(output) 315 | with open(corridor_log_file, 'a+') as out: 316 | csv_write = csv.writer(out, dialect='excel') 317 | corridor_log_output = ['int_{}_checkout'.format(self.CONFIG['intersection'])] + output 318 | csv_write.writerow(corridor_log_output) 319 | return 320 | 321 | 322 | def get_reward(self): 323 | """Return the reward of the most current bus check-out event, and 324 | CLEAR the reward attribute 325 | 326 | Returns 327 | ------- 328 | float 329 | the reward of the most current bus check-out event 330 | """ 331 | reward, self.reward = self.reward, 0 332 | return reward 333 | 334 | def _compute_reward(self, travelTime, bus_object): 335 | """Compute reward gained by a newly checked out bus 336 | 337 | Parameters 338 | ---------- 339 | travelTime : TYPE 340 | Description 341 | bus_object : TYPE 342 | Description 343 | """ 344 | d_out = abs(bus_object.check_out_headway - 345 | self.CONFIG['target_headway']) 346 | d_in = abs(bus_object.check_in_headway - self.CONFIG['target_headway']) 347 | improve = d_in - d_out 348 | reward = 1 * improve - 0 * travelTime 349 | max_TT = self.CONFIG['maxTT'] 350 | # reward = sigmoid((max_TT - travelTime)/max_TT-0.5) 351 | reward = (max_TT - travelTime)/max_TT-0.5 352 | return reward 353 | 354 | def _get_toNearGreenPhase(self, currentPhase, phasetime, extended): 355 | """Calculate the time to the nearest focus phase green signal. 
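
        Worked example (illustrative numbers, not taken from config.py): with
        phase_duration = [30, 40, 20] and phase_of_interest = 3, a call during
        phase 2 at phasetime = 10 with extended = 0 gives to_interest = 90 and
        past_phase = 30, so the result is 90 - 10 + 0 - 30 = 50: the 30 s left
        in phase 2 plus the 20 s green of phase 3, i.e. the time until the
        green of interest ends.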
356 | 357 | Parameters 358 | ---------- 359 | currentPhase : int 360 | current intersection phase 361 | phasetime : int 362 | passed time from start of the current phase 363 | extended : int 364 | applied cumulated action on the intersection 365 | 366 | Returns 367 | ------- 368 | int 369 | the time to the nearest focus phase green signal 370 | """ 371 | if currentPhase <= self.CONFIG['phase_of_interest']: 372 | to_interest = util.time_to_phase_end(self.CONFIG['phase_duration'], 373 | self.CONFIG['phase_of_interest']) 374 | past_phase = util.time_to_phase_end(self.CONFIG['phase_duration'], 375 | currentPhase - 1) 376 | return to_interest - phasetime + extended - past_phase 377 | return sum(self.CONFIG['phase_duration']) - phasetime + extended 378 | 379 | def _find_last_check_in_time(self, bus_list): 380 | last_check_in= None 381 | for bus in bus_list: 382 | if last_check_in is not None: 383 | last_check_in = max(bus.check_in_time, last_check_in) 384 | else: 385 | last_check_in = bus.check_in_time 386 | return last_check_in 387 | 388 | def _bus_enter_handler(self, time, timeSta): 389 | """Summary 390 | 391 | Parameters 392 | ---------- 393 | time : int 394 | Absolute time of simulation in seconds 395 | timeSta : int 396 | Time of simulation in stationary period, in sec 397 | 398 | Returns 399 | ------- 400 | bool 401 | True if a bus has entered this intersection 402 | """ 403 | # retrieve intersection info from CONFIG 404 | intersection = self.CONFIG['intersection'] 405 | busCallDetector = self.CONFIG['busCallDetector'] 406 | section = self.CONFIG['section'] 407 | # get bus internal position 408 | busVehiclePosition = AKIVehGetVehTypeInternalPosition(1171922) 409 | target_headway = self.CONFIG['target_headway'] 410 | current_replicationID = ANGConnGetReplicationId() 411 | if current_replicationID != self.replicationID: 412 | self.empty_intersection() # clean the bus list in new replication ID 413 | self.replicationID = current_replicationID 414 | # determine which phase is green in the bus's perspective 415 | phase_of_interest = self.CONFIG['phase_of_interest'] 416 | # assumption for this is that all phases has duration defined 417 | total_phases = len(self.CONFIG['phase_duration']) 418 | # current phase time 419 | phasetime = time - ECIGetStartingTimePhase(intersection) 420 | # get current phase 421 | currentPhase = ECIGetCurrentPhase(intersection) 422 | # find phase before and after phase of interest 423 | phase_after_phase_of_interest = util.get_phase_number(total_phases, phase_of_interest + 1) 424 | # green phase ended and the buses that are still in POZ becomes cycled buses 425 | if currentPhase == phase_after_phase_of_interest and int(phasetime) <=1: 426 | self.cycled_bus = self.numbus 427 | self.reset_intersection = 1 428 | if self.extendedalready: 429 | print("phase of interest passed, try to reset extension") 430 | self.extendedalready = 0 # clear the extended already flag 431 | if currentPhase != phase_of_interest and self.reset_intersection==1: 432 | if self.extended!=0: 433 | print("time extension reset at time {}".format(time)) 434 | self.reset_intersection = 0 435 | self.extended = 0 436 | ECIChangeTimingPhase( 437 | intersection, 438 | phase_of_interest, 439 | self.CONFIG['phase_duration'][phase_of_interest - 1], 440 | timeSta) 441 | 442 | # Check number of all vehicles in and out 443 | self.allnumvel = AKIVehStateGetNbVehiclesSection(section, True) 444 | # bus enter check 445 | enterNum = AKIDetGetCounterCyclebyId( 446 | busCallDetector, 447 | busVehiclePosition) # Number of 
entering bus(es) in last step 448 | 449 | new_bus_entered = False 450 | 451 | if enterNum > 0: 452 | self.cycled_bus = 0 453 | self.total_bus += 1 454 | # First vehicle info 455 | busin_info = AKIDetGetInfVehInDetectionInfVehCyclebyId( 456 | busCallDetector, 0, busVehiclePosition) 457 | # Last vehicle info 458 | temp_info = AKIDetGetInfVehInDetectionInfVehCyclebyId( 459 | busCallDetector, 460 | AKIDetGetNbVehsEquippedInDetectionCyclebyId(busCallDetector, busVehiclePosition) - 1, 461 | busVehiclePosition) 462 | 463 | for i in range(enterNum): 464 | # If first vehicle equals last vehicle of last step 465 | if i == 0 and busin_info.idVeh == self.last_in_info: 466 | # Skip first vehicle and loop 467 | continue 468 | else: 469 | print("-------INTX:{} - No.{} Bus Checked -------".format(self.CONFIG['intersection'], self.total_bus)) 470 | new_bus_entered = True 471 | last_check_in_time_in_intersection = 0 472 | last_check_in_time_checkedout_bus = 0 473 | if self.bus_list: 474 | # there is still bus in intx (need to double check if it is a missed bus) 475 | last_check_in_time_in_intersection = self._find_last_check_in_time(self.bus_list) 476 | if self.last_checkout_bus is not None: 477 | # there is a checked out bus 478 | last_check_in_time_checkedout_bus = self.last_checkout_bus.check_in_time 479 | if not self.bus_list and self.last_checkout_bus is None: 480 | last_check_in_time = time - target_headway 481 | else: 482 | last_check_in_time = max(last_check_in_time_in_intersection, last_check_in_time_checkedout_bus) 483 | checked_in_bus = BusInPOZ(intersection, 484 | busin_info, 485 | currentPhase, 486 | phasetime, 487 | time, 488 | last_check_in=last_check_in_time) 489 | self.bus_list.append(checked_in_bus) 490 | self.numbus += 1 491 | self.allnumvel += 1 492 | self.log_state_for_check_in(phasetime, checked_in_bus) 493 | 494 | self.last_in_info = temp_info.idVeh 495 | 496 | # update state 497 | self._update_state(currentPhase, phasetime, time) 498 | 499 | 500 | return new_bus_entered 501 | 502 | def _update_state(self, currentPhase, phasetime, time): 503 | """ 504 | Update the state attribute of the intersection 505 | 506 | Parameters 507 | ---------- 508 | currentPhase: int 509 | current traffic phase 510 | phasetime: int 511 | time (in sec) elapsed in the current traffic phase 512 | time: int 513 | Absolute time of simulation in seconds 514 | 515 | Returns 516 | ------- 517 | None 518 | 519 | """ 520 | # compute new state without registered action 521 | tToNearGreenPhase = self._get_toNearGreenPhase(currentPhase, phasetime, self.extended) 522 | 523 | if self.numbus > 0: 524 | # last available checkout for this intersection 525 | if self.numbus > 1: 526 | # bunch, use current time as last checkout 527 | last_available_checkout_time = time 528 | elif self.last_checkout_bus is None: 529 | # no checked out bus, assume perfect headway 530 | last_available_checkout_time = time - self.CONFIG['target_headway'] 531 | else: 532 | last_available_checkout_time = self.last_checkout_bus.check_out_time 533 | # check in time of the last bus checked in 534 | last_check_in_time = self.bus_list[-1].check_in_time 535 | check_in_hdy = self.bus_list[-1].check_in_headway 536 | new_state = [last_available_checkout_time, last_check_in_time, check_in_hdy, self.numbus, self.allnumvel, 537 | tToNearGreenPhase] 538 | else: 539 | if self.last_checkout_bus: 540 | last_available_checkout_time = self.last_checkout_bus.check_out_time 541 | check_in_hdy = self.last_checkout_bus.check_in_headway 542 | last_check_in_time = 
self.last_checkout_bus.check_in_time 543 | new_state = [last_available_checkout_time, last_check_in_time, check_in_hdy, 0, self.allnumvel, tToNearGreenPhase] 544 | else: 545 | new_state = [0, 0, 0, 0, self.allnumvel, tToNearGreenPhase] 546 | 547 | self.state = new_state 548 | return 549 | 550 | def _bus_out_handler(self, time, timeSta): 551 | """Summary 552 | 553 | Parameters 554 | ---------- 555 | time : int 556 | Absolute time of simulation in seconds 557 | timeSta : int 558 | Time of simulation in stationary period, in sec 559 | """ 560 | # retrieve intersection info from CONFIG 561 | intersection = self.CONFIG['intersection'] 562 | busExitDetector = self.CONFIG['busExitDetector'] 563 | section = self.CONFIG['section'] 564 | # get bus internal position 565 | busVehiclePosition = AKIVehGetVehTypeInternalPosition(1171922) 566 | target_headway = self.CONFIG['target_headway'] 567 | self.replicationID = ANGConnGetReplicationId() 568 | # determine which phase is green in the bus's perspective 569 | phase_of_interest = self.CONFIG['phase_of_interest'] 570 | # assumption for this is that all phases has duration defined 571 | total_phases = len(self.CONFIG['phase_duration']) 572 | # current phase time 573 | phasetime = time - ECIGetStartingTimePhase(intersection) 574 | # get current phase 575 | currentPhase = ECIGetCurrentPhase(intersection) 576 | # find phase before and after phase of interest 577 | phase_after_phase_of_interest = get_phase_number( 578 | total_phases, phase_of_interest + 1) 579 | phase_before_phase_of_interest = get_phase_number( 580 | total_phases, phase_of_interest - 1) 581 | # green phase ended and the buses that are still in POZ becomes cycled buses 582 | if currentPhase == phase_after_phase_of_interest and int(phasetime) <=1: 583 | self.cycled_bus = self.numbus 584 | self.reset_intersection = 1 585 | if self.extendedalready: 586 | print("phase of interest passed, try to reset extension") 587 | self.extendedalready = 0 # clear the extended already flag 588 | if currentPhase != phase_of_interest and self.reset_intersection==1: 589 | if self.extended!=0: 590 | print("time extension reset at time {}".format(time)) 591 | self.reset_intersection = 0 592 | self.extended = 0 593 | ECIChangeTimingPhase( 594 | intersection, 595 | phase_of_interest, 596 | self.CONFIG['phase_duration'][phase_of_interest - 1], 597 | timeSta) 598 | 599 | # Check number of all vehicles in and out 600 | self.allnumvel = AKIVehStateGetNbVehiclesSection(section, True) 601 | 602 | # bus exit check 603 | exitNum = AKIDetGetCounterCyclebyId(busExitDetector, busVehiclePosition) # Number of exit vehicle in last step 604 | if exitNum > 0: 605 | print("-------- Bus exited %d ---------" % exitNum) 606 | print("Exited at time: " + str(time)) 607 | # First vehicle info 608 | busout_info = AKIDetGetInfVehInDetectionInfVehCyclebyId( 609 | busExitDetector, 0, busVehiclePosition) 610 | # Last vehicle info 611 | temp_info = AKIDetGetInfVehInDetectionInfVehCyclebyId( 612 | busExitDetector, AKIDetGetNbVehsEquippedInDetectionCyclebyId( 613 | busExitDetector, busVehiclePosition) - 1, busVehiclePosition) 614 | for i in range(exitNum): 615 | # If first vehicle equals last vehicle of last step 616 | if i == 0 and busout_info.idVeh == self.last_out_info: 617 | # Skip first vehicle and loop 618 | continue 619 | else: 620 | if self.numbus >=1: 621 | self.numbus -= 1 622 | self.allnumvel -= 1 623 | else: 624 | print("ERROR: try to reduce numbus to negative, checkout bus: {}".format(busout_info.idVeh)) 625 | 626 | print("Bus banching 
%d" % self.numbus) 627 | checkout_id = busout_info.idVeh 628 | successfully_checked_out_bus = self._checkout_bus_from_POZ(checkout_id, time) 629 | 630 | # update to keep track of the last checkout bus 631 | if successfully_checked_out_bus is False: 632 | raise Exception("Checkout detected for bus {}, but cannot found this bus in POZ".format(checkout_id)) 633 | self.last_checkout_bus = successfully_checked_out_bus 634 | 635 | travelTime = successfully_checked_out_bus.check_out_time - successfully_checked_out_bus.check_in_time 636 | # log parameters 637 | reward_gained = self._compute_reward(travelTime, successfully_checked_out_bus) 638 | self.reward += reward_gained 639 | self.log_parameter_file(phasetime, successfully_checked_out_bus) 640 | print("Reward gained at checked out: {}".format(reward_gained)) 641 | 642 | self.last_out_info = temp_info.idVeh 643 | 644 | self._update_state(currentPhase, phasetime, time) 645 | 646 | return 647 | 648 | 649 | -------------------------------------------------------------------------------- /Aimsun/prePOZ.py: -------------------------------------------------------------------------------- 1 | from AAPI import * 2 | 3 | class PrePOZ: 4 | 5 | def __init__(self, config): 6 | self.CONFIG = config 7 | self.last_in_info = None 8 | self.last_out_info = None 9 | self.time_list = [] 10 | 11 | def get_state(self): 12 | if len(self.time_list) == 0: 13 | return [0, 0] 14 | return [self.time_list[0], len(self.time_list)] 15 | 16 | def update(self, time, timeSta): 17 | self._enter_prePOZ(time, timeSta) 18 | self._exit_prePOZ(time, timeSta) 19 | 20 | def _enter_prePOZ(self, time, timeSta): 21 | # retrieve intersection info from CONFIG 22 | busExitDetector = self.CONFIG['busExitDetector'] 23 | # get bus internal position 24 | busVehiclePosition = AKIVehGetVehTypeInternalPosition(1171922) 25 | # bus exit check 26 | exitNum = AKIDetGetCounterCyclebyId(busExitDetector, busVehiclePosition) # Number of exit vehicle in last step 27 | if exitNum > 0: 28 | # First vehicle info 29 | busout_info = AKIDetGetInfVehInDetectionInfVehCyclebyId( 30 | busExitDetector, 0, busVehiclePosition) 31 | # Last vehicle info 32 | temp_info = AKIDetGetInfVehInDetectionInfVehCyclebyId( 33 | busExitDetector, AKIDetGetNbVehsEquippedInDetectionCyclebyId( 34 | busExitDetector, busVehiclePosition) - 1, busVehiclePosition) 35 | for i in range(exitNum): 36 | # If first vehicle equals last vehicle of last step 37 | if i == 0 and busout_info.idVeh == self.last_out_info: 38 | # Skip first vehicle and loop 39 | continue 40 | else: 41 | print("prePOZ-{} enter-{}".format(busExitDetector, time)) 42 | self.time_list.append(time) 43 | self.last_out_info = temp_info.idVeh 44 | 45 | 46 | def _exit_prePOZ(self, time, timeSta): 47 | busCallDetector = self.CONFIG['busCallDetector'] 48 | # get bus internal position 49 | busVehiclePosition = AKIVehGetVehTypeInternalPosition(1171922) 50 | # bus enter check 51 | enterNum = AKIDetGetCounterCyclebyId(busCallDetector, busVehiclePosition) 52 | if enterNum > 0: 53 | # First vehicle info 54 | busin_info = AKIDetGetInfVehInDetectionInfVehCyclebyId( 55 | busCallDetector, 0, busVehiclePosition) 56 | # Last vehicle info 57 | temp_info = AKIDetGetInfVehInDetectionInfVehCyclebyId( 58 | busCallDetector, 59 | AKIDetGetNbVehsEquippedInDetectionCyclebyId(busCallDetector, busVehiclePosition) - 1, 60 | busVehiclePosition) 61 | 62 | for i in range(enterNum): 63 | # If first vehicle equals last vehicle of last step 64 | if i == 0 and busin_info.idVeh == self.last_in_info: 65 | # Skip first 
vehicle and loop
66 |                     continue
67 |                 else:
68 |                     print("prePOZ-{} exit-{}".format(busCallDetector, time))
69 |                     self.time_list.pop(0)  # assumes the bus was recorded on prePOZ entry; raises IndexError otherwise
70 | 
71 |             self.last_in_info = temp_info.idVeh
72 | 
73 | 
74 | 
--------------------------------------------------------------------------------
/Aimsun/script.py:
--------------------------------------------------------------------------------
1 | from AAPI import *
2 | import os, sys, inspect
3 | current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
4 | parent_dir = os.path.dirname(current_dir)
5 | sys.path.insert(0, parent_dir)
6 | from config import *
7 | from corridor import *
8 | 
9 | 
10 | def AAPILoad():
11 |     """
12 |     Create the Corridor and its Intersection objects. Called when the module is loaded by Aimsun Next.
13 |     """
14 |     global corridor
15 |     corridor = Corridor(CORRIDOR)
16 |     return 0
17 | 
18 | 
19 | def AAPIInit():
20 |     """
21 |     Initializes the module. Called when Aimsun Next starts the simulation.
22 |     """
23 |     ANGConnEnableVehiclesInBatch(True)
24 |     return 0
25 | 
26 | 
27 | def AAPIManage(time, timeSta, timeTrans, acycle):
28 |     """
29 |     Called in every simulation step at the beginning of the cycle; it can be used to update
30 |     states, output states to the DQN, and implement TSP strategies (unused in this module).
31 | 
32 |     Parameters
33 |     ----------
34 |     time : double
35 |         Absolute time of simulation in seconds
36 |     timeSta : double
37 |         Time of simulation in stationary period, in seconds
38 |     timeTrans : double
39 |         Duration of warm-up period, in seconds
40 |     acycle : double
41 |         Duration of each simulation step in seconds
42 |     """
43 |     return 0
44 | 
45 | 
46 | def AAPIPostManage(time, timeSta, timeTrans, acycle):
47 |     """
48 |     Called in every simulation step at the end of the cycle; this module uses it to update
49 |     states, output states to the DQN, and implement the TSP strategies via the Corridor object.
50 | 
51 |     Parameters
52 |     ----------
53 |     time : double
54 |         Absolute time of simulation in seconds
55 |     timeSta : double
56 |         Time of simulation in stationary period, in seconds
57 |     timeTrans : double
58 |         Duration of warm-up period, in seconds
59 |     acycle : double
60 |         Duration of each simulation step in seconds
61 |     """
62 |     global corridor
63 |     corridor.aapi_post_manage(time, timeSta, timeTrans, acycle)
64 |     return 0
65 | 
66 | 
67 | def AAPIFinish():
68 |     """
69 |     Called when Aimsun Next finishes the simulation and can be used to terminate the module operations,
70 |     write summary information, close files, etc.
71 |     """
72 |     global corridor
73 |     # write last reward to indicate that the replication is done
74 |     corridor.write_last_reward()
75 |     return 0
76 | 
77 | 
78 | def AAPIUnLoad():
79 |     """
80 |     Called when the module is unloaded by Aimsun Next.
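    AAPIUnLoad is the last callback in the AAPI life cycle (AAPILoad ->
    AAPIInit -> AAPIManage/AAPIPostManage every step -> AAPIFinish ->
    AAPIUnLoad). Nothing needs to be released here: the files used to exchange
    state, reward, and action with the DQN are opened and closed around each
    individual read or write.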
81 | """ 82 | return 0 83 | -------------------------------------------------------------------------------- /Aimsun/util.py: -------------------------------------------------------------------------------- 1 | """Utility functions 2 | """ 3 | 4 | 5 | 6 | def get_phase_number(total_number_of_phases, phase_number): 7 | """Summary 8 | 9 | Parameters 10 | ---------- 11 | total_number_of_phases : TYPE 12 | Description 13 | phase_number : TYPE 14 | Description 15 | 16 | Returns 17 | ------- 18 | TYPE 19 | Description 20 | """ 21 | # wrap around the phases (use this to find phase after last phase or before phase 1) 22 | while phase_number <= 0: 23 | phase_number += total_number_of_phases 24 | while phase_number > total_number_of_phases: 25 | phase_number -= total_number_of_phases 26 | return phase_number 27 | 28 | 29 | def time_to_phase_end(phase_duration, phase): 30 | """Summary 31 | 32 | Parameters 33 | ---------- 34 | phase_duration : TYPE 35 | Description 36 | phase : TYPE 37 | Description 38 | 39 | Returns 40 | ------- 41 | TYPE 42 | Description 43 | """ 44 | return sum(phase_duration[:phase] 45 | ) if phase != len(phase_duration) else sum(phase_duration) -------------------------------------------------------------------------------- /DQN/ddqn_framework.py: -------------------------------------------------------------------------------- 1 | ####################################################################################### 2 | # Deep Q - Learning framework to play around with (dueling-, dense- and double q-learning ) 3 | # Author: Manuel Hass 4 | # 2017 5 | # 6 | # *uses mlp_framework.py as model framework 7 | # *examples in the end 8 | ####################################################################################### 9 | 10 | 11 | ### imports 12 | import numpy as np 13 | from numpy import linalg as LA 14 | import time 15 | import csv 16 | import pickle 17 | import os, sys, inspect 18 | current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 19 | parent_dir = os.path.dirname(current_dir) 20 | sys.path.insert(0, parent_dir) 21 | from config import * 22 | 23 | Q_target_log = LOG_PATH + 'Q_target.csv' 24 | Q_online_log = LOG_PATH + 'Q_online.csv' 25 | Rt_log = LOG_PATH + 'Rt.csv' 26 | Loss = LOG_PATH + 'loss.csv' 27 | 28 | 29 | 30 | def write_csv(path, data): 31 | with open(path, "a+") as out: 32 | csv_write = csv.writer(out, dialect='excel') 33 | csv_write.writerow(data) 34 | 35 | # helper functions 36 | def train_bellman(onlineDQN, targetDQN, batch, GAMMA): 37 | ''' 38 | updates the onlineDQN with target Q values for the greedy action(chosen by onlineDQN) 39 | ''' 40 | 41 | state, action, reward, next_state, done = batch 42 | Q = onlineDQN.infer(state) 43 | t = targetDQN.infer(next_state) 44 | a = np.argmax(onlineDQN.infer(next_state), axis=1) 45 | Q[range(Q.shape[0]), action.astype(int)] = reward + np.logical_not(done) * GAMMA * t[range(t.shape[0]), a] 46 | state_batch_ = state 47 | target_batch_ = Q 48 | 49 | onlineDQN.train(state_batch_, target_batch_) 50 | 51 | 52 | def update_target(onlineDQN, targetDQN, duel=False): 53 | ''' 54 | copies weights from onlineDQN to targetDQN 55 | ''' 56 | if duel: 57 | for i in range(len(targetDQN.LL0)): 58 | targetDQN.LL0[i].w = np.copy(onlineDQN.LL0[i].w) 59 | for i in range(len(targetDQN.LLA)): 60 | targetDQN.LLA[i].w = np.copy(onlineDQN.LLA[i].w) 61 | for i in range(len(targetDQN.LLV)): 62 | targetDQN.LLV[i].w = np.copy(onlineDQN.LLV[i].w) 63 | else: 64 | for i in range(len(targetDQN.Layerlist)): 65 | 
targetDQN.Layerlist[i].w = np.copy(onlineDQN.Layerlist[i].w)
66 | 
67 |         beta = 0.9  # NOTE: after the hard copy above, target == online, so this Polyak blend is a no-op; drop the copy loop above to get a true soft update
68 |         for i in range(len(targetDQN.Layerlist)):
69 |             targetDQN.Layerlist[i].w = beta * np.copy(onlineDQN.Layerlist[i].w) + (1 - beta) * targetDQN.Layerlist[i].w
70 | 
71 | 
72 | class ringbuffer:
73 |     '''
74 |     fast ringbuffer for the experience replay (numpy)
75 |     '''
76 | 
77 |     def __init__(self, SIZE):
78 |         self.buffer_size = 0
79 |         self.SIZE = SIZE
80 |         # buffers
81 |         self.state_buffer = None
82 |         self.action_buffer = None
83 |         self.reward_buffer = None
84 |         self.next_state_buffer = None
85 |         self.done_buffer = None
86 |         self.priorities = None
87 | 
88 |     def add(self, sample):
89 |         if self.state_buffer is None:
90 |             self.state_buffer = np.empty((0, sample[0].shape[1]))  # [1:]
91 |             self.action_buffer = np.empty((0, sample[1].shape[1]))  # [1:]
92 |             self.reward_buffer = np.empty((0, sample[2].shape[1]))  # [1:]
93 |             self.next_state_buffer = np.empty((0, sample[3].shape[1]))  # [1:]
94 |             self.done_buffer = np.empty((0, 1))  # [1:]
95 |             self.priorities = np.empty((0, 1))  # [1:]
96 |         # self.state_buffer = np.append(self.state_buffer, sample[0][True, :], axis=0)
97 |         self.state_buffer = np.append(self.state_buffer, sample[0], axis=0)
98 |         self.action_buffer = np.append(self.action_buffer, sample[1], axis=0)
99 |         self.reward_buffer = np.append(self.reward_buffer, sample[2], axis=0)
100 |         self.next_state_buffer = np.append(self.next_state_buffer, sample[3], axis=0)
101 |         self.done_buffer = np.append(self.done_buffer, sample[4], axis=0)
102 |         new_sample_prio = np.max(self.priorities) if self.priorities.shape[0] > 0 and np.max(
103 |             np.abs(self.priorities)) < 1e10 else 1.
104 |         self.priorities = np.append(self.priorities, np.array([new_sample_prio]).reshape(1, 1), axis=0)
105 |         self.priorities /= np.sum(self.priorities)
106 |         self.buffer_size += 1.
107 |         if self.buffer_size > self.SIZE:  # evict the oldest transition once the buffer is full
108 |             self.state_buffer = self.state_buffer[1:]
109 |             self.action_buffer = self.action_buffer[1:]
110 |             self.reward_buffer = self.reward_buffer[1:]
111 |             self.next_state_buffer = self.next_state_buffer[1:]
112 |             self.done_buffer = self.done_buffer[1:]
113 |             self.priorities = self.priorities[1:]
114 | 
115 |     def delete(self):
116 |         if self.buffer_size > 0:
117 |             # remove the most recently added transition
118 |             self.state_buffer = np.delete(self.state_buffer, -1, axis=0)
119 |             self.action_buffer = np.delete(self.action_buffer, -1, axis=0)
120 |             self.reward_buffer = np.delete(self.reward_buffer, -1, axis=0)
121 |             self.next_state_buffer = np.delete(self.next_state_buffer, -1, axis=0)
122 |             self.done_buffer = np.delete(self.done_buffer, -1, axis=0)
123 |             self.priorities = np.delete(self.priorities, -1, axis=0)
124 |             self.priorities /= np.sum(self.priorities)
125 |             self.buffer_size -= 1.
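    # A minimal usage sketch (hypothetical shapes, mirroring how trainer.train
    # calls add(); every field is a 2-D numpy row so np.append can stack them):
    #
    #     buf = ringbuffer(SIZE=50000)
    #     s, s_ = np.zeros((1, 16)), np.zeros((1, 16))      # state / next state
    #     buf.add([s, np.array([[2.]]), np.array([[0.5]]),  # action, reward
    #              s_, np.array([[False]])])                # done flag
    #     batch = buf.sample(32)  # [states, actions, rewards, next_states, dones]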
126 | 127 | def get(self): 128 | return [self.state_buffer, 129 | self.action_buffer, 130 | self.reward_buffer, 131 | self.next_state_buffer, 132 | self.done_buffer] 133 | 134 | def sample(self, BATCHSIZE, prio=False): 135 | if prio: 136 | a = self.done_buffer.shape[0] 137 | c = self.priorities.reshape((a)) 138 | b = c / np.sum(c) 139 | ind = np.random.choice(np.arange(a), BATCHSIZE, replace=False, p=b).astype(int) 140 | else: 141 | ind = np.random.choice(np.arange(self.done_buffer.shape[0]), BATCHSIZE, replace=False).astype(int) 142 | 143 | return [self.state_buffer[ind], 144 | self.action_buffer[ind].reshape(-1), 145 | self.reward_buffer[ind].reshape(-1), 146 | self.next_state_buffer[ind], 147 | self.done_buffer[ind].reshape(-1)] 148 | 149 | def prio_update(self, onlineDQN, targetDQN, epsilon=0.01, alpha=0.6, GAMMA=0.99, CHUNK=5000.): 150 | 151 | # state,action,reward,next_state,done = self.get() 152 | getbuffer = self.get() 153 | # CHUNK = 5000. # max number of states used for inference at once 154 | loops = int(getbuffer[0].shape[0] / CHUNK) # number of loops needed to update all prios 155 | priobuffer = np.empty((0)) 156 | j = -1 157 | 158 | for j in range(loops): # if replaybuffer size bigger than CHUNK size 159 | state, action, reward, next_state, done = [x[int(j * CHUNK):int((j + 1) * CHUNK)] for x in getbuffer] 160 | Q = onlineDQN.infer(state) 161 | Q_ = np.copy(Q) 162 | t = targetDQN.infer(next_state) 163 | a = np.argmax(onlineDQN.infer(next_state), axis=1) 164 | Q[range(Q.shape[0]), action.astype(int)] = reward + np.logical_not(done) * GAMMA * t[range(t.shape[0]), a] 165 | TD_loss = np.abs((Q_ - Q)) 166 | TD_loss = TD_loss[range(TD_loss.shape[0]), a] 167 | prio = np.power((TD_loss + epsilon), alpha) 168 | prio /= np.sum(prio) 169 | priobuffer = np.append(priobuffer, prio) 170 | 171 | state, action, reward, next_state, done = [x[int((j + 1) * CHUNK):] for x in getbuffer] 172 | Q = onlineDQN.infer(state) 173 | Q_ = np.copy(Q) 174 | t = targetDQN.infer(next_state) 175 | a = np.argmax(onlineDQN.infer(next_state), axis=1) 176 | Q[range(Q.shape[0]), action.astype(int)] = reward + np.logical_not(done) * GAMMA * t[range(t.shape[0]), a] 177 | TD_loss = np.abs((Q_ - Q)) 178 | TD_loss = TD_loss[range(TD_loss.shape[0]), a] 179 | prio = np.power((TD_loss + epsilon), alpha) 180 | prio /= np.sum(prio) 181 | priobuffer = np.append(priobuffer, prio) 182 | self.priorities = priobuffer[:, True] 183 | 184 | 185 | class trainer_config: 186 | ''' 187 | configuration for the Q learner (trainer) for easy reuse 188 | everything not model related goes here. 
maybe 189 | ''' 190 | 191 | def __init__(self, 192 | app_name, 193 | BUFFER_SIZE=50e3, 194 | STEPS_PER_EPISODE=500, 195 | MAX_STEPS=100000, 196 | UPDATE_TARGET_STEPS=1000, 197 | BATCH_SIZE=32, 198 | GAMMA=0.99, 199 | EXPLORATION=100, 200 | E_MIN=0.01, 201 | priority=False, 202 | alpha=0.6, 203 | epsilon=0.01 204 | 205 | ): 206 | ### game environment 207 | self.app_name = app_name 208 | # env.close() 209 | ### training variables 210 | self.BUFFER_SIZE = BUFFER_SIZE 211 | self.STEPS_PER_EPISODE = STEPS_PER_EPISODE 212 | self.MAX_STEPS = MAX_STEPS 213 | self.UPDATE_TARGET_STEPS = UPDATE_TARGET_STEPS 214 | self.BATCH_SIZE = BATCH_SIZE 215 | self.GAMMA = GAMMA 216 | self.EXPLORATION = EXPLORATION 217 | self.E_MIN = E_MIN 218 | #### PRIO MODULE ( default := alpha= 0.,epsilon=0.01) 219 | self.priority = priority 220 | self.alpha = alpha 221 | self.epsilon = epsilon 222 | 223 | 224 | class trainer: 225 | ''' 226 | the actual DDQN-> 2 models, 1 config 227 | train here, get your models and plots 228 | ''' 229 | 230 | def __init__(self, onlineModel, targetModel, trainer_config, env): 231 | ### load config 232 | self.app_name = trainer_config.app_name 233 | self.env = env 234 | 235 | ### training variables 236 | self.BUFFER_SIZE = trainer_config.BUFFER_SIZE 237 | self.STEPS_PER_EPISODE = trainer_config.STEPS_PER_EPISODE 238 | self.MAX_STEPS = trainer_config.MAX_STEPS 239 | self.UPDATE_TARGET_STEPS = trainer_config.UPDATE_TARGET_STEPS 240 | self.BATCH_SIZE = trainer_config.BATCH_SIZE 241 | self.GAMMA = trainer_config.GAMMA 242 | self.EXPLORATION = trainer_config.EXPLORATION 243 | self.E_MIN = trainer_config.E_MIN 244 | self.priority = trainer_config.priority 245 | self.alpha = trainer_config.alpha 246 | self.epsilon = trainer_config.epsilon 247 | 248 | ### models 249 | self.onlineNet = onlineModel 250 | self.targetNet = targetModel 251 | 252 | ### logs 253 | self.reward_plot = [] 254 | self.loss_plot = [] 255 | self.online_q_plot = [] 256 | self.target_q_plot = [] 257 | 258 | ### ringbuffer 259 | self.REPLAY_BUFFER = ringbuffer(self.BUFFER_SIZE) 260 | 261 | # def load_config(self, config): 262 | # ''' 263 | # loads new config 264 | # ''' 265 | # ### env 266 | # self.app_name = config.app_name 267 | # self.env = AimsunEnv() 268 | # ### training variables 269 | # self.BUFFER_SIZE = config.BUFFER_SIZE 270 | # self.STEPS_PER_EPISODE = config.STEPS_PER_EPISODE 271 | # self.MAX_STEPS = config.MAX_STEPS 272 | # self.UPDATE_TARGET_STEPS = config.UPDATE_TARGET_STEPS 273 | # self.BATCH_SIZE = config.BATCH_SIZE 274 | # self.GAMMA = config.GAMMA 275 | # self.EXPLORATION = config.EXPLORATION 276 | # self.E_MIN = config.E_MIN 277 | # self.priority = config.priority 278 | # self.alpha = config.alpha 279 | # self.epsilon = config.epsilon 280 | 281 | def save_config(self): 282 | ''' 283 | returns current config 284 | ''' 285 | return trainer_config(self.app_name, 286 | self.BUFFER_SIZE, 287 | self.STEPS_PER_EPISODE, 288 | self.MAX_STEPS, 289 | self.UPDATE_TARGET_STEPS, 290 | self.BATCH_SIZE, 291 | self.GAMMA, 292 | self.EXPLORATION, 293 | self.E_MIN, 294 | self.priority, 295 | self.alpha, 296 | self.epsilon 297 | ) 298 | 299 | def normalize_state(self, state): 300 | state_buffer = self.REPLAY_BUFFER.state_buffer 301 | state = np.array(state).reshape(1, len(state)) 302 | if not (state_buffer is None or state_buffer.shape[0] <= 1): 303 | mean = np.mean(state_buffer, axis=0) 304 | std = np.std(state_buffer, axis=0) 305 | state = np.divide((state - mean), std, out=(state - mean), where=(std!=0)) 306 | return state 307 | 
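    # Note on normalize_state (above): it standardizes each feature with a
    # z-score, z = (s - mean) / std, where mean and std are taken over all
    # states currently stored in the replay buffer. Features with std == 0 are
    # left as (s - mean) via np.divide's `where` mask, and until the buffer
    # holds at least two states the raw state is passed through unchanged.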
308 | def save_model(self): 309 | all_attribute = [self.save_config(), 310 | self.env, 311 | self.onlineNet, 312 | self.targetNet, 313 | self.reward_plot, 314 | self.loss_plot, 315 | self.REPLAY_BUFFER, 316 | self.target_q_plot, 317 | self.online_q_plot] 318 | is_written = False 319 | while not is_written: 320 | try: 321 | with open(LOG_PATH + 'Model', 'wb') as fout: 322 | pickle.dump(all_attribute, fout) 323 | is_written = True 324 | except: 325 | print("Save model failed.") 326 | return 327 | 328 | def load_model(self, flag=False): 329 | if flag: 330 | try: 331 | # all_attribute = [self.save_config(), self.env, self.onlineNet, self.targetNet, 332 | # self.reward_plot, self.loss_plot, self.REPLAY_BUFFER] 333 | with open(LOG_PATH + 'Model', 'rb') as fin: 334 | all_attribute = pickle.load(fin) 335 | 336 | 337 | if len(all_attribute) != 7: 338 | print("Model empty...") 339 | pass 340 | 341 | trainer_config = all_attribute[0] 342 | env = all_attribute[1] 343 | onlineNet = all_attribute[2] 344 | targetNet = all_attribute[3] 345 | reward_plot = all_attribute[4] 346 | loss_plot = all_attribute[5] 347 | REPLAY_BUFFER = all_attribute[6] 348 | 349 | 350 | self.app_name = trainer_config.app_name 351 | self.env = env 352 | ### training variables 353 | self.BUFFER_SIZE = trainer_config.BUFFER_SIZE 354 | self.STEPS_PER_EPISODE = trainer_config.STEPS_PER_EPISODE 355 | self.MAX_STEPS = trainer_config.MAX_STEPS 356 | self.UPDATE_TARGET_STEPS = trainer_config.UPDATE_TARGET_STEPS 357 | self.BATCH_SIZE = trainer_config.BATCH_SIZE 358 | self.GAMMA = trainer_config.GAMMA 359 | self.EXPLORATION = trainer_config.EXPLORATION 360 | self.E_MIN = trainer_config.E_MIN 361 | self.priority = trainer_config.priority 362 | self.alpha = trainer_config.alpha 363 | self.epsilon = trainer_config.epsilon 364 | 365 | ### models 366 | self.onlineNet = onlineNet 367 | self.targetNet = targetNet 368 | 369 | ### logs 370 | self.reward_plot = reward_plot 371 | self.loss_plot = loss_plot 372 | 373 | ### ringbuffer 374 | self.REPLAY_BUFFER = REPLAY_BUFFER 375 | except: 376 | print("Model not found...") 377 | pass 378 | else: 379 | pass 380 | 381 | # def log_weight(self): 382 | # onlineFrob = [] 383 | # targetFrob = [] 384 | # online = self.onlineNet 385 | # target = self.targetNet 386 | # for L in range(len(online.Layerlist)): 387 | # onlineFrob.append(LA.norm(online.Layerlist[L].w)) 388 | # targetFrob.append(LA.norm(target.Layerlist[L].w)) 389 | 390 | # with open(online_w, "a+") as online: 391 | # csv_write = csv.writer(online, dialect='excel') 392 | # csv_write.writerow(onlineFrob) 393 | # with open(target_w, "a+") as target: 394 | # csv_write = csv.writer(target, dialect='excel') 395 | # csv_write.writerow(targetFrob) 396 | 397 | def train(self, flag=False, log=False): 398 | 399 | EPOCH = 9999 400 | step_counter = 0. 401 | eps_rew = 0. 402 | 403 | for epoch in range(EPOCH): 404 | current_state = self.normalize_state(self.env.reset()) 405 | 406 | for STEP in range(self.MAX_STEPS): 407 | e = 1. 
/ ((len(self.loss_plot) / self.EXPLORATION) + 1) 408 | if np.random.uniform(0, 1) < max(self.E_MIN, e): 409 | # random action 410 | action = self.env.rand_action() 411 | 412 | else: 413 | Q = (self.onlineNet.infer(current_state))[0] 414 | action = np.argmax(Q) 415 | # apply action 416 | next_state, reward, done = self.env.step(action) 417 | next_state = self.normalize_state(next_state) 418 | 419 | # end training when simulation ends 420 | if done: break 421 | if not self.env.exclude(): 422 | eps_rew += reward 423 | self.REPLAY_BUFFER.add( 424 | [current_state, 425 | np.array(action).reshape(1, 1), 426 | np.array(reward).reshape(1, 1), 427 | next_state, 428 | np.array(done).reshape(1, 1)]) 429 | step_counter += 1. 430 | 431 | 432 | if STEP > 2000 or flag: 433 | BATCH = self.REPLAY_BUFFER.sample(self.BATCH_SIZE, prio=self.priority) 434 | train_bellman(self.onlineNet, self.targetNet, BATCH, self.GAMMA) 435 | write_csv(Q_target_log, (self.targetNet.infer(current_state))[0]) 436 | write_csv(Q_online_log, (self.onlineNet.infer(current_state))[0]) 437 | write_csv(Rt_log, [reward]) 438 | write_csv(Loss, [self.onlineNet.loss]) 439 | self.loss_plot += [self.onlineNet.loss] 440 | self.reward_plot += [eps_rew] 441 | 442 | current_state = next_state 443 | 444 | if (STEP + 1) % self.UPDATE_TARGET_STEPS == 0: 445 | if self.priority: self.REPLAY_BUFFER.prio_update(self.onlineNet, self.targetNet, GAMMA=self.GAMMA, 446 | alpha=self.alpha, epsilon=self.epsilon) 447 | if log: print('update: ', len(self.reward_plot), ' episodes ---- 2 eps average reward: ', 448 | np.array(self.reward_plot)[-2:].mean()) 449 | update_target(self.onlineNet, self.targetNet, duel=False) 450 | 451 | # self.log_weight() 452 | if STEP % 10 == 0: self.save_model() 453 | 454 | 455 | 456 | -------------------------------------------------------------------------------- /Env/__init__.py: -------------------------------------------------------------------------------- 1 | """Env module init file 2 | """ 3 | # __init__.py 4 | from .aimsun_env import * -------------------------------------------------------------------------------- /Env/aimsun_env.py: -------------------------------------------------------------------------------- 1 | """AimsunEnv 2 | """ 3 | import os, sys, inspect 4 | current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 5 | parent_dir = os.path.dirname(current_dir) 6 | sys.path.insert(0, parent_dir) 7 | from config import * 8 | import csv 9 | import numpy as np 10 | from uuid import uuid4 11 | from .env import Environment 12 | 13 | 14 | REWARD_INPUT_LEN = 2 15 | STATE_INPUT_LEN = 17 16 | 17 | class AimsunEnv(Environment): 18 | """Aimsun Next environment 19 | 20 | Attributes 21 | ---------- 22 | num_step : int 23 | total time steps simulated 24 | reward_flag : int 25 | check if received reward is at the new time step 26 | state_flag : int 27 | check if received state is at the new time step 28 | """ 29 | 30 | def __init__(self, action_space): 31 | """Initialize Aimsun Next environment object 32 | 33 | Parameters 34 | ---------- 35 | action_space : list 36 | list of available actions 37 | """ 38 | Environment.__init__(self, name='Aimsun', action_space=action_space) 39 | self.reward_flag = 0 40 | self.state_flag = 0 41 | self.num_step = 0 42 | self.check_in_time = [] 43 | 44 | def get_state_size(self): 45 | """Return the state size 46 | 47 | Returns 48 | ------- 49 | int 50 | state size 51 | """ 52 | return STATE_INPUT_LEN - 1 53 | 54 | def get_action_size(self): 55 | """Return the action 
space size
 56 | 
 57 |         Returns
 58 |         -------
 59 |         int
 60 |             action space size
 61 |         """
 62 |         return len(self.action_space)
 63 | 
 64 |     def _receive_and_log_reward(self):
 65 |         """Receive and log the new reward
 66 | 
 67 |         Returns
 68 |         -------
 69 |         float, bool
 70 |             the newly received reward, and whether the replication has ended
 71 |         """
 72 |         # poll REWARD_LOG until a reward with a new flag value appears
 73 |         is_read = False
 74 |         while not is_read:
 75 |             try:
 76 |                 f = open(self.REWARD_LOG, "r")
 77 |                 data = f.read()
 78 |                 f.close()
 79 |                 data = data.split()
 80 |                 if len(data) != REWARD_INPUT_LEN: continue
 81 |                 reward, new_flag = float(data[0]), int(data[1])
 82 |                 if new_flag != self.reward_flag:
 83 |                     is_read = True
 84 |                     self.reward_flag = new_flag
 85 |                     if new_flag == 0:
 86 |                         return reward, True
 87 |             except:
 88 |                 continue
 89 |         return reward, False
 90 | 
 91 |     def _write_action(self, index):
 92 |         """Write the newly chosen action for Aimsun to read
 93 | 
 94 |         Parameters
 95 |         ----------
 96 |         index : int
 97 |             the index of the new action
 98 |         """
 99 |         is_written = False
100 |         while not is_written:
101 |             try:
102 |                 f = open(self.ACTION_LOG, "w+")
103 |                 f.write("{} {} {}".format(self.action_space[index][0], self.action_space[index][1], uuid4().int))
104 |                 f.close()
105 |                 is_written = True
106 |             except:
107 |                 continue
108 | 
109 |     def _get_state(self):
110 |         """Receive and return the new state
111 | 
112 |         Returns
113 |         -------
114 |         list
115 |             received state
116 |         """
117 |         is_read = False
118 |         while not is_read:
119 |             try:
120 |                 f = open(self.STATE_LOG, "r")
121 |                 data = f.read()
122 |                 f.close()
123 |                 data = data.split()
124 |                 if len(data) != STATE_INPUT_LEN: continue
125 |                 new_flag = int(data[-1])
126 |                 if new_flag != self.state_flag:
127 |                     S_ = np.array(list(map(lambda x: float(x), data[:-1])))
128 |                     self.check_in_time.append(max(S_[3], S_[11]))
129 |                     is_read = True
130 |                     self.state_flag = new_flag
131 |             except:
132 |                 continue
133 |         self.num_step += 1
134 |         return S_
135 | 
136 |     def step(self, action_index):
137 |         """Write the action to Aimsun and wait for the new
138 |         state and reward
139 | 
140 |         Parameters
141 |         ----------
142 |         action_index : int
143 |             the index of the action space
144 | 
145 |         Returns
146 |         -------
147 |         list, float, bool
148 |             new state, new reward, and simulation finish
149 |         """
150 |         self._write_action(action_index)
151 |         S_ = self._get_state()
152 |         reward, done = self._receive_and_log_reward()
153 |         # print log
154 |         if self.num_step < 50 or self.num_step % 1000 == 0:
155 |             print("="*20 + " Step: {} ".format(self.num_step) + "="*20)
156 |         return S_, reward, done
157 | 
158 |     def reset(self):
159 |         """Reset the Aimsun environment and receive the first state
160 |         """
161 |         print('Reset Aimsun Environment')
162 |         print('Waiting for the first bus...')
163 |         return self._get_state()
164 | 
165 |     def exclude(self):
166 |         """Check whether the latest transition should be excluded from training
167 | 
168 |         Returns
169 |         -------
170 |         bool
171 |             True if fewer than three check-ins have been recorded, or if the
172 |             latest check-in times are out of order (e.g. two buses checked in
173 |             simultaneously at different intersections)
174 |         """
175 |         if len(self.check_in_time) > 10: self.check_in_time.pop(0)
176 |         if len(self.check_in_time) <= 2: return True
177 |         if self.check_in_time[-1] < self.check_in_time[-2]:
178 |             return True
179 |         return False
180 | 
181 | 
182 | 
183 | 
--------------------------------------------------------------------------------
/Env/env.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A base class defining the functionality that the environment must have
 3 | """
 4 | import numpy as np
 5 | import random
 6 | from config import *
 7 | 
 8 | 
 9 | 
10 | class Environment():
11 |     """
12 |     Attributes:
13 |         env_name (str): name of this environment
14 |         action_space (List[(int, int)]): a set of possible actions
15 |         STATE_LOG (str): file path to the textfile that records the states (collected when time steps are renewed)
16 |         ACTION_LOG (str): file path to the textfile that records the chosen action at every time step
17 |         REWARD_LOG (str): file path to the textfile that records the reward at every time step
18 |     """
19 | 
20 |     def __init__(self, name, action_space):
21 |         """
22 |         Initialize an environment object
23 | 
24 |         Args:
25 |             name (str): name of the initialized environment
26 |             action_space (List[(int, int)]): a set of possible actions
27 |         """
28 |         self.env_name = name
29 |         self.action_space = action_space
30 |         self.STATE_LOG = STATE    # temp state
31 |         self.ACTION_LOG = ACTION  # temp action
32 |         self.REWARD_LOG = REWARD  # temp reward
33 |         self.REWARD_CSV = REWARD_CSV  # rewards of all steps
34 | 
35 |     def step(self, action_index):
36 |         """
37 |         Apply the chosen action, then return the next state, the reward, and a
38 |         boolean indicating whether the simulation has ended
39 | 
40 |         Args:
41 |             action_index (int): the action index is equal to argmax Q, and is
42 |                 used to obtain the action from the action space
43 | 
44 |         Raises:
45 |             NotImplementedError
46 |         """
47 |         raise NotImplementedError
48 | 
49 |     def reset(self):
50 |         """
51 |         Begin a new episode and return the initial state
52 | 
53 |         Raises:
54 |             NotImplementedError
55 |         """
56 |         raise NotImplementedError
57 | 
58 |     def rand_action(self):
59 |         """
60 |         Choose an action randomly (exploration)
61 | 
62 |         Returns:
63 |             int: index of a random action drawn from the action space
64 |         """
65 |         return np.random.randint(0, len(self.action_space))
66 | 
67 | 
68 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Coordinated Transit Signal Priority (cTSP)
 2 | 
 3 | 
 4 | ## Introduction
 5 | 
 6 | Coordinated Transit Signal Priority across two traffic intersections in the Toronto area, improving bus speed and reliability with a single deep reinforcement learning agent.
 7 | 
 8 | 
35 | 
36 | ## Model Set-up
37 | ### 1. Time step
38 | Time steps are renewed upon bus check-in events. (The implementation must handle the case where more than one bus checks in at the same time, e.g. Bus A checks in at time x at Intx 1 while Bus B checks in at time x at Intx 2.)
39 | A time step is a time point at which a bus is detected by a loop detector. Every check-in event at any check-in loop detector in the system (the system comprises all intersections and the road segments connecting them) initiates a new time step.
40 | > Example: Bus 1, 2 and 3 are in the system at time step t. When Bus 4 checks in, time step t+1 is initiated.
41 | 
42 | At each time step, the RL model
43 | - reads the state of the current environment,
44 | - chooses an action, and
45 | - calculates the reward of the last time step.
46 | 
47 | ### 2. State
48 | States are collected when time steps are renewed. A state includes observations at all intersections in the system, covering bus-, traffic-, and signal-related information. Each intersection contributes the following observations:
49 | 
50 | - Upstream of the POZ, downstream of the upstream intersection (prePOZ)
51 |   - Check-out time of the bus closest to the downstream POZ
52 |   - Number of buses
53 | - In the POZ
54 |   - Last available check-out time
55 |     > If the POZ is bunched (more than one bus in the POZ, i.e. number of buses > 1): use the current time as the check-out time
56 |   - Check-in time of the current bus (the current time), i.e. the bus that initiated this time step
57 |   - Check-in headway
58 |   - Number of buses in the POZ
59 |   - Number of cars in the POZ
60 |   - Time to the end of EW green, excluding any registered action
61 |     > Registered action: any action that has been planned but not yet executed, or that is being executed at the time of check-in
62 | ### 3. Action
63 | An action is chosen at every time step as soon as the state is received by the RL model. Actions adjust the duration of the first available EW green at each intersection at time step t.
64 | - If a bus checked in during EW red, the adjustment is made to the first available EW green following the red
65 | - If a bus checked in during EW green, the adjustment is made to the current EW green
66 | > Example:
67 | A bus checks in at Intx 1 at time step t. At time step t the phase at Intx 1 is red in the direction of bus movement, so the adjustment there is made to the EW green following that red; at time step t the phase at Intx 2 is EW green, so the adjustment there is made to this EW green.
68 | 
69 | To ensure consistency with iTSP, the actions are EW green truncations of -20, -15, -10 or -5 s, do-nothing, and green extensions of +5, +10, +15 or +20 s.
70 | 
71 | a_t = (a_t^1, a_t^2)
72 | 
73 | When a_t is selected, if there is a registered action (possibly decided at time step t-1) for an intersection, a_t overwrites a_{t-1} where possible.
74 | If a truncation action is selected and the truncation amount exceeds the remaining EW green, the EW green ends immediately.
75 | ### 4. Reward
76 | The reward associated with the state and action at time step t is calculated at time step t+1. Rewards are computed using the data (headway and travel time in the POZ) of all check-out events that occurred between time steps t and t+1, as in the sketch below.
77 | > Example:
78 | If bus A and bus B checked out at two different intersections (or the same intersection) between time steps t and t+1, then r_t = r_A + r_B = 0.6*(headway improvement of bus A) - 0.4*(travel time of bus A in the POZ) + 0.6*(headway improvement of bus B) - 0.4*(travel time of bus B in the POZ).
79 | 
80 | If no bus checked out between time steps t and t+1, r_t = 0.
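A minimal sketch of this computation (the names `headway_improvement` and `poz_travel_time` are illustrative placeholders, not the project's API):

```python
def step_reward(checkouts, w_hdy=0.6, w_tt=0.4):
    """Sum the per-bus rewards over all check-out events between steps t and t+1."""
    return sum(w_hdy * bus.headway_improvement - w_tt * bus.poz_travel_time
               for bus in checkouts)  # an empty list yields r_t = 0
```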
81 | 82 | 83 | -------------------------------------------------------------------------------- /agent.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "colab": {}, 8 | "colab_type": "code", 9 | "id": "oxGS_XGxlaYb", 10 | "scrolled": true 11 | }, 12 | "outputs": [ 13 | { 14 | "name": "stdout", 15 | "output_type": "stream", 16 | "text": [ 17 | "Reset Aimsun Environment\n", 18 | "Waiting for the first bus...\n" 19 | ] 20 | } 21 | ], 22 | "source": [ 23 | "from Env import AimsunEnv\n", 24 | "from DQN import ddqn_framework as ddqn\n", 25 | "from DQN import mlp_framework as nn\n", 26 | "import numpy as np\n", 27 | "import pickle\n", 28 | "import itertools\n", 29 | "\n", 30 | "# env setup\n", 31 | "intx_action = np.arange(-20, 21, 5)\n", 32 | "action_space = list(itertools.product(intx_action, intx_action))\n", 33 | "env = AimsunEnv(action_space)\n", 34 | "\n", 35 | "clean_folder = True\n", 36 | "if clean_folder:\n", 37 | " import config\n", 38 | " config.clean_folder_and_initialize()\n", 39 | "\n", 40 | "# model config\n", 41 | "configuration = ddqn.trainer_config(app_name='AimsunNext',\n", 42 | " # BUFFER_SIZE = 50e3,\n", 43 | " BUFFER_SIZE = 50000,\n", 44 | " # STEPS_PER_EPISODE = 500,\n", 45 | " STEPS_PER_EPISODE=500,\n", 46 | " MAX_STEPS = 9999999,\n", 47 | " # UPDATE_TARGET_STEPS = 1000,\n", 48 | " UPDATE_TARGET_STEPS=3000,\n", 49 | " BATCH_SIZE = 32,\n", 50 | " GAMMA = 0.9,\n", 51 | " EXPLORATION = 5000,\n", 52 | " E_MIN = 0.1,\n", 53 | " priority = True,\n", 54 | " # alpha = 0.001,\n", 55 | " alpha = 0.01,\n", 56 | " epsilon = 0.1\n", 57 | " )\n", 58 | "\n", 59 | "# online model\n", 60 | "A1 = nn.layer(env.get_state_size(), nodes=512)\n", 61 | "A2 = nn.layer(512, 512)\n", 62 | "A3 = nn.layer(512, 512)\n", 63 | "A4 = nn.layer(512, 512)\n", 64 | "AOUT = nn.layer(512, env.get_action_size())\n", 65 | "AOUT.f = nn.f_iden\n", 66 | "\n", 67 | "# target model\n", 68 | "L1 = nn.layer(env.get_state_size(), nodes=512)\n", 69 | "L2 = nn.layer(512, 512)\n", 70 | "L3 = nn.layer(512, 512)\n", 71 | "L4 = nn.layer(512, 512)\n", 72 | "LOUT = nn.layer(512, env.get_action_size())\n", 73 | "LOUT.f = nn.f_iden\n", 74 | "\n", 75 | "onlineNet = nn.mlp([A1, A2, A3, A4, AOUT])\n", 76 | "onlineNet.erf = nn.log_cosh # cost in prev aimsun_dqn\n", 77 | "targetNet = nn.mlp([L1, L2, L3, L4, LOUT])\n", 78 | "targetNet.erf = nn.log_cosh # cost in prev aimsun_dqn\n", 79 | "\n", 80 | "ddqn_model = ddqn.trainer(onlineNet,targetNet,configuration, env)\n", 81 | "ddqn_model.load_model(False)\n", 82 | "ddqn_model.train(log=True)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [] 91 | } 92 | ], 93 | "metadata": { 94 | "colab": { 95 | "name": "Untitled0.ipynb", 96 | "provenance": [], 97 | "version": "0.3.2" 98 | }, 99 | "kernelspec": { 100 | "display_name": "Python 3", 101 | "language": "python", 102 | "name": "python3" 103 | }, 104 | "language_info": { 105 | "codemirror_mode": { 106 | "name": "ipython", 107 | "version": 3 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython3", 114 | "version": "3.7.6" 115 | } 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 1 119 | } 120 | -------------------------------------------------------------------------------- /config.py: 
--------------------------------------------------------------------------------
 1 | """configuration for the project
 2 | 
 3 | Attributes
 4 | ----------
 5 | INTERSECTION_1 : dict
 6 |     the metadata of the first intersection
 7 | INTERSECTION_2 : dict
 8 |     the metadata of the second intersection
 9 | CORRIDOR : list
10 |     the corridor (list of intersection dicts) used for training
11 | CWD : str
12 |     the project directory
13 | LOG_PATH : str
14 |     log files folder
15 | """
16 | 
17 | import os, inspect, shutil
18 | 
19 | CWD = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
20 | LOG_PATH = CWD + '/log_files/'
21 | 
22 | # log files
23 | STATE = LOG_PATH + 'state.txt'
24 | ACTION = LOG_PATH + 'action.txt'
25 | REWARD = LOG_PATH + 'reward.txt'
26 | REWARD_CSV = LOG_PATH + 'reward_log.csv'
27 | PARAMETER_LOG = LOG_PATH + 'parameter_log.csv'
28 | Num_bus_in_rep = LOG_PATH + 'Num_bus_in_rep.txt'
29 | 
30 | 
31 | def clean_folder_and_initialize():
32 |     folder = LOG_PATH
33 |     for filename in os.listdir(folder):
34 |         file_path = os.path.join(folder, filename)
35 |         try:
36 |             if os.path.isfile(file_path) or os.path.islink(file_path):
37 |                 os.unlink(file_path)
38 |             elif os.path.isdir(file_path):
39 |                 shutil.rmtree(file_path)
40 |         except Exception as e:
41 |             print('Failed to delete %s. Reason: %s' % (file_path, e))
42 |     os.makedirs(LOG_PATH, exist_ok=True)  # make sure the (now empty) log folder exists
43 |     with open(PARAMETER_LOG, 'w+') as log_file:
44 |         log_file.write('log, replication ID, vehicle ID, checkin time, checkout time, check in phase number, check in phase time, checkout phase time, checkin headway, checkout headway, action 1, action 2 as decided at the bus check in, registered action at bus check out, Travel time, reward, prePOZ bus checkout time, prePOZ numbus, last_available_checkout_time, last_check_in_time, check_in_hdy, numbus, allnumvel, tToNearGreenPhase, prePOZ bus checkout time, prePOZ numbus, last_available_checkout_time, last_check_in_time, check_in_hdy, numbus, allnumvel, tToNearGreenPhase\n')
45 | 
46 | 
47 | INTERSECTION_1 = {
48 |     'corridor_log': PARAMETER_LOG,
49 |     'intersection': 1171274,
50 |     'busCallDetector': 1171405,
51 |     'busExitDetector': 1171391,
52 |     'section': 6601,
53 |     'phase_duration': [16, 38, 7, 11, 32, 6],
54 |     'phase_of_interest': 5,
55 |     'AlgB_decision': 9,
56 |     'log': LOG_PATH + '1171274.csv',
57 |     'target_headway': 290,
58 |     'prePOZ': {
59 |         'busExitDetector': 1171393,
60 |         'busCallDetector': 1171405,
61 |     },
62 |     'maxTT': 400
63 | 
64 | }
65 | INTERSECTION_2 = {
66 |     'corridor_log': PARAMETER_LOG,
67 |     'intersection': 1171288,
68 |     'busCallDetector': 1171407,
69 |     'busExitDetector': 1171389,
70 |     'section': 6563,
71 |     'phase_duration': [38, 8, 13, 4, 40, 7],
72 |     'phase_of_interest': 5,
73 |     'AlgB_decision': 12,
74 |     'log': LOG_PATH + '1171288.csv',
75 |     'target_headway': 290,
76 |     'prePOZ': {
77 |         'busExitDetector': 1171391,
78 |         'busCallDetector': 1171407,
79 |     },
80 |     'maxTT': 200
81 | }
82 | 
83 | 
84 | CORRIDOR = [INTERSECTION_1, INTERSECTION_2]
85 | 
86 | 
--------------------------------------------------------------------------------
/demo/dynamic_senario.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JerryIshihara/coordinated-transit-signal-priority/09fe74809e9aec0820d43300f1a087697ba6be1a/demo/dynamic_senario.png
--------------------------------------------------------------------------------
/demo/navBar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JerryIshihara/coordinated-transit-signal-priority/09fe74809e9aec0820d43300f1a087697ba6be1a/demo/navBar.png
--------------------------------------------------------------------------------
/demo/prePOZ.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JerryIshihara/coordinated-transit-signal-priority/09fe74809e9aec0820d43300f1a087697ba6be1a/demo/prePOZ.png
--------------------------------------------------------------------------------
/demo/tsp_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JerryIshihara/coordinated-transit-signal-priority/09fe74809e9aec0820d43300f1a087697ba6be1a/demo/tsp_flow.png
--------------------------------------------------------------------------------
/train.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # usage: ./train.sh [-p pythonDir] [-m modelPath] [-a aconsolePath] [-s startReplication] [-e endReplication]
 3 | 
 4 | # default server python directory
 5 | py_dir="C:\Python27"
 6 | # aimsun controller path ($HOME is used because a tilde does not expand inside quotes)
 7 | controller="$HOME/Aimsun/RunSeveralReplications.py"
 8 | # default model path in the server
 9 | model_path="C:\Users\Public\Documents\ShalabyGroup\finchTSPs_3 intx_west_Subnetwork 1171379_newInters.ang"
10 | # default aconsole path
11 | aconsole_path="C:\Program Files\Aimsun\Aimsun Next 8.3\aconsole.exe"
12 | # replication start and end
13 | start=1177671 end=1180580
14 | 
15 | while [ "$1" != "" ]; do
16 |     case $1 in
17 |         -p | --pythonDir )    shift
18 |                               py_dir="$1"
19 |                               ;;
20 |         -m | --modelPath )    shift
21 |                               model_path="$1"
22 |                               ;;
23 |         -a | --aconsolePath ) shift
24 |                               aconsole_path="$1"
25 |                               ;;
26 |         -s | --start )        shift
27 |                               start=$1
28 |                               ;;
29 |         -e | --end )          shift
30 |                               end=$1
31 |                               ;;
32 |     esac
33 |     shift
34 | done
35 | 
36 | echo "Running replications ${start} to ${end}"
37 | 
38 | cd "${py_dir}"
39 | 
40 | # run the replications one at a time; quote the paths, since they may contain spaces
41 | for ((x=start; x<=end; x++)); do
42 |     python "${controller}" -aconsolePath "${aconsole_path}" -modelPath "${model_path}" -targets "${x}"
43 | done
--------------------------------------------------------------------------------