├── test.py ├── testing_settings.ini ├── myGitUpdate.sh ├── training_settings.ini ├── intersection ├── sumo_config.sumocfg ├── environment.net.xml └── episode_routes.rou.xml ├── models ├── model_12 │ └── training_settings.ini ├── model_2 │ └── training_settings.ini ├── model_3 │ └── training_settings.ini ├── model_5 │ └── training_settings.ini └── model_8 │ └── training_settings.ini ├── memory.py ├── visualization.py ├── testing_main.py ├── new_train_main.py ├── training_main.py ├── model.py ├── utils.py ├── ddqn.py ├── testing_simulation.py ├── training_simulation.py └── generator.py /test.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | m = [[], []] 4 | batch = [] 5 | m1 = [[1,2,5], [1,2,1]] 6 | m3 = [3] 7 | m2 = [[2,3,4,],[2,2,2]] 8 | 9 | batch.append(m1) 10 | batch.append(m2) 11 | print(batch) 12 | print(m1+m2) -------------------------------------------------------------------------------- /testing_settings.ini: -------------------------------------------------------------------------------- 1 | [simulation] 2 | gui = True 3 | max_steps = 3100 4 | n_cars_generated = 4000 5 | episode_seed = 10000 6 | yellow_duration = 4 7 | green_duration = 10 8 | 9 | [agent] 10 | num_states = 80 11 | num_actions = 4 12 | 13 | [dir] 14 | models_path_name = models 15 | sumocfg_file_name = sumo_config.sumocfg 16 | model_to_test = 2 17 | -------------------------------------------------------------------------------- /myGitUpdate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # git init 4 | # git remote add pi git@10.2.125.15:/home/git/huaweiCodeCraft.git 5 | 6 | #echo "更新ing" 7 | # git pull 8 | #echo "更新end" 9 | 10 | echo "提交ing" 11 | 12 | git add . 
13 | if [ -n "$1" ] 14 | then 15 | echo "commit: $1" 16 | git commit -am "$1" 17 | else 18 | echo "commit: updata" 19 | git commit -m "updata" 20 | fi 21 | # git push -f pi master 22 | git push 23 | echo "提交end" 24 | -------------------------------------------------------------------------------- /training_settings.ini: -------------------------------------------------------------------------------- 1 | [simulation] 2 | gui = False 3 | total_episodes = 100 4 | max_steps = 300 5 | n_cars_generated = 600 6 | green_duration = 10 7 | yellow_duration = 4 8 | 9 | [model] 10 | num_layers = 4 11 | width_layers = 400 12 | batch_size = 100 13 | learning_rate = 0.001 14 | training_epochs = 800 15 | 16 | [memory] 17 | memory_size_min = 600 18 | memory_size_max = 50000 19 | 20 | [agent] 21 | num_states = 80 22 | num_actions = 4 23 | gamma = 0.75 24 | 25 | [dir] 26 | models_path_name = models 27 | sumocfg_file_name = sumo_config.sumocfg -------------------------------------------------------------------------------- /intersection/sumo_config.sumocfg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /models/model_12/training_settings.ini: -------------------------------------------------------------------------------- 1 | [simulation] 2 | gui = False 3 | total_episodes = 100 4 | max_steps = 300 5 | n_cars_generated = 600 6 | green_duration = 10 7 | yellow_duration = 4 8 | 9 | [model] 10 | num_layers = 4 11 | width_layers = 400 12 | batch_size = 100 13 | learning_rate = 0.001 14 | training_epochs = 800 15 | 16 | [memory] 17 | memory_size_min = 600 18 | memory_size_max = 50000 19 | 20 | [agent] 21 | num_states = 80 22 | num_actions = 4 23 | gamma = 0.75 24 | 25 | [dir] 26 | models_path_name = models 27 | sumocfg_file_name = sumo_config.sumocfg 
-------------------------------------------------------------------------------- /models/model_2/training_settings.ini: -------------------------------------------------------------------------------- 1 | [simulation] 2 | gui = False 3 | total_episodes = 100 4 | max_steps = 300 5 | n_cars_generated = 600 6 | green_duration = 10 7 | yellow_duration = 4 8 | 9 | [model] 10 | num_layers = 4 11 | width_layers = 400 12 | batch_size = 100 13 | learning_rate = 0.001 14 | training_epochs = 800 15 | 16 | [memory] 17 | memory_size_min = 600 18 | memory_size_max = 50000 19 | 20 | [agent] 21 | num_states = 80 22 | num_actions = 4 23 | gamma = 0.75 24 | 25 | [dir] 26 | models_path_name = models 27 | sumocfg_file_name = sumo_config.sumocfg -------------------------------------------------------------------------------- /models/model_3/training_settings.ini: -------------------------------------------------------------------------------- 1 | [simulation] 2 | gui = False 3 | total_episodes = 100 4 | max_steps = 300 5 | n_cars_generated = 600 6 | green_duration = 10 7 | yellow_duration = 4 8 | 9 | [model] 10 | num_layers = 4 11 | width_layers = 400 12 | batch_size = 100 13 | learning_rate = 0.001 14 | training_epochs = 800 15 | 16 | [memory] 17 | memory_size_min = 600 18 | memory_size_max = 50000 19 | 20 | [agent] 21 | num_states = 80 22 | num_actions = 4 23 | gamma = 0.75 24 | 25 | [dir] 26 | models_path_name = models 27 | sumocfg_file_name = sumo_config.sumocfg -------------------------------------------------------------------------------- /models/model_5/training_settings.ini: -------------------------------------------------------------------------------- 1 | [simulation] 2 | gui = False 3 | total_episodes = 100 4 | max_steps = 300 5 | n_cars_generated = 600 6 | green_duration = 10 7 | yellow_duration = 4 8 | 9 | [model] 10 | num_layers = 4 11 | width_layers = 400 12 | batch_size = 100 13 | learning_rate = 0.001 14 | training_epochs = 800 15 | 16 | [memory] 17 | memory_size_min 
class Memory:
    """Bounded first-in/first-out store of training samples.

    Samples are appended until ``size_max`` is exceeded, after which the
    oldest sample is discarded on every insertion.  Sampling is refused
    (an empty list is returned) until at least ``size_min`` samples exist.
    """

    def __init__(self, size_max, size_min):
        self._size_min = size_min
        self._size_max = size_max
        self._samples = []

    def add_sample(self, sample):
        """
        Store one sample, evicting the oldest one when the buffer overflows
        """
        stored = self._samples
        stored.append(sample)
        if self._size_now() > self._size_max:
            del stored[0]  # drop the oldest entry to stay within size_max

    def get_samples(self, n):
        """
        Return up to n distinct samples drawn at random (without replacement)

        An empty list is returned while fewer than size_min samples are
        stored; if n exceeds the stored amount, every sample is returned in
        random order.
        """
        available = self._size_now()
        if available < self._size_min:
            return []

        # never ask random.sample for more items than the buffer holds
        return random.sample(self._samples, min(n, available))

    def _size_now(self):
        """
        Number of samples currently stored
        """
        return len(self._samples)
set_test_path(config['models_path_name'], config['model_to_test']) 20 | 21 | Model = TestModel( 22 | input_dim=config['num_states'], 23 | model_path=model_path 24 | ) 25 | 26 | TrafficGen = TrafficGenerator( 27 | config['max_steps'], 28 | config['n_cars_generated'] 29 | ) 30 | 31 | Visualization = Visualization( 32 | plot_path, 33 | dpi=96 34 | ) 35 | 36 | Simulation = Simulation( 37 | Model, 38 | TrafficGen, 39 | sumo_cmd, 40 | config['max_steps'], 41 | config['green_duration'], 42 | config['yellow_duration'], 43 | config['num_states'], 44 | config['num_actions'] 45 | ) 46 | print("car: ", config['n_cars_generated']) 47 | 48 | print('\n----- Test episode') 49 | simulation_time, w, co = Simulation.run(config['episode_seed']) # run the simulation 50 | print('Simulation time:', simulation_time, 's') 51 | print("sumo co: ", co, 'mg') 52 | print("sumo wait: ", w, 's') 53 | 54 | print("----- Testing info saved at:", plot_path) 55 | 56 | copyfile(src='testing_settings.ini', dst=os.path.join(plot_path, 'testing_settings.ini')) 57 | 58 | Visualization.save_data_and_plot(data=Simulation.reward_episode, filename='reward', xlabel='Action step', ylabel='Reward') 59 | Visualization.save_data_and_plot(data=Simulation.queue_length_episode, filename='queue', xlabel='Step', ylabel='Queue lenght (vehicles)') 60 | -------------------------------------------------------------------------------- /new_train_main.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import os 4 | import datetime 5 | from shutil import copyfile 6 | from training_simulation import Simulation 7 | from generator import TrafficGenerator 8 | from ddqn import DoubleDQN 9 | from memory import Memory 10 | from utils import import_train_configuration, set_sumo, set_train_path 11 | from visualization import Visualization 12 | import matplotlib.pyplot as plt 13 | 14 | if __name__ == '__main__': 15 
| X = [] 16 | Y = [] 17 | Z = [] 18 | R = [] 19 | 20 | config = import_train_configuration(config_file='training_settings.ini') 21 | sumo_cmd = set_sumo(config['gui'], config['sumocfg_file_name'], config['max_steps']) # gui=False,max_steos=5400 22 | path = set_train_path(config['models_path_name']) # models_path_name = models 23 | 24 | Model = DoubleDQN() 25 | 26 | Visualization = Visualization( 27 | path, 28 | dpi=96 29 | ) 30 | 31 | TrafficGen = TrafficGenerator( 32 | config['max_steps'], # max_steps = 5400 33 | config['n_cars_generated'] # n_cars_generated = 1000 34 | ) 35 | Memory = Memory( 36 | config['memory_size_max'], # memory_size_max = 50000 37 | config['memory_size_min'] # memory_size_min = 600 38 | ) 39 | 40 | Simulation = Simulation( 41 | Model, 42 | Memory, 43 | TrafficGen, 44 | sumo_cmd, 45 | config['gamma'], # 0.75 46 | config['max_steps'], # 5400 47 | config['green_duration'], # 10 48 | config['yellow_duration'], # 4 49 | config['num_states'], # 80 50 | config['num_actions'], # 4 51 | config['training_epochs'] # 800 52 | ) 53 | episode = 0 54 | timestamp_start = datetime.datetime.now() 55 | Model.reset() 56 | 57 | while episode < config['total_episodes']: 58 | print('\n----- Episode', str(episode+1), 'of', str(config['total_episodes'])) 59 | epsilon = 1.0 - (episode / config['total_episodes']) 60 | simulation_time, training_time, w, co, co2, reward = Simulation.run(episode, epsilon) 61 | print('Simulation time:', simulation_time, 's - Training time:', training_time, 's - Total:', round(simulation_time+training_time, 1), 's') 62 | X.append(w) # wait time 63 | Y.append(co) 64 | Z.append(co2) 65 | R.append(reward) 66 | episode += 1 67 | 68 | print("\n----- Start time:", timestamp_start) 69 | print("----- End time:", datetime.datetime.now()) 70 | print("----- Session info saved at:", path) 71 | 72 | plt.title("reward is car number") 73 | plt.subplot(3, 1, 1) 74 | plt.plot(X, label='waiting time') 75 | plt.legend() 76 | plt.subplot(3, 1, 2) 77 | 
plt.plot(Y, label='sum co') 78 | plt.legend() 79 | plt.subplot(3, 1, 3) 80 | plt.plot(Z, label='sum co2') 81 | plt.show() 82 | 83 | plt.title("Reward") 84 | plt.plot(R, label="Reward") 85 | plt.legend() 86 | plt.show() 87 | 88 | 89 | copyfile(src='training_settings.ini', dst=os.path.join(path, 'training_settings.ini')) -------------------------------------------------------------------------------- /training_main.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import os 4 | import datetime 5 | from shutil import copyfile 6 | from training_simulation import Simulation 7 | from generator import TrafficGenerator 8 | from memory import Memory 9 | from model import TrainModel 10 | from visualization import Visualization 11 | from utils import import_train_configuration, set_sumo, set_train_path 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | 16 | if __name__ == "__main__": 17 | X = [] 18 | Y = [] 19 | Z = [] 20 | R = [] 21 | 22 | config = import_train_configuration(config_file='training_settings.ini') 23 | sumo_cmd = set_sumo(config['gui'], config['sumocfg_file_name'], config['max_steps']) # gui=False,max_steos=5400 24 | path = set_train_path(config['models_path_name']) # models_path_name = models 25 | 26 | Model = TrainModel( 27 | config['num_layers'], # num_layers = 4 28 | config['width_layers'], # width_layers = 400 29 | config['batch_size'], # batch_size = 100 30 | config['learning_rate'], # learning_rate = 0.001 31 | input_dim=config['num_states'], # num_states = 80 32 | output_dim=config['num_actions'] # num_actions = 4 33 | ) 34 | 35 | Memory = Memory( 36 | config['memory_size_max'], # memory_size_max = 50000 37 | config['memory_size_min'] # memory_size_min = 600 38 | ) 39 | 40 | TrafficGen = TrafficGenerator( 41 | config['max_steps'], # max_steps = 5400 42 | config['n_cars_generated'] # n_cars_generated = 1000 43 | ) 44 | 45 | 
Visualization = Visualization( 46 | path, 47 | dpi=96 48 | ) 49 | 50 | Simulation = Simulation( 51 | Model, 52 | Memory, 53 | TrafficGen, 54 | sumo_cmd, 55 | config['gamma'], # 0.75 56 | config['max_steps'], # 5400 57 | config['green_duration'], # 10 58 | config['yellow_duration'], # 4 59 | config['num_states'], # 80 60 | config['num_actions'], # 4 61 | config['training_epochs'] # 800 62 | ) 63 | 64 | episode = 0 65 | timestamp_start = datetime.datetime.now() 66 | f = open("/3实验/6/Deep-QLearning-Agent-for-Traffic-Signal-Control-master/wait.txt", 'w') 67 | g = open("/3实验/6/Deep-QLearning-Agent-for-Traffic-Signal-Control-master/co.txt", 'w') 68 | 69 | f.truncate() 70 | g.truncate() 71 | 72 | while episode < config['total_episodes']: 73 | print('\n----- Episode', str(episode+1), 'of', str(config['total_episodes'])) 74 | epsilon = 1.0 - (episode / config['total_episodes']) # set the epsilon for this episode according to epsilon-greedy policy 75 | simulation_time, training_time, w, co, co2, reward = Simulation.run(episode, epsilon) # run the simulation 76 | print('Simulation time:', simulation_time, 's - Training time:', training_time, 's - Total:', round(simulation_time+training_time, 1), 's') 77 | X.append(w) # wait time 78 | Y.append(co) 79 | Z.append(co2) 80 | R.append(reward) 81 | f.write("%s\n" % w) 82 | g.write("%s\n" % co) 83 | episode += 1 84 | 85 | print("\n----- Start time:", timestamp_start) 86 | print("----- End time:", datetime.datetime.now()) 87 | print("----- Session info saved at:", path) 88 | 89 | plt.title("reward is car number") 90 | plt.subplot(3, 1, 1) 91 | plt.plot(X, label='waiting time') 92 | plt.legend() 93 | plt.subplot(3, 1, 2) 94 | plt.plot(Y, label='sum co') 95 | plt.legend() 96 | plt.subplot(3, 1, 3) 97 | plt.plot(Z, label='sum co2') 98 | plt.show() 99 | 100 | plt.title("Reward") 101 | plt.plot(R, label="Reward") 102 | plt.legend() 103 | plt.show() 104 | 105 | Model.save_model(path) 106 | 107 | copyfile(src='training_settings.ini', 
class TrainModel:
    """Keras Q-network used during training.

    The network takes three separate image-like inputs of shape
    ``(16, 12, 1)``, passes each through its own two-layer convolutional
    branch, concatenates the flattened features and maps them through two
    dense layers to ``output_dim`` linear outputs (one value per action).
    """

    def __init__(self, num_layers, width, batch_size, learning_rate, input_dim, output_dim):
        self._input_dim = input_dim
        self._output_dim = output_dim
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._model = self._build_model(num_layers, width)
        # Not read anywhere in this class; kept because code outside this
        # class may rely on the attribute existing.
        self.js = 0

    @staticmethod
    def _conv_branch():
        """
        Build one convolutional input branch and return (input tensor, flattened features)
        """
        branch_input = keras.Input(shape=(16, 12, 1))
        features = Conv2D(16, (4, 4), strides=(2, 2), activation='relu')(branch_input)
        features = Conv2D(32, (2, 2), strides=(1, 1), activation='relu')(features)
        return branch_input, Flatten()(features)

    def _build_model(self, num_layers, width):
        """
        Build and compile the three-branch convolutional network

        ``num_layers`` and ``width`` are accepted for interface compatibility
        but are not used: the architecture below is fixed.
        """
        branches = [self._conv_branch() for _ in range(3)]
        branch_inputs = [branch_input for branch_input, _ in branches]

        x = keras.layers.concatenate([features for _, features in branches])
        x = layers.Dense(128, activation='relu')(x)
        x = layers.Dense(64, activation='relu')(x)
        x = layers.Dense(self._output_dim, activation='linear')(x)

        model = keras.Model(inputs=branch_inputs, outputs=[x], name='my_model')
        # `learning_rate` is the supported keyword; `lr` is a deprecated
        # alias that newer Keras releases reject.
        model.compile(loss=losses.mean_squared_error, optimizer=Adam(learning_rate=self._learning_rate))

        return model

    def predict_one(self, state):
        """
        Predict the action values from a single state

        The state is passed to the model unchanged, so the caller must
        already supply it in the form the three-input model expects.
        """
        return self._model.predict(state)

    def predict_batch(self, states):
        """
        Predict the action values from a batch of states
        """
        return self._model.predict(states)

    def train_batch(self, states, q_sa):
        """
        Train the nn for one epoch using the updated q-values
        """
        self._model.fit(states, q_sa, epochs=1, verbose=0)

    def save_model(self, path):
        """
        Save the current model in the folder as h5 file and a model architecture summary as png
        """
        self._model.save(os.path.join(path, 'trained_model.h5'))
        plot_model(self._model, to_file=os.path.join(path, 'model_structure.png'), show_shapes=True, show_layer_names=True)

    @property
    def input_dim(self):
        return self._input_dim

    @property
    def output_dim(self):
        return self._output_dim

    @property
    def batch_size(self):
        return self._batch_size
def import_train_configuration(config_file):
    """
    Parse the training .ini file and return its settings as a flat dict

    Every option is converted with the configparser getter matching its
    type (bool / int / float); the two entries of the [dir] section are
    kept as plain strings.
    """
    parser = configparser.ConfigParser()
    parser.read(config_file)

    # (section, typed getter, option names) - listed in the order the keys
    # must appear in the resulting dict
    layout = (
        ('simulation', 'getboolean', ('gui',)),
        ('simulation', 'getint', ('total_episodes', 'max_steps', 'n_cars_generated',
                                  'green_duration', 'yellow_duration')),
        ('model', 'getint', ('num_layers', 'width_layers', 'batch_size')),
        ('model', 'getfloat', ('learning_rate',)),
        ('model', 'getint', ('training_epochs',)),
        ('memory', 'getint', ('memory_size_min', 'memory_size_max')),
        ('agent', 'getint', ('num_states', 'num_actions')),
        ('agent', 'getfloat', ('gamma',)),
    )

    settings = {}
    for section, getter_name, options in layout:
        typed_get = getattr(parser[section], getter_name)
        for option in options:
            settings[option] = typed_get(option)

    directories = parser['dir']
    for option in ('models_path_name', 'sumocfg_file_name'):
        settings[option] = directories[option]
    return settings
def set_train_path(models_path_name):
    """
    Create a new model path with an incremental integer, also considering previously created model paths

    The models folder (``<cwd>/<models_path_name>``) is created if missing.
    Existing entries named ``<prefix>_<integer>...`` contribute their integer
    to the version search; entries that do not follow that pattern (hidden
    files, notes, ...) are ignored instead of aborting the run.

    Returns the path of the newly created ``model_<N>`` folder, with a
    trailing path separator.
    """
    models_path = os.path.join(os.getcwd(), models_path_name, '')
    os.makedirs(os.path.dirname(models_path), exist_ok=True)

    previous_versions = []
    for name in os.listdir(models_path):
        try:
            previous_versions.append(int(name.split("_")[1]))
        except (IndexError, ValueError):
            # no underscore, or the part after it is not a number: not a model folder
            continue

    # default=0 makes the first model 'model_1' when nothing usable exists yet
    new_version = str(max(previous_versions, default=0) + 1)

    data_path = os.path.join(models_path, 'model_'+new_version, '')
    os.makedirs(os.path.dirname(data_path), exist_ok=True)
    return data_path
class DoubleDQN():
    """Double-DQN learner with one replay bucket per action.

    Transitions are stored in ``self.memory[action]`` so that every training
    batch can contain the same number of samples for each of the four
    actions.  ``self.model`` selects the best next action and
    ``self.target_model`` evaluates it (the Double-DQN update); the target
    network is synchronised every ``update_frequency`` calls to ``update``.
    """

    # Shape of each of the three per-state input planes fed to the network.
    STATE_SHAPE = (16, 16, 1)

    def __init__(self):
        # hyperparameters
        self.gamma = 0.99
        self.nn_learning_rate = 0.0002  # NOTE(review): not used below; the optimizer is built with 0.001
        self.nn_batch_size = None
        self.epochs = 1
        self.minibatch_sz = 64
        self.epsilon = 1.
        self.epsilon_decay = 0.992
        self.epsilon_floor = 0.05
        self.n_a = 4  # size of the action space
        self._output_dim = 4

        self.description = 'DQN Learner'
        self.update_frequency = 100
        self.verbose = False

        # memory
        self.memory_max = 50000
        self.reset()

    def reset(self):
        """Restore full exploration, empty the replay buckets and rebuild both networks."""
        self.epsilon = 1.
        self.step = 0
        self.memory = [[], [], [], []]

        # create nn's
        self.model = self._make_model_()
        self.target_model = self._make_model_()

    def _conv_branch_(self):
        """Build one convolutional input branch; returns (input tensor, flattened features)."""
        branch_input = keras.Input(shape=self.STATE_SHAPE)
        features = Conv2D(16, (7, 7), strides=2, activation='relu')(branch_input)
        features = Conv2D(32, (2, 2), strides=1, activation='relu')(features)
        return branch_input, Flatten()(features)

    def _make_model_(self):
        """Build and compile the three-input convolutional Q-network."""
        branches = [self._conv_branch_() for _ in range(3)]
        branch_inputs = [branch_input for branch_input, _ in branches]

        x = keras.layers.concatenate([features for _, features in branches])
        x = layers.Dense(128, activation='relu')(x)
        x = layers.Dense(64, activation='relu')(x)
        x = layers.Dense(self._output_dim, activation='linear')(x)

        model = keras.Model(inputs=branch_inputs, outputs=[x], name='my_model')
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        model.compile(loss=losses.mean_squared_error, optimizer=Adam(learning_rate=0.001))

        return model

    def _model_update_(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def pick_action(self, state):
        """Epsilon-greedy choice: a random action with probability epsilon, else the argmax of Q(state)."""
        if np.random.random() < self.epsilon:
            return np.random.choice(self.n_a)
        else:
            tmp = self.model.predict(state)
            return np.argmax(tmp[0])

    def update(self, old_state, old_action, reward, current_sate, done, epsilon):
        """Store one transition, run a training step and do the periodic bookkeeping.

        ``old_state`` / ``current_sate`` are indexed as ``state[0..2]`` during
        training, so each is presumably a triple of ``STATE_SHAPE`` arrays -
        confirm against the caller.  ``epsilon`` replaces the current
        exploration rate when an episode ends (``done``) and the current rate
        is still above ``epsilon_floor``.
        """
        # Total over all four buckets (the previous code counted bucket 0
        # four times, so the limit was never applied to the real total).
        memory_len = sum(len(bucket) for bucket in self.memory)

        # When the limit is exceeded the whole memory is discarded.
        if memory_len > self.memory_max:
            self.memory = [[], [], [], []]

        self.memory[old_action].append([old_state, old_action, reward, current_sate, done])

        self._batch_train_()

        if self.step % self.update_frequency == 0:
            self._model_update_()

        if done and self.epsilon > self.epsilon_floor:
            self.epsilon = epsilon

        self.step += 1

    def _batch_train_(self):
        """Fit the online network on one action-balanced minibatch.

        Nothing happens until every action bucket holds more samples than
        its share of the minibatch (``minibatch_sz`` split evenly over the
        buckets; 16 each with the defaults).
        """
        per_action = self.minibatch_sz // len(self.memory)
        if any(len(bucket) <= per_action for bucket in self.memory):
            return

        # create training batch: the same number of transitions per action
        batch = []
        for bucket in self.memory:
            batch.extend(random.sample(bucket, per_action))

        # Tensors are sized to the real batch; a fixed size of 100 would
        # pad every batch with all-zero samples that get trained on.
        plane_shape = (len(batch),) + tuple(self.STATE_SHAPE)
        p1, em1, v1 = (np.zeros(plane_shape) for _ in range(3))
        p2, em2, v2 = (np.zeros(plane_shape) for _ in range(3))
        for i, transition in enumerate(batch):
            state, next_state = transition[0], transition[3]
            p1[i], em1[i], v1[i] = state[0], state[1], state[2]
            p2[i], em2[i], v2[i] = next_state[0], next_state[1], next_state[2]

        # rebuild the network inputs from the sampled transitions
        states = [p1, em1, v1]
        next_states = [p2, em2, v2]

        q_s_a = self.model.predict(states)  # Q(state) for every sample
        q_s_a_d = self.model.predict(next_states)  # online Q(next_state)
        tm_q_s_a_d = self.target_model.predict(next_states)  # target Q(next_state)

        # Double DQN: the online network picks the action, the target network scores it
        a_maxes = np.argmax(q_s_a_d, axis=1)

        # Targets start as the current predictions so only the taken action
        # contributes to the loss; shape (batch, n_actions) matches the
        # model output exactly (no accidental broadcasting in the loss).
        y = np.array(q_s_a, dtype=float)
        for i, transition in enumerate(batch):
            action, reward, done = transition[1], transition[2], transition[4]
            target = reward
            if not done:
                target += self.gamma * tm_q_s_a_d[i][a_maxes[i]]
            y[i][action] = target

        self.model.fit(states, y, epochs=1, verbose=False)  # train the NN
def __init__(self, Model, TrafficGen, sumo_cmd, max_steps, green_duration, yellow_duration, num_states, num_actions):
    """Prepare a testing simulation for an already trained model.

    Args:
        Model: object whose ``predict_one(state)`` is used to choose actions.
        TrafficGen: object whose ``generate_routefile(seed=...)`` is called
            at the start of each run.
        sumo_cmd: command handed to ``traci.start``.
        max_steps: number of simulation steps per episode.
        green_duration: steps simulated after a green phase is set.
        yellow_duration: steps simulated after a yellow phase is set.
        num_states: length of the state vector built by ``_get_state``.
        num_actions: number of selectable light phases.
    """
    # Collaborators.
    self._Model, self._TrafficGen = Model, TrafficGen

    # SUMO launch command and episode bookkeeping.
    self._sumo_cmd, self._max_steps = sumo_cmd, max_steps
    self._step = 0

    # Phase timings and agent dimensions.
    self._green_duration, self._yellow_duration = green_duration, yellow_duration
    self._num_states, self._num_actions = num_states, num_actions

    # Histories filled in while the episode runs.
    self._reward_episode, self._queue_length_episode, self._co_episode = [], [], []
def _simulate(self, steps_todo):
    """Advance SUMO by up to ``steps_todo`` steps, logging queue length and CO.

    After every step the halting-vehicle count and CO emission are read
    via ``_get_queue_length``, accumulated on the instance, appended to the
    per-episode histories and appended as one line each to the two log files.

    Args:
        steps_todo: number of steps requested; clipped so the episode never
            exceeds ``self._max_steps``.
    """
    # Do not run more steps than the maximum allowed for the episode.
    if (self._step + steps_todo) >= self._max_steps:
        steps_todo = self._max_steps - self._step

    co_log_path = "/3实验/6/Deep-QLearning-Agent-for-Traffic-Signal-Control-master/test-6e-co.txt"
    wait_log_path = "/3实验/6/Deep-QLearning-Agent-for-Traffic-Signal-Control-master/test-6e-wait.txt"

    # Context managers close (and flush) both logs; the previous code leaked
    # two open file handles on every call.
    with open(co_log_path, 'a') as co_log, open(wait_log_path, 'a') as wait_log:
        while steps_todo > 0:
            traci.simulationStep()  # simulate 1 step in sumo
            self._step += 1
            steps_todo -= 1

            # _get_queue_length() returns more values than the two used
            # here; unpacking it into two names raised ValueError.
            queue_length, co = self._get_queue_length()[:2]

            self._queue_length_episode.append(queue_length)
            self._sum_waiting_time += queue_length
            self._sum_co += co
            self._co_episode.append(co)

            co_log.write("%s\n" % co)
            wait_log.write("%s\n" % queue_length)
def _get_queue_length(self):
    """Return queue length and emissions summed over the four incoming edges.

    Returns:
        tuple: ``(queue_length, sum_co, sum_co2, sum_hc, sum_nox)`` where
        ``queue_length`` is the number of halting vehicles in the last step
        and the rest are the CO, CO2, HC and NOx emission totals that TraCI
        reports for the edges.
    """
    incoming_edges = ("N2TL", "S2TL", "E2TL", "W2TL")

    def total(query):
        """Sum one TraCI edge query over all incoming edges."""
        return sum(query(edge_id) for edge_id in incoming_edges)

    # The four CO queries whose results were thrown away are gone: each one
    # cost a TraCI round trip on every simulation step.
    queue_length = total(traci.edge.getLastStepHaltingNumber)
    sum_co = total(traci.edge.getCOEmission)
    sum_co2 = total(traci.edge.getCO2Emission)
    sum_hc = total(traci.edge.getHCEmission)
    sum_nox = total(traci.edge.getNOxEmission)

    return queue_length, sum_co, sum_co2, sum_hc, sum_nox
< 7: 288 | lane_cell = 0 289 | elif lane_pos < 14: 290 | lane_cell = 1 291 | elif lane_pos < 21: 292 | lane_cell = 2 293 | elif lane_pos < 28: 294 | lane_cell = 3 295 | elif lane_pos < 40: 296 | lane_cell = 4 297 | elif lane_pos < 60: 298 | lane_cell = 5 299 | elif lane_pos < 100: 300 | lane_cell = 6 301 | elif lane_pos < 160: 302 | lane_cell = 7 303 | elif lane_pos < 400: 304 | lane_cell = 8 305 | elif lane_pos <= 750: 306 | lane_cell = 9 307 | 308 | # finding the lane where the car is located 309 | # x2TL_3 are the "turn left only" lanes 310 | if lane_id == "W2TL_0" or lane_id == "W2TL_1" or lane_id == "W2TL_2": 311 | lane_group = 0 312 | elif lane_id == "W2TL_3": 313 | lane_group = 1 314 | elif lane_id == "N2TL_0" or lane_id == "N2TL_1" or lane_id == "N2TL_2": 315 | lane_group = 2 316 | elif lane_id == "N2TL_3": 317 | lane_group = 3 318 | elif lane_id == "E2TL_0" or lane_id == "E2TL_1" or lane_id == "E2TL_2": 319 | lane_group = 4 320 | elif lane_id == "E2TL_3": 321 | lane_group = 5 322 | elif lane_id == "S2TL_0" or lane_id == "S2TL_1" or lane_id == "S2TL_2": 323 | lane_group = 6 324 | elif lane_id == "S2TL_3": 325 | lane_group = 7 326 | else: 327 | lane_group = -1 328 | 329 | if lane_group >= 1 and lane_group <= 7: 330 | car_position = int(str(lane_group) + str(lane_cell)) # composition of the two postion ID to create a number in interval 0-79 331 | valid_car = True 332 | elif lane_group == 0: 333 | car_position = lane_cell 334 | valid_car = True 335 | else: 336 | valid_car = False # flag for not detecting cars crossing the intersection or driving away from it 337 | 338 | if valid_car: 339 | state[car_position] = 1 # write the position of the car car_id in the state array in the form of "cell occupied" 340 | 341 | return state 342 | 343 | 344 | @property 345 | def queue_length_episode(self): 346 | return self._queue_length_episode 347 | 348 | 349 | @property 350 | def reward_episode(self): 351 | return self._reward_episode 352 | 353 | 354 | 355 | 
def __init__(self, Model, Memory, TrafficGen, sumo_cmd, gamma, max_steps, green_duration, yellow_duration, num_states, num_actions, training_epochs):
    """Prepare a training simulation.

    Args:
        Model: agent used to pick actions and to learn from transitions.
        Memory: replay memory sampled by ``_replay``.
        TrafficGen: object whose ``generate_routefile(seed=...)`` is called
            at the start of each run.
        sumo_cmd: command handed to ``traci.start``.
        gamma: discount factor used by ``_replay``.
        max_steps: number of simulation steps per episode.
        green_duration: steps simulated after a green phase is set.
        yellow_duration: steps simulated after a yellow phase is set.
        num_states: size of the state representation.
        num_actions: number of selectable light phases.
        training_epochs: stored on the instance as ``_training_epochs``.
    """
    # Collaborators.
    self._Model, self._Memory, self._TrafficGen = Model, Memory, TrafficGen

    # Learning hyper-parameters.
    self._gamma, self._training_epochs = gamma, training_epochs
    self._num_states, self._num_actions = num_states, num_actions

    # SUMO launch command, episode length and phase timings.
    self._sumo_cmd, self._max_steps = sumo_cmd, max_steps
    self._green_duration, self._yellow_duration = green_duration, yellow_duration
    self._step = 0

    # Statistics collected across episodes.
    self._reward_store = []
    self._cumulative_wait_store = []
    self._avg_queue_length_store = []
    self._total_co = []
def _simulate(self, steps_todo):
    """Advance SUMO by up to ``steps_todo`` steps while collecting statistics.

    Sets ``self.done`` when the request reaches the end of the episode and
    clips the step count so ``self._max_steps`` is never exceeded.  After
    each step the queue length, CO and CO2 totals from
    ``_get_queue_length`` are added to the episode accumulators.

    Args:
        steps_todo: number of simulation steps requested.
    """
    if (self._step + steps_todo) >= self._max_steps:
        self.done = True
        steps_todo = self._max_steps - self._step

    while steps_todo > 0:
        traci.simulationStep()
        self._step += 1
        steps_todo -= 1

        queue_length, sum_co, sum_co2 = self._get_queue_length()
        self._sum_queue_length += queue_length
        # Each queued vehicle waits one second per step, so the queue
        # length is also the waiting time added by this step.
        self._sum_waiting_time += queue_length
        self._sum_co += sum_co
        # Previously unpacked but never accumulated, so the CO2 total
        # returned by run() stayed at 0.
        self._sum_co2 += sum_co2
def _choose_action(self, state, epsilon):
    """Pick an action with an epsilon-greedy policy.

    With probability ``epsilon`` a uniformly random action index is
    returned; otherwise the index of the largest value predicted by the
    model for ``state``.
    """
    explore = random.random() < epsilon
    if not explore:
        # Exploit: best known action for the current state.
        predicted = self._Model.predict_one(state)
        return np.argmax(predicted)
    # Explore: any action, both bounds inclusive.
    last_action = self._num_actions - 1
    return random.randint(0, last_action)
def _get_state(self):
    """Encode the incoming roads as position, velocity and emission grids.

    Each of the four incoming edges owns four rows (one per lane index) of
    a 16x16 grid; the column is the vehicle's distance from the end of the
    edge in 7-unit cells.  Vehicles farther away than 16 cells are ignored.

    Returns:
        list: ``[position, velocity, emission]``, each of shape
        ``(1, 16, 16, 1)``.  ``position`` marks occupied cells with 1,
        ``velocity`` holds speed divided by 14, and ``emission`` holds the
        sum of the vehicle's CO, CO2, HC and NOx emissions.
    """
    grid_size = 16
    cell_length = 7
    speed_limit = 14   # divisor used to normalise speeds
    road_length = 750  # lane position is measured from the far end of the edge

    # Fixed dtypes: the list-based version produced integer velocity and
    # emission arrays whenever no vehicle was present.
    position = np.zeros((grid_size, grid_size), dtype=int)
    velocity = np.zeros((grid_size, grid_size))
    emission = np.zeros((grid_size, grid_size))

    # Edge id -> grid row for a lane index.  The east approach is mirrored.
    row_for_lane = (
        ("E2TL", lambda lane: 3 - lane),
        ("W2TL", lambda lane: 4 + lane),
        ("N2TL", lambda lane: 8 + lane),
        ("S2TL", lambda lane: 12 + lane),
    )

    vehicle = traci.vehicle
    for edge_id, to_row in row_for_lane:
        for veh_id in traci.edge.getLastStepVehicleIDs(edge_id):
            col = int(abs(road_length - vehicle.getLanePosition(veh_id)) / cell_length)
            if col >= grid_size:
                continue  # too far from the junction to be represented
            row = to_row(vehicle.getLaneIndex(veh_id))
            position[row][col] = 1
            velocity[row][col] = vehicle.getSpeed(veh_id) / speed_limit
            emission[row][col] = (
                vehicle.getCOEmission(veh_id)
                + vehicle.getCO2Emission(veh_id)
                + vehicle.getHCEmission(veh_id)
                + vehicle.getNOxEmission(veh_id)
            )

    shape = (1, grid_size, grid_size, 1)
    return [position.reshape(shape), velocity.reshape(shape), emission.reshape(shape)]
_get_state(self): 350 | # positionMatrix = [] # 位置矩阵 351 | # velocityMatrix = [] # 速度矩阵 352 | # emssionMatrix = [] 353 | # 354 | # cellLength = 7 355 | # offset = 11 356 | # speedLimit = 14 357 | # 358 | # junctionPosition = traci.junction.getPosition('TL')[0] # 交叉口位置 359 | # vehicles_road1 = traci.edge.getLastStepVehicleIDs('E2TL') # 返回上一个模拟步骤中指定边上的车辆ID列表 360 | # vehicles_road2 = traci.edge.getLastStepVehicleIDs('W2TL') 361 | # vehicles_road3 = traci.edge.getLastStepVehicleIDs('N2TL') 362 | # vehicles_road4 = traci.edge.getLastStepVehicleIDs('S2TL') 363 | # for i in range(16): 364 | # positionMatrix.append([]) 365 | # velocityMatrix.append([]) 366 | # emssionMatrix.append([]) 367 | # for j in range(12): 368 | # positionMatrix[i].append(0) 369 | # velocityMatrix[i].append(0) 370 | # emssionMatrix[i].append(0) 371 | # 372 | # for v in vehicles_road1: 373 | # ind = int(abs(750 - traci.vehicle.getLanePosition(v)) / cellLength) 374 | # # e = traci.vehicle.getEmissionClass(v) 375 | # e = traci.vehicle.getCOEmission(v) + traci.vehicle.getCO2Emission(v) + traci.vehicle.getHCEmission( 376 | # v) + traci.vehicle.getNOxEmission(v) 377 | # if (ind < 12): 378 | # positionMatrix[3 - traci.vehicle.getLaneIndex(v)][ind] = 1 379 | # velocityMatrix[3 - traci.vehicle.getLaneIndex(v)][ind] = traci.vehicle.getSpeed(v) / speedLimit 380 | # emssionMatrix[3 - traci.vehicle.getLaneIndex(v)][ind] = e 381 | # 382 | # for v in vehicles_road2: 383 | # 384 | # ind = int(abs(750 - traci.vehicle.getLanePosition(v)) / cellLength) 385 | # e = traci.vehicle.getCOEmission(v) + traci.vehicle.getCO2Emission(v) + traci.vehicle.getHCEmission( 386 | # v) + traci.vehicle.getNOxEmission(v) 387 | # if (ind < 12): 388 | # positionMatrix[4 + traci.vehicle.getLaneIndex(v)][ind] = 1 389 | # velocityMatrix[4 + traci.vehicle.getLaneIndex(v)][ind] = traci.vehicle.getSpeed(v) / speedLimit 390 | # emssionMatrix[4 + traci.vehicle.getLaneIndex(v)][ind] = e 391 | # 392 | # junctionPosition = 
def _replay(self):
    """Train the model on one minibatch drawn from the replay memory.

    Each sample is read as ``(state, action, reward, next_state)`` where a
    state is a list of three grids.  The target for the taken action is
    ``reward + gamma * max Q(next_state)``; the other outputs keep their
    predicted values.  Nothing happens when the memory returns no samples.
    """
    batch = self._Memory.get_samples(self._Model.batch_size)
    if len(batch) == 0:  # memory is not full enough yet
        return

    def stack(slot, channel):
        """Stack one grid per sample into an (n_samples, rows, cols, 1) array."""
        grids = [np.asarray(sample[slot][channel], dtype=float) for sample in batch]
        # Shapes come from the samples themselves.  The old fixed
        # (100, 16, 12, 1) buffers did not fit 16x16 grids and zero-padded
        # short batches.  A leading singleton batch axis is dropped.
        return np.stack([grid.reshape(grid.shape[-3:]) for grid in grids])

    # Slot 0 is the state, slot 3 the next state; three grids each.
    states = [stack(0, channel) for channel in range(3)]
    next_states = [stack(3, channel) for channel in range(3)]

    q_s_a = self._Model.predict_batch(states)         # Q(state) for every sample
    q_s_a_d = self._Model.predict_batch(next_states)  # Q(next_state) for every sample

    y = np.zeros((len(batch), 1, self._num_actions))
    for i, sample in enumerate(batch):
        action, reward = sample[1], sample[2]
        current_q = q_s_a[i]
        current_q[action] = reward + self._gamma * np.amax(q_s_a_d[i])
        y[i] = current_q

    self._Model.train_batch(states, y)  # train the NN
import numpy as np
import math


class TrafficGenerator:
    """Write the per-episode vehicle route file consumed by the simulation.

    Departure steps are drawn from a Weibull distribution, rescaled onto
    the interval [0, max_steps], and written one vehicle per line to
    ``intersection/episode_routes.rou.xml``.
    """

    def __init__(self, max_steps, n_cars_generated):
        """Store the episode length and the number of cars per episode.

        Args:
            max_steps: upper bound of the interval departure steps are
                rescaled onto.
            n_cars_generated: how many cars are generated per episode.
        """
        self._n_cars_generated = n_cars_generated  # how many cars per episode
        self._max_steps = max_steps

    @staticmethod
    def _rescale_to_steps(timings, max_steps):
        """Map ascending samples linearly onto [0, max_steps], rounded.

        Args:
            timings: 1-D NumPy array of samples sorted in ascending order.
            max_steps: upper bound of the target interval.

        Returns:
            Float array of the same length holding whole-number steps;
            an empty array when ``timings`` is empty.
        """
        if timings.size == 0:
            # No cars requested: nothing to rescale.
            return timings

        # The smallest sample is timings[0] because the input is sorted.
        # Indexing [1] here would fail for a single car and could push the
        # first car to a negative step.
        min_old = math.floor(timings[0])
        max_old = math.ceil(timings[-1])
        min_new = 0
        max_new = max_steps

        # A zero-width source range only happens when every sample equals
        # max_old; then (timings - max_old) is all zeros and any non-zero
        # divisor yields max_new, so fall back to 1 instead of dividing by 0.
        span = (max_old - min_old) or 1
        scaled = ((max_new - min_new) / span) * (timings - max_old) + max_new

        # Round every value to the nearest whole step.
        return np.rint(scaled)

    def generate_routefile(self, seed):
        """Generate the vehicles for one simulation episode.

        Seeds NumPy's global random generator, draws one departure step per
        car, then picks a route and a vehicle class for every car and writes
        the result to ``intersection/episode_routes.rou.xml``.

        Args:
            seed: value passed to ``np.random.seed`` so that the same seed
                reproduces the same traffic.

        Raises:
            OSError: if the route file cannot be opened for writing.
        """
        np.random.seed(seed)  # same seed -> same generated traffic

        # Car generation follows a Weibull distribution.
        timings = np.random.weibull(2, self._n_cars_generated)
        timings = np.sort(timings)  # ascending order

        # Rescale the distribution to fit the interval 0:max_steps; the
        # result holds the effective step at which each car is generated.
        car_gen_steps = self._rescale_to_steps(timings, self._max_steps)

        # NOTE(review): in this view the header literal, the footer literal
        # and every per-vehicle format string below are blank (' '); the XML
        # markup appears to have been stripped before review. As written,
        # ' ' % (car_counter, step) raises TypeError for the first car.
        # The literals are reproduced untouched -- restore them from the
        # repository copy of this file.

        # Produce the file for cars generation, one car per line.
        with open("intersection/episode_routes.rou.xml", "w") as routes:
            print("""


















""", file=routes)

            for car_counter, step in enumerate(car_gen_steps):
                # Draw order matters for reproducibility: direction first,
                # then vehicle class, then the concrete route.
                straight_or_turn = np.random.uniform()
                # car_class selects one of five branches with shares of
                # 30% / 30% / 15% / 15% / 10% (the final else also catches
                # the exact boundary values).
                car_class = np.random.uniform()
                if straight_or_turn < 0.75:  # 75% of the time the car goes straight
                    route_straight = np.random.randint(1, 5)  # random source & destination
                    if route_straight == 1:
                        if 0 < car_class < 0.3:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(' ' % (car_counter, step), file=routes)
                        else:
                            print(' ' % (car_counter, step), file=routes)
                    elif route_straight == 2:
                        if 0 < car_class < 0.3:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(' ' % (car_counter, step), file=routes)
                        else:
                            print(' ' % (car_counter, step), file=routes)

                    elif route_straight == 3:
                        if 0 < car_class < 0.3:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(' ' % (car_counter, step), file=routes)
                        else:
                            print(' ' % (car_counter, step), file=routes)

                    else:
                        if 0 < car_class < 0.3:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(' ' % (car_counter, step), file=routes)
                        else:
                            print(' ' % (car_counter, step), file=routes)

                else:  # 25% of the time the car turns
                    route_turn = np.random.randint(1, 9)  # random source & destination
                    if route_turn == 1:
                        if 0 < car_class < 0.3:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(' ' % (car_counter, step), file=routes)
                        else:
                            print(' ' % (car_counter, step), file=routes)

                    elif route_turn == 2:
                        if 0 < car_class < 0.3:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(' ' % (car_counter, step), file=routes)
                        else:
                            print(' ' % (car_counter, step), file=routes)

                    elif route_turn == 3:
                        if 0 < car_class < 0.3:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(' ' % (car_counter, step), file=routes)
                        else:
                            print(' ' % (car_counter, step), file=routes)

                    elif route_turn == 4:
                        if 0 < car_class < 0.3:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(' ' % (car_counter, step), file=routes)
                        else:
                            print(' ' % (car_counter, step), file=routes)

                    elif route_turn == 5:
                        if 0 < car_class < 0.3:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(' ' % (car_counter, step), file=routes)
                        else:
                            print(' ' % (car_counter, step), file=routes)

                    elif route_turn == 6:
                        if 0 < car_class < 0.3:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(' ' % (car_counter, step), file=routes)
                        else:
                            print(' ' % (car_counter, step), file=routes)

                    elif route_turn == 7:
                        if 0 < car_class < 0.3:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(' ' % (car_counter, step), file=routes)
                        else:
                            print(' ' % (car_counter, step), file=routes)

                    elif route_turn == 8:
                        if 0 < car_class < 0.3:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(' ' % (car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(' ' % (car_counter, step), file=routes)
                        else:
                            print(' ' % (car_counter, step), file=routes)

            print("", file=routes)
-------------------------------------------------------------------------------- /intersection/episode_routes.rou.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 
319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 622 | --------------------------------------------------------------------------------