├── test.py
├── testing_settings.ini
├── myGitUpdate.sh
├── training_settings.ini
├── intersection
│   ├── sumo_config.sumocfg
│   ├── environment.net.xml
│   └── episode_routes.rou.xml
├── models
│   ├── model_12
│   │   └── training_settings.ini
│   ├── model_2
│   │   └── training_settings.ini
│   ├── model_3
│   │   └── training_settings.ini
│   ├── model_5
│   │   └── training_settings.ini
│   └── model_8
│       └── training_settings.ini
├── memory.py
├── visualization.py
├── testing_main.py
├── new_train_main.py
├── training_main.py
├── model.py
├── utils.py
├── ddqn.py
├── testing_simulation.py
├── training_simulation.py
└── generator.py
/test.py:
--------------------------------------------------------------------------------
1 | import random
2 |
# Scratch experiment: appending lists to a batch nests them, while "+"
# concatenates their rows into one flat list of rows.
m = [[], []]
m3 = [3]
m1 = [[1, 2, 5], [1, 2, 1]]
m2 = [[2, 3, 4], [2, 2, 2]]

batch = [m1, m2]

print(batch)
print(m1 + m2)
--------------------------------------------------------------------------------
/testing_settings.ini:
--------------------------------------------------------------------------------
1 | [simulation]
2 | gui = True
3 | max_steps = 3100
4 | n_cars_generated = 4000
5 | episode_seed = 10000
6 | yellow_duration = 4
7 | green_duration = 10
8 |
9 | [agent]
10 | num_states = 80
11 | num_actions = 4
12 |
13 | [dir]
14 | models_path_name = models
15 | sumocfg_file_name = sumo_config.sumocfg
16 | model_to_test = 2
17 |
--------------------------------------------------------------------------------
/myGitUpdate.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Stage everything under the current directory, commit and push.
# Usage: ./myGitUpdate.sh [commit message]

# One-time repository setup, kept for reference:
# git init
# git remote add pi git@10.2.125.15:/home/git/huaweiCodeCraft.git

# Optional pull step, currently disabled:
# echo "update started"
# git pull
# echo "update finished"

echo "提交ing"

git add .
if [ -z "$1" ]; then
    # No message supplied: commit with the default message.
    echo "commit: updata"
    git commit -m "updata"
else
    echo "commit: $1"
    git commit -am "$1"
fi

# Alternative, currently disabled: force-push to the "pi" remote.
# git push -f pi master
git push
echo "提交end"
24 |
--------------------------------------------------------------------------------
/training_settings.ini:
--------------------------------------------------------------------------------
1 | [simulation]
2 | gui = False
3 | total_episodes = 100
4 | max_steps = 300
5 | n_cars_generated = 600
6 | green_duration = 10
7 | yellow_duration = 4
8 |
9 | [model]
10 | num_layers = 4
11 | width_layers = 400
12 | batch_size = 100
13 | learning_rate = 0.001
14 | training_epochs = 800
15 |
16 | [memory]
17 | memory_size_min = 600
18 | memory_size_max = 50000
19 |
20 | [agent]
21 | num_states = 80
22 | num_actions = 4
23 | gamma = 0.75
24 |
25 | [dir]
26 | models_path_name = models
27 | sumocfg_file_name = sumo_config.sumocfg
--------------------------------------------------------------------------------
/intersection/sumo_config.sumocfg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/models/model_12/training_settings.ini:
--------------------------------------------------------------------------------
1 | [simulation]
2 | gui = False
3 | total_episodes = 100
4 | max_steps = 300
5 | n_cars_generated = 600
6 | green_duration = 10
7 | yellow_duration = 4
8 |
9 | [model]
10 | num_layers = 4
11 | width_layers = 400
12 | batch_size = 100
13 | learning_rate = 0.001
14 | training_epochs = 800
15 |
16 | [memory]
17 | memory_size_min = 600
18 | memory_size_max = 50000
19 |
20 | [agent]
21 | num_states = 80
22 | num_actions = 4
23 | gamma = 0.75
24 |
25 | [dir]
26 | models_path_name = models
27 | sumocfg_file_name = sumo_config.sumocfg
--------------------------------------------------------------------------------
/models/model_2/training_settings.ini:
--------------------------------------------------------------------------------
1 | [simulation]
2 | gui = False
3 | total_episodes = 100
4 | max_steps = 300
5 | n_cars_generated = 600
6 | green_duration = 10
7 | yellow_duration = 4
8 |
9 | [model]
10 | num_layers = 4
11 | width_layers = 400
12 | batch_size = 100
13 | learning_rate = 0.001
14 | training_epochs = 800
15 |
16 | [memory]
17 | memory_size_min = 600
18 | memory_size_max = 50000
19 |
20 | [agent]
21 | num_states = 80
22 | num_actions = 4
23 | gamma = 0.75
24 |
25 | [dir]
26 | models_path_name = models
27 | sumocfg_file_name = sumo_config.sumocfg
--------------------------------------------------------------------------------
/models/model_3/training_settings.ini:
--------------------------------------------------------------------------------
1 | [simulation]
2 | gui = False
3 | total_episodes = 100
4 | max_steps = 300
5 | n_cars_generated = 600
6 | green_duration = 10
7 | yellow_duration = 4
8 |
9 | [model]
10 | num_layers = 4
11 | width_layers = 400
12 | batch_size = 100
13 | learning_rate = 0.001
14 | training_epochs = 800
15 |
16 | [memory]
17 | memory_size_min = 600
18 | memory_size_max = 50000
19 |
20 | [agent]
21 | num_states = 80
22 | num_actions = 4
23 | gamma = 0.75
24 |
25 | [dir]
26 | models_path_name = models
27 | sumocfg_file_name = sumo_config.sumocfg
--------------------------------------------------------------------------------
/models/model_5/training_settings.ini:
--------------------------------------------------------------------------------
1 | [simulation]
2 | gui = False
3 | total_episodes = 100
4 | max_steps = 300
5 | n_cars_generated = 600
6 | green_duration = 10
7 | yellow_duration = 4
8 |
9 | [model]
10 | num_layers = 4
11 | width_layers = 400
12 | batch_size = 100
13 | learning_rate = 0.001
14 | training_epochs = 800
15 |
16 | [memory]
17 | memory_size_min = 600
18 | memory_size_max = 50000
19 |
20 | [agent]
21 | num_states = 80
22 | num_actions = 4
23 | gamma = 0.75
24 |
25 | [dir]
26 | models_path_name = models
27 | sumocfg_file_name = sumo_config.sumocfg
--------------------------------------------------------------------------------
/models/model_8/training_settings.ini:
--------------------------------------------------------------------------------
1 | [simulation]
2 | gui = False
3 | total_episodes = 100
4 | max_steps = 300
5 | n_cars_generated = 600
6 | green_duration = 10
7 | yellow_duration = 4
8 |
9 | [model]
10 | num_layers = 4
11 | width_layers = 400
12 | batch_size = 100
13 | learning_rate = 0.001
14 | training_epochs = 800
15 |
16 | [memory]
17 | memory_size_min = 600
18 | memory_size_max = 50000
19 |
20 | [agent]
21 | num_states = 80
22 | num_actions = 4
23 | gamma = 0.75
24 |
25 | [dir]
26 | models_path_name = models
27 | sumocfg_file_name = sumo_config.sumocfg
--------------------------------------------------------------------------------
/memory.py:
--------------------------------------------------------------------------------
1 | import random
2 |
class Memory:
    """Bounded sample store that evicts its oldest entry once it overflows."""

    def __init__(self, size_max, size_min):
        self._size_max = size_max
        self._size_min = size_min
        self._samples = []

    def add_sample(self, sample):
        """
        Append a sample; drop the oldest one if the store exceeds size_max
        """
        self._samples.append(sample)
        if self._size_now() > self._size_max:
            del self._samples[0]

    def get_samples(self, n):
        """
        Return up to n randomly drawn samples, or [] while fewer than
        size_min samples are stored
        """
        stored = self._size_now()
        if stored < self._size_min:
            return []
        # Asking for more than is stored yields every sample, in random order.
        return random.sample(self._samples, min(n, stored))

    def _size_now(self):
        """
        Number of samples currently stored
        """
        return len(self._samples)
--------------------------------------------------------------------------------
/visualization.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import os
3 |
class Visualization:
    """Writes a data series to disk as a PNG line plot and a text file."""

    def __init__(self, path, dpi):
        self._path = path
        self._dpi = dpi

    def save_data_and_plot(self, data, filename, xlabel, ylabel):
        """
        Plot `data` as a line chart saved to 'plot_<filename>.png' and dump the
        values, one per line, to 'plot_<filename>_data.txt' (both under the
        path given at construction)
        """
        lowest, highest = min(data), max(data)

        plt.rcParams.update({'font.size': 24})  # set bigger font size

        plt.plot(data)
        plt.ylabel(ylabel)
        plt.xlabel(xlabel)
        plt.margins(0)
        # Pad the y-range by 5% of each extreme's magnitude.
        plt.ylim(lowest - 0.05 * abs(lowest), highest + 0.05 * abs(highest))

        figure = plt.gcf()
        figure.set_size_inches(20, 11.25)
        base_name = 'plot_' + filename
        figure.savefig(os.path.join(self._path, base_name + '.png'), dpi=self._dpi)
        plt.close("all")

        with open(os.path.join(self._path, base_name + '_data.txt'), "w") as out:
            out.writelines("%s\n" % value for value in data)
32 |
--------------------------------------------------------------------------------
/testing_main.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 |
4 | import os
5 | from shutil import copyfile
6 |
7 | from testing_simulation import Simulation
8 | from generator import TrafficGenerator
9 | from model import TestModel
10 | from visualization import Visualization
11 | from utils import import_test_configuration, set_sumo, set_test_path
12 |
13 |
if __name__ == "__main__":
    # Run one test episode with a previously trained model, print its summary
    # metrics, then save the settings used and the per-step plots.
    config = import_test_configuration(config_file='testing_settings.ini')
    sumo_cmd = set_sumo(config['gui'], config['sumocfg_file_name'], config['max_steps'])
    model_path, plot_path = set_test_path(config['models_path_name'], config['model_to_test'])

    # Instances get lowercase names so the imported classes (Visualization,
    # Simulation) are not rebound to their own instances.
    model = TestModel(
        input_dim=config['num_states'],
        model_path=model_path
    )

    traffic_gen = TrafficGenerator(
        config['max_steps'],
        config['n_cars_generated']
    )

    visualization = Visualization(
        plot_path,
        dpi=96
    )

    simulation = Simulation(
        model,
        traffic_gen,
        sumo_cmd,
        config['max_steps'],
        config['green_duration'],
        config['yellow_duration'],
        config['num_states'],
        config['num_actions']
    )
    print("car: ", config['n_cars_generated'])

    print('\n----- Test episode')
    simulation_time, w, co = simulation.run(config['episode_seed'])  # run the simulation
    print('Simulation time:', simulation_time, 's')
    print("sumo co: ", co, 'mg')
    print("sumo wait: ", w, 's')

    print("----- Testing info saved at:", plot_path)

    # Keep a copy of the settings next to the results they produced.
    copyfile(src='testing_settings.ini', dst=os.path.join(plot_path, 'testing_settings.ini'))

    visualization.save_data_and_plot(data=simulation.reward_episode, filename='reward', xlabel='Action step', ylabel='Reward')
    visualization.save_data_and_plot(data=simulation.queue_length_episode, filename='queue', xlabel='Step', ylabel='Queue length (vehicles)')
60 |
--------------------------------------------------------------------------------
/new_train_main.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | import os
4 | import datetime
5 | from shutil import copyfile
6 | from training_simulation import Simulation
7 | from generator import TrafficGenerator
8 | from ddqn import DoubleDQN
9 | from memory import Memory
10 | from utils import import_train_configuration, set_sumo, set_train_path
11 | from visualization import Visualization
12 | import matplotlib.pyplot as plt
13 |
if __name__ == '__main__':
    # Train the DoubleDQN agent for `total_episodes` episodes, then plot the
    # per-episode metrics and copy the settings file into the session folder.
    config = import_train_configuration(config_file='training_settings.ini')
    sumo_cmd = set_sumo(config['gui'], config['sumocfg_file_name'], config['max_steps'])
    path = set_train_path(config['models_path_name'])

    # Instances get lowercase names so the imported classes (Memory,
    # Visualization, Simulation) are not rebound to their own instances.
    model = DoubleDQN()

    visualization = Visualization(
        path,
        dpi=96
    )

    traffic_gen = TrafficGenerator(
        config['max_steps'],
        config['n_cars_generated']
    )
    memory = Memory(
        config['memory_size_max'],
        config['memory_size_min']
    )

    simulation = Simulation(
        model,
        memory,
        traffic_gen,
        sumo_cmd,
        config['gamma'],
        config['max_steps'],
        config['green_duration'],
        config['yellow_duration'],
        config['num_states'],
        config['num_actions'],
        config['training_epochs']
    )

    waits, co_totals, co2_totals, rewards = [], [], [], []
    total_episodes = config['total_episodes']
    timestamp_start = datetime.datetime.now()
    model.reset()

    for episode in range(total_episodes):
        print('\n----- Episode', str(episode+1), 'of', str(total_episodes))
        # Exploration rate decays linearly from 1.0 towards 0 over the session.
        epsilon = 1.0 - (episode / total_episodes)
        simulation_time, training_time, w, co, co2, reward = simulation.run(episode, epsilon)
        print('Simulation time:', simulation_time, 's - Training time:', training_time, 's - Total:', round(simulation_time+training_time, 1), 's')
        waits.append(w)  # wait time
        co_totals.append(co)
        co2_totals.append(co2)
        rewards.append(reward)

    print("\n----- Start time:", timestamp_start)
    print("----- End time:", datetime.datetime.now())
    print("----- Session info saved at:", path)

    # Figure-level title: a plain plt.title() issued before the first
    # plt.subplot() call does not end up titling the three-panel figure.
    plt.suptitle("reward is car number")
    plt.subplot(3, 1, 1)
    plt.plot(waits, label='waiting time')
    plt.legend()
    plt.subplot(3, 1, 2)
    plt.plot(co_totals, label='sum co')
    plt.legend()
    plt.subplot(3, 1, 3)
    plt.plot(co2_totals, label='sum co2')
    plt.legend()  # the label is only displayed when a legend is drawn
    plt.show()

    plt.title("Reward")
    plt.plot(rewards, label="Reward")
    plt.legend()
    plt.show()

    copyfile(src='training_settings.ini', dst=os.path.join(path, 'training_settings.ini'))
--------------------------------------------------------------------------------
/training_main.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import print_function
3 | import os
4 | import datetime
5 | from shutil import copyfile
6 | from training_simulation import Simulation
7 | from generator import TrafficGenerator
8 | from memory import Memory
9 | from model import TrainModel
10 | from visualization import Visualization
11 | from utils import import_train_configuration, set_sumo, set_train_path
12 | import matplotlib.pyplot as plt
13 |
14 |
15 |
if __name__ == "__main__":
    # Train the agent for `total_episodes` episodes while logging the
    # per-episode wait and CO values to text files, then plot the metrics and
    # save the model, the settings file and the summary plots.
    config = import_train_configuration(config_file='training_settings.ini')
    sumo_cmd = set_sumo(config['gui'], config['sumocfg_file_name'], config['max_steps'])
    path = set_train_path(config['models_path_name'])

    # Instances get lowercase names so the imported classes (Memory,
    # Visualization, Simulation) are not rebound to their own instances.
    model = TrainModel(
        config['num_layers'],
        config['width_layers'],
        config['batch_size'],
        config['learning_rate'],
        input_dim=config['num_states'],
        output_dim=config['num_actions']
    )

    memory = Memory(
        config['memory_size_max'],
        config['memory_size_min']
    )

    traffic_gen = TrafficGenerator(
        config['max_steps'],
        config['n_cars_generated']
    )

    visualization = Visualization(
        path,
        dpi=96
    )

    simulation = Simulation(
        model,
        memory,
        traffic_gen,
        sumo_cmd,
        config['gamma'],
        config['max_steps'],
        config['green_duration'],
        config['yellow_duration'],
        config['num_states'],
        config['num_actions'],
        config['training_epochs']
    )

    waits, co_totals, co2_totals, rewards = [], [], [], []
    total_episodes = config['total_episodes']
    timestamp_start = datetime.datetime.now()

    wait_log_path = "/3实验/6/Deep-QLearning-Agent-for-Traffic-Signal-Control-master/wait.txt"
    co_log_path = "/3实验/6/Deep-QLearning-Agent-for-Traffic-Signal-Control-master/co.txt"

    # Mode 'w' already truncates; the context manager guarantees both logs are
    # flushed and closed even if an episode raises.
    with open(wait_log_path, 'w') as wait_log, open(co_log_path, 'w') as co_log:
        for episode in range(total_episodes):
            print('\n----- Episode', str(episode+1), 'of', str(total_episodes))
            # Exploration rate decays linearly from 1.0 towards 0 over the session.
            epsilon = 1.0 - (episode / total_episodes)
            simulation_time, training_time, w, co, co2, reward = simulation.run(episode, epsilon)  # run the simulation
            print('Simulation time:', simulation_time, 's - Training time:', training_time, 's - Total:', round(simulation_time+training_time, 1), 's')
            waits.append(w)  # wait time
            co_totals.append(co)
            co2_totals.append(co2)
            rewards.append(reward)
            wait_log.write("%s\n" % w)
            co_log.write("%s\n" % co)

    print("\n----- Start time:", timestamp_start)
    print("----- End time:", datetime.datetime.now())
    print("----- Session info saved at:", path)

    # Figure-level title: a plain plt.title() issued before the first
    # plt.subplot() call does not end up titling the three-panel figure.
    plt.suptitle("reward is car number")
    plt.subplot(3, 1, 1)
    plt.plot(waits, label='waiting time')
    plt.legend()
    plt.subplot(3, 1, 2)
    plt.plot(co_totals, label='sum co')
    plt.legend()
    plt.subplot(3, 1, 3)
    plt.plot(co2_totals, label='sum co2')
    plt.legend()  # the label is only displayed when a legend is drawn
    plt.show()

    plt.title("Reward")
    plt.plot(rewards, label="Reward")
    plt.legend()
    plt.show()

    model.save_model(path)

    copyfile(src='training_settings.ini', dst=os.path.join(path, 'training_settings.ini'))

    visualization.save_data_and_plot(data=simulation.reward_store, filename='reward', xlabel='Episode', ylabel='Cumulative negative reward')
    visualization.save_data_and_plot(data=simulation.cumulative_wait_store, filename='delay', xlabel='Episode', ylabel='Cumulative delay (s)')
    visualization.save_data_and_plot(data=simulation.avg_queue_length_store, filename='queue', xlabel='Episode', ylabel='Average queue length (vehicles)')
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tensorflow as tf
3 | import numpy as np
4 | import sys
5 |
6 | from tensorflow import keras
7 | from tensorflow.keras import layers
8 | from tensorflow.keras import losses
9 | from tensorflow.keras.optimizers import Adam
10 | from tensorflow.keras.utils import plot_model
11 | from tensorflow.keras.models import load_model
12 | from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, MaxPooling2D
13 |
14 |
class TrainModel:
    """Trainable Q-network: three convolutional input branches merged into dense layers."""

    def __init__(self, num_layers, width, batch_size, learning_rate, input_dim, output_dim):
        self._input_dim = input_dim
        self._output_dim = output_dim
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._model = self._build_model(num_layers, width)
        self.js = 0  # not read anywhere in this class; kept as a public attribute

    @staticmethod
    def _conv_branch():
        """
        Build one input branch: a (16, 12, 1) input, two conv layers, flatten.
        Returns the (input tensor, flattened feature tensor) pair.
        """
        branch_input = keras.Input(shape=(16, 12, 1))
        features = Conv2D(16, (4, 4), strides=(2, 2), activation='relu')(branch_input)
        features = Conv2D(32, (2, 2), strides=(1, 1), activation='relu')(features)
        return branch_input, Flatten()(features)

    def _build_model(self, num_layers, width):
        """
        Build and compile the network: three identical convolutional branches
        are concatenated and fed through Dense(128) -> Dense(64) -> a linear
        layer with `output_dim` units.

        `num_layers` and `width` are accepted for interface compatibility but
        are not used by this architecture.
        """
        # Branches are created one after another so layers are instantiated in
        # the same order as before (input, conv, conv, flatten per branch).
        branches = [self._conv_branch() for _ in range(3)]
        branch_inputs = [branch_input for branch_input, _ in branches]

        x = keras.layers.concatenate([features for _, features in branches])
        x = layers.Dense(128, activation='relu')(x)
        x = layers.Dense(64, activation='relu')(x)
        x = layers.Dense(self._output_dim, activation='linear')(x)

        model = keras.Model(inputs=branch_inputs, outputs=[x], name='my_model')
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and rejected by recent Keras releases.
        model.compile(loss=losses.mean_squared_error, optimizer=Adam(learning_rate=self._learning_rate))

        return model

    def predict_one(self, state):
        """
        Predict the action values from a single state
        """
        return self._model.predict(state)

    def predict_batch(self, states):
        """
        Predict the action values from a batch of states
        """
        return self._model.predict(states)

    def train_batch(self, states, q_sa):
        """
        Train the nn for one epoch using the updated q-values
        """
        self._model.fit(states, q_sa, epochs=1, verbose=0)

    def save_model(self, path):
        """
        Save the current model in the folder as h5 file and a model architecture summary as png
        """
        self._model.save(os.path.join(path, 'trained_model.h5'))
        plot_model(self._model, to_file=os.path.join(path, 'model_structure.png'), show_shapes=True, show_layer_names=True)

    @property
    def input_dim(self):
        return self._input_dim

    @property
    def output_dim(self):
        return self._output_dim

    @property
    def batch_size(self):
        return self._batch_size
101 |
102 |
class TestModel:
    """Read-only wrapper around a trained model loaded from disk."""

    def __init__(self, input_dim, model_path):
        self._input_dim = input_dim
        self._model = self._load_my_model(model_path)

    def _load_my_model(self, model_folder_path):
        """
        Load 'trained_model.h5' from the given model folder; exit the program
        if that file does not exist
        """
        model_file_path = os.path.join(model_folder_path, 'trained_model.h5')
        if not os.path.isfile(model_file_path):
            sys.exit("Model number not found")
        return load_model(model_file_path)

    def predict_one(self, state):
        """
        Predict the action values for one state, reshaped to (1, input_dim)
        """
        single_row = np.reshape(state, [1, self._input_dim])
        return self._model.predict(single_row)

    @property
    def input_dim(self):
        return self._input_dim
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import configparser
2 | from sumolib import checkBinary
3 | import os
4 | import sys
5 |
6 |
def import_train_configuration(config_file):
    """
    Parse the training settings file and return its values as a flat dict
    keyed by option name
    """
    parser = configparser.ConfigParser()
    parser.read(config_file)

    # (section, option, typed getter), listed in the order they are read.
    typed_options = (
        ('simulation', 'gui', 'getboolean'),
        ('simulation', 'total_episodes', 'getint'),
        ('simulation', 'max_steps', 'getint'),
        ('simulation', 'n_cars_generated', 'getint'),
        ('simulation', 'green_duration', 'getint'),
        ('simulation', 'yellow_duration', 'getint'),
        ('model', 'num_layers', 'getint'),
        ('model', 'width_layers', 'getint'),
        ('model', 'batch_size', 'getint'),
        ('model', 'learning_rate', 'getfloat'),
        ('model', 'training_epochs', 'getint'),
        ('memory', 'memory_size_min', 'getint'),
        ('memory', 'memory_size_max', 'getint'),
        ('agent', 'num_states', 'getint'),
        ('agent', 'num_actions', 'getint'),
        ('agent', 'gamma', 'getfloat'),
    )

    config = {}
    for section, option, getter in typed_options:
        config[option] = getattr(parser[section], getter)(option)

    # Path-like options stay plain strings.
    for option in ('models_path_name', 'sumocfg_file_name'):
        config[option] = parser['dir'][option]
    return config
33 |
34 |
def import_test_configuration(config_file):
    """
    Parse the testing settings file and return its values as a flat dict
    keyed by option name
    """
    parser = configparser.ConfigParser()
    parser.read(config_file)

    # (section, option, typed getter), listed in the order they are read.
    typed_options = (
        ('simulation', 'gui', 'getboolean'),
        ('simulation', 'max_steps', 'getint'),
        ('simulation', 'n_cars_generated', 'getint'),
        ('simulation', 'episode_seed', 'getint'),
        ('simulation', 'green_duration', 'getint'),
        ('simulation', 'yellow_duration', 'getint'),
        ('agent', 'num_states', 'getint'),
        ('agent', 'num_actions', 'getint'),
    )

    config = {}
    for section, option, getter in typed_options:
        config[option] = getattr(parser[section], getter)(option)

    directories = parser['dir']
    config['sumocfg_file_name'] = directories['sumocfg_file_name']
    config['models_path_name'] = directories['models_path_name']
    config['model_to_test'] = directories.getint('model_to_test')
    return config
54 |
55 |
def set_sumo(gui, sumocfg_file_name, max_steps):
    """
    Build the command line used to launch SUMO; exits the program when the
    SUMO_HOME environment variable is not set
    """
    # The SUMO python modules live in $SUMO_HOME/tools, so that directory is
    # appended to the import path.
    if 'SUMO_HOME' not in os.environ:
        sys.exit("please declare environment variable 'SUMO_HOME'")
    sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools'))

    # Command-line binary only when gui compares equal to False; any other
    # value (including None) selects the graphical binary.
    binary_name = 'sumo' if gui == False else 'sumo-gui'  # noqa: E712
    sumo_binary = checkBinary(binary_name)

    sumocfg_path = os.path.join('intersection', sumocfg_file_name)
    return [
        sumo_binary,
        "-c", sumocfg_path,
        "--no-step-log", "true",
        "--waiting-time-memory", str(max_steps),
    ]
77 |
78 |
def set_train_path(models_path_name):
    """
    Create and return a new 'model_<n>' folder inside the models folder, where
    n is one more than the highest version number already present (1 if none).

    Entries whose name does not carry an integer after the first underscore
    (stray files such as '.DS_Store', notes, etc.) are ignored instead of
    aborting the run with an IndexError/ValueError.
    """
    models_path = os.path.join(os.getcwd(), models_path_name, '')
    os.makedirs(os.path.dirname(models_path), exist_ok=True)

    previous_versions = []
    for name in os.listdir(models_path):
        try:
            previous_versions.append(int(name.split("_")[1]))
        except (IndexError, ValueError):
            # Not a versioned model entry; skip it.
            continue

    new_version = str(max(previous_versions) + 1) if previous_versions else '1'

    data_path = os.path.join(models_path, 'model_'+new_version, '')
    os.makedirs(os.path.dirname(data_path), exist_ok=True)
    return data_path
96 |
97 |
def set_test_path(models_path_name, model_n):
    """
    Return (model folder, plot folder) for the given model number, creating
    the 'test' plot folder inside the model folder; exits the program if the
    model folder does not exist
    """
    model_dir = os.path.join(os.getcwd(), models_path_name, 'model_'+str(model_n), '')
    if not os.path.isdir(model_dir):
        sys.exit('The model number specified does not exist in the models folder')

    plot_dir = os.path.join(model_dir, 'test', '')
    os.makedirs(os.path.dirname(plot_dir), exist_ok=True)
    return model_dir, plot_dir
--------------------------------------------------------------------------------
/ddqn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import keras
3 | import random
4 | from keras.layers import Input, Dense, Activation, Dropout
5 | from keras.models import Sequential
6 | from tensorflow.keras import layers
7 | from tensorflow.keras import losses
8 | from keras.optimizers import Adam
9 | from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, MaxPooling2D
10 |
11 |
class DoubleDQN():
    """
    Double-DQN agent: an online network, a periodically synchronised target
    network and a replay memory kept as one bucket per action.
    """

    # Shape of each of the three inputs the network receives per sample.
    _INPUT_SHAPE = (16, 16, 1)

    def __init__(self):
        # hyperparameters
        self.gamma = 0.99
        self.nn_learning_rate = 0.0002  # NOTE(review): the model is compiled with 0.001, not this value
        self.nn_batch_size = None
        self.epochs = 1
        self.minibatch_sz = 64  # total samples per training step, split evenly across actions
        self.epsilon = 1.
        self.epsilon_decay = 0.992
        self.epsilon_floor = 0.05
        self.n_a = 4  # size of the action space (network outputs)
        self._output_dim = 4

        self.description = 'DQN Learner'
        self.update_frequency = 100  # steps between target-network syncs
        self.verbose = False

        # memory
        self.memory_max = 50000
        self.reset()

    def reset(self):
        """Reset exploration, step counter and memory, and rebuild both networks."""
        self.epsilon = 1.
        self.step = 0
        self.memory = [[] for _ in range(self.n_a)]

        # create nn's
        self.model = self._make_model_()
        self.target_model = self._make_model_()

    def _make_branch_(self):
        """Build one conv branch; returns (input tensor, flattened features)."""
        branch_input = keras.Input(shape=self._INPUT_SHAPE)
        features = Conv2D(16, (7, 7), strides=2, activation='relu')(branch_input)
        features = Conv2D(32, (2, 2), strides=1, activation='relu')(features)
        return branch_input, Flatten()(features)

    def _make_model_(self):
        """
        Build and compile the network: three identical conv branches are
        concatenated and fed through Dense(128) -> Dense(64) -> a linear layer
        with one output per action.
        """
        branches = [self._make_branch_() for _ in range(3)]
        branch_inputs = [branch_input for branch_input, _ in branches]

        x = keras.layers.concatenate([features for _, features in branches])
        x = layers.Dense(128, activation='relu')(x)
        x = layers.Dense(64, activation='relu')(x)
        x = layers.Dense(self._output_dim, activation='linear')(x)

        model = keras.Model(inputs=branch_inputs, outputs=[x], name='my_model')
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and rejected by recent Keras releases.
        model.compile(loss=losses.mean_squared_error, optimizer=Adam(learning_rate=0.001))

        return model

    def _model_update_(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def pick_action(self, state):
        """Epsilon-greedy choice: random action with probability epsilon, else argmax Q."""
        if np.random.random() < self.epsilon:
            return np.random.choice(self.n_a)
        q_values = self.model.predict(state)
        return np.argmax(q_values[0])

    def update(self, old_state, old_action, reward, current_sate, done, epsilon):
        """
        Store one transition in the bucket of `old_action`, run a training
        step, sync the target network every `update_frequency` steps, and
        adopt the supplied `epsilon` when the episode is done (while the
        current epsilon is still above the floor).
        """
        # Total number of stored transitions across ALL action buckets
        # (previously the first bucket was counted four times).
        memory_len = sum(len(bucket) for bucket in self.memory)

        # When the memory is over capacity it is emptied entirely.
        if memory_len > self.memory_max:
            self.memory = [[] for _ in self.memory]

        self.memory[old_action].append([old_state, old_action, reward, current_sate, done])

        self._batch_train_()

        if self.step % self.update_frequency == 0:
            self._model_update_()

        if done and self.epsilon > self.epsilon_floor:
            self.epsilon = epsilon

        self.step += 1

    def _batch_train_(self):
        """
        Train the online network on a minibatch holding the same number of
        transitions from every action bucket. Does nothing until each bucket
        contains more than that number of transitions.
        """
        per_action = self.minibatch_sz // self.n_a
        if any(len(bucket) <= per_action for bucket in self.memory):
            return

        # Balanced batch: the same number of random transitions per action.
        batch = []
        for bucket in self.memory:
            batch.extend(random.sample(bucket, per_action))

        # Arrays are sized to the real batch so that no all-zero padding rows
        # are fed to the network.
        batch_len = len(batch)
        states = [np.zeros((batch_len,) + self._INPUT_SHAPE) for _ in range(3)]
        next_states = [np.zeros((batch_len,) + self._INPUT_SHAPE) for _ in range(3)]
        for row, transition in enumerate(batch):
            for channel in range(3):
                states[channel][row] = transition[0][channel]
                next_states[channel][row] = transition[3][channel]

        q_s_a = self.model.predict(states)  # Q(state) for every sample
        q_s_a_d = self.model.predict(next_states)  # online Q(next_state)
        tm_q_s_a_d = self.target_model.predict(next_states)  # target Q(next_state)

        # The online network picks the next action, the target network scores it.
        a_maxes = np.argmax(q_s_a_d, axis=1)

        # Targets keep the network's output shape (batch, actions); only the
        # entry of the action actually taken is replaced.
        targets = np.array(q_s_a, copy=True)
        for row, (_, action, reward, _, done) in enumerate(batch):
            target = reward
            if not done:
                target += self.gamma * tm_q_s_a_d[row][a_maxes[row]]
            targets[row][action] = target

        self.model.fit(states, targets, epochs=1, verbose=False)  # train the NN
--------------------------------------------------------------------------------
/testing_simulation.py:
--------------------------------------------------------------------------------
1 | import traci
2 | import numpy as np
3 | import random
4 | import timeit
5 | import os
6 |
# phase codes based on environment.net.xml
# Each action owns a green phase (even code) followed by its yellow phase
# (odd code = green code + 1).
PHASE_NS_GREEN = 0  # action 0 code 00
PHASE_NS_YELLOW = 1
PHASE_NSL_GREEN = 2  # action 1 code 01
PHASE_NSL_YELLOW = 3
PHASE_EW_GREEN = 4  # action 2 code 10
PHASE_EW_YELLOW = 5
PHASE_EWL_GREEN = 6  # action 3 code 11
PHASE_EWL_YELLOW = 7
16 |
17 |
18 | class Simulation:
19 | def __init__(self, Model, TrafficGen, sumo_cmd, max_steps, green_duration, yellow_duration, num_states, num_actions):
20 | self._Model = Model
21 | self._TrafficGen = TrafficGen
22 | self._step = 0
23 | self._sumo_cmd = sumo_cmd
24 | self._max_steps = max_steps
25 | self._green_duration = green_duration
26 | self._yellow_duration = yellow_duration
27 | self._num_states = num_states
28 | self._num_actions = num_actions
29 | self._reward_episode = []
30 | self._queue_length_episode = []
31 | self._co_episode = []
32 |
33 |
def run(self, episode):
    """
    Run one testing episode in SUMO and return its summary.

    Args:
        episode: Seed passed to the traffic generator for this episode.

    Returns:
        (simulation_time, wait, co): wall-clock seconds rounded to one
        decimal, the sum of the per-step queue lengths, and the sum of the
        per-step CO readings accumulated by ``_simulate``.
    """
    start_time = timeit.default_timer()

    # first, generate the route file for this simulation and set up sumo
    self._TrafficGen.generate_routefile(seed=episode)
    traci.start(self._sumo_cmd)
    print("Simulating...")

    # inits
    self._step = 0
    self._waiting_times = {}
    self._wait_car_number = 0
    self._co = {}
    self._hc = {}
    self._nox = {}
    self._co2 = {}
    self._sum_co = 0
    self._sum_waiting_time = 0
    old_total_wait = 0
    old_action = -1  # dummy init

    try:
        while self._step < self._max_steps:
            # get current state of the intersection
            current_state = self._get_state()

            # reward of the previous action: change in the weighted number
            # of halted cars between two decisions
            current_total_wait = self._collect_waiting_car()
            reward = old_total_wait - current_total_wait

            # choose the light phase to activate, based on the current state
            action = self._choose_action(current_state)

            # a phase change goes through the yellow phase of the old action
            if self._step != 0 and old_action != action:
                self._set_yellow_phase(old_action)
                self._simulate(self._yellow_duration)

            # execute the selected phase
            self._set_green_phase(action)
            self._simulate(self._green_duration)

            # saving variables for later & accumulate reward
            old_action = action
            old_total_wait = current_total_wait
            self._reward_episode.append(reward)
    finally:
        # always release the TraCI connection, even if a step failed
        traci.close()

    simulation_time = round(timeit.default_timer() - start_time, 1)

    return simulation_time, self._sum_waiting_time, self._sum_co
116 |
117 |
def _simulate(self, steps_todo):
    """
    Advance SUMO by up to ``steps_todo`` steps and record statistics.

    After every step the queue length and CO reading are appended to the
    in-memory episode lists, added to the running sums, and written to the
    two log files (one value per line, append mode).

    Args:
        steps_todo: Number of steps requested; clipped so the episode never
            runs past ``self._max_steps``.
    """
    # do not do more steps than the maximum allowed number of steps
    if (self._step + steps_todo) >= self._max_steps:
        steps_todo = self._max_steps - self._step

    co_log_path = "/3实验/6/Deep-QLearning-Agent-for-Traffic-Signal-Control-master/test-6e-co.txt"
    wait_log_path = "/3实验/6/Deep-QLearning-Agent-for-Traffic-Signal-Control-master/test-6e-wait.txt"

    # `with` closes both logs on exit; they used to be leaked on every call.
    with open(co_log_path, 'a') as co_log, open(wait_log_path, 'a') as wait_log:
        while steps_todo > 0:
            traci.simulationStep()  # simulate 1 step in sumo
            self._step += 1  # update the step counter
            steps_todo -= 1

            # _get_queue_length returns more than two values; only the
            # queue length and the CO sum are needed here.
            queue_length, co = self._get_queue_length()[:2]
            self._queue_length_episode.append(queue_length)
            self._sum_waiting_time += queue_length
            self._sum_co += co
            self._co_episode.append(co)

            co_log.write("%s\n" % co)
            wait_log.write("%s\n" % queue_length)
141 |
def _collect_waiting_car(self):
    """
    Return the emission-class-weighted count of halted cars on the incoming roads.

    A car counts as halted when its speed is at most 0.1. Each halted car
    contributes a weight of 1 to 5 depending on its emission class, or 0 for
    a class that is not listed. The total is also stored in
    ``self._wait_car_number``.
    """
    class_weight = {
        'Zero/default': 1,
        "HBEFA3/LDV_G_EU6": 2,
        'HBEFA3/PC_G_EU4': 3,
        'HBEFA3/Bus': 4,
        'HBEFA3/HDV': 5,
    }
    approaches = ("E2TL", "N2TL", "W2TL", "S2TL")

    weighted_total = 0
    for vehicle in traci.vehicle.getIDList():
        if traci.vehicle.getRoadID(vehicle) not in approaches:
            continue
        speed = traci.vehicle.getSpeed(vehicle)
        emission_class = traci.vehicle.getEmissionClass(vehicle)
        if speed <= 0.1:
            weighted_total += class_weight.get(emission_class, 0)

    self._wait_car_number = weighted_total
    return self._wait_car_number
def _collect_waiting_times(self):
    """
    Refresh the per-car waiting time and emission records and return their totals.

    Cars on an incoming road get their latest readings stored. A tracked
    car that is no longer on an incoming road is dropped from every record.

    Returns:
        (total_waiting_time, total_co, total_hc, total_nox, total_co2);
        the four emission totals are rounded to one decimal.
    """
    approaches = ("E2TL", "N2TL", "W2TL", "S2TL")
    records = (self._waiting_times, self._co, self._hc, self._nox, self._co2)

    for vehicle in traci.vehicle.getIDList():
        # readings are listed in the same order as `records`
        readings = (
            traci.vehicle.getAccumulatedWaitingTime(vehicle),
            traci.vehicle.getCOEmission(vehicle),
            traci.vehicle.getHCEmission(vehicle),
            traci.vehicle.getNOxEmission(vehicle),
            traci.vehicle.getCO2Emission(vehicle),
        )
        if traci.vehicle.getRoadID(vehicle) in approaches:
            for record, value in zip(records, readings):
                record[vehicle] = value
        elif vehicle in self._waiting_times:
            # a car that was tracked has cleared the intersection
            for record in records:
                del record[vehicle]

    total_waiting_time = sum(self._waiting_times.values())
    total_co, total_hc, total_nox, total_co2 = (
        round(sum(record.values()), 1)
        for record in (self._co, self._hc, self._nox, self._co2)
    )
    return total_waiting_time, total_co, total_hc, total_nox, total_co2
203 |
204 |
205 | def _choose_action(self, state):
206 | """
207 | Pick the best action known based on the current state of the env
208 | """
209 | return np.argmax(self._Model.predict_one(state))
210 |
211 |
def _set_yellow_phase(self, old_action):
    """
    Switch traffic light "TL" to the yellow phase that follows ``old_action``.
    """
    # yellow codes are the odd numbers right after each action's green code
    # (ref on environment.net.xml)
    traci.trafficlight.setPhase("TL", old_action * 2 + 1)
218 |
219 |
def _set_green_phase(self, action_number):
    """
    Switch traffic light "TL" to the green phase of ``action_number``.

    Action numbers other than 0-3 leave the light unchanged.
    """
    green_phase_of = {
        0: PHASE_NS_GREEN,
        1: PHASE_NSL_GREEN,
        2: PHASE_EW_GREEN,
        3: PHASE_EWL_GREEN,
    }
    phase = green_phase_of.get(action_number)
    if phase is not None:
        traci.trafficlight.setPhase("TL", phase)
234 |
235 |
def _get_queue_length(self):
    """
    Read the halting-vehicle count and emissions of the four incoming edges.

    Returns:
        (queue_length, sum_co, sum_co2, sum_hc, sum_nox): the number of
        halted vehicles and the CO, CO2, HC and NOx readings, each summed
        over the edges N2TL, S2TL, E2TL and W2TL.
    """
    # The four discarded `car = getCOEmission(...)` queries that used to
    # open this method were dead code and have been removed.
    halt_N = traci.edge.getLastStepHaltingNumber("N2TL")
    halt_S = traci.edge.getLastStepHaltingNumber("S2TL")
    halt_E = traci.edge.getLastStepHaltingNumber("E2TL")
    halt_W = traci.edge.getLastStepHaltingNumber("W2TL")

    co_N = traci.edge.getCOEmission("N2TL")
    co_S = traci.edge.getCOEmission("S2TL")
    co_E = traci.edge.getCOEmission("E2TL")
    co_W = traci.edge.getCOEmission("W2TL")

    co2_N = traci.edge.getCO2Emission("N2TL")
    co2_S = traci.edge.getCO2Emission("S2TL")
    co2_E = traci.edge.getCO2Emission("E2TL")
    co2_W = traci.edge.getCO2Emission("W2TL")

    hc_N = traci.edge.getHCEmission("N2TL")
    hc_S = traci.edge.getHCEmission("S2TL")
    hc_E = traci.edge.getHCEmission("E2TL")
    hc_W = traci.edge.getHCEmission("W2TL")

    nox_N = traci.edge.getNOxEmission("N2TL")
    nox_S = traci.edge.getNOxEmission("S2TL")
    nox_E = traci.edge.getNOxEmission("E2TL")
    nox_W = traci.edge.getNOxEmission("W2TL")

    # operand order is kept as it was so floating-point sums are unchanged
    sum_co2 = co2_N + co2_S + co2_E + co2_W
    sum_co = co_S + co_N + co_W + co_E
    sum_hc = hc_E + hc_N + hc_S + hc_W
    sum_nox = nox_E + nox_N + nox_W + nox_S
    queue_length = halt_N + halt_S + halt_E + halt_W
    return queue_length, sum_co, sum_co2, sum_hc, sum_nox
272 |
273 |
def _get_state(self):
    """
    Retrieve the state of the intersection from sumo, in the form of cell occupancy.

    Every incoming lane group (8 groups) is split into 10 distance cells
    measured from the traffic light. Index ``group * 10 + cell`` of the
    returned vector is set to 1 when at least one car occupies that cell.

    Returns:
        A numpy vector of length ``self._num_states`` holding 0/1 flags.
    """
    # upper bounds (exclusive, in meters from the light) of cells 0..8;
    # anything farther away falls into cell 9
    cell_limits = (7, 14, 21, 28, 40, 60, 100, 160, 400)

    # x2TL_3 are the "turn left only" lanes
    lane_groups = {
        "W2TL_0": 0, "W2TL_1": 0, "W2TL_2": 0, "W2TL_3": 1,
        "N2TL_0": 2, "N2TL_1": 2, "N2TL_2": 2, "N2TL_3": 3,
        "E2TL_0": 4, "E2TL_1": 4, "E2TL_2": 4, "E2TL_3": 5,
        "S2TL_0": 6, "S2TL_1": 6, "S2TL_2": 6, "S2TL_3": 7,
    }

    state = np.zeros(self._num_states)

    for car_id in traci.vehicle.getIDList():
        lane_group = lane_groups.get(traci.vehicle.getLaneID(car_id))
        if lane_group is None:
            # car is crossing the intersection or driving away from it
            continue

        # inversion of lane pos, so a car close to the traffic light has
        # lane_pos near 0 (750 = max len of a road)
        lane_pos = 750 - traci.vehicle.getLanePosition(car_id)

        # First cell whose limit exceeds the distance. Falling through to
        # the last cell also covers out-of-range positions, which used to
        # leave the cell index unset.
        lane_cell = next(
            (cell for cell, limit in enumerate(cell_limits) if lane_pos < limit),
            len(cell_limits),
        )

        state[lane_group * 10 + lane_cell] = 1  # mark the cell as occupied

    return state
342 |
343 |
@property
def queue_length_episode(self):
    """List of queue lengths, one entry appended per simulated step."""
    return self._queue_length_episode
347 |
348 |
@property
def reward_episode(self):
    """List of rewards, one entry appended per action decision in ``run``."""
    return self._reward_episode
352 |
353 |
354 |
355 |
--------------------------------------------------------------------------------
/intersection/environment.net.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
--------------------------------------------------------------------------------
/training_simulation.py:
--------------------------------------------------------------------------------
# The state is an emission-level matrix
2 | import traci
3 | import numpy as np
4 | import random
5 | import timeit
6 | import os
7 |
# Traffic-light phase codes: each action owns a green phase (even code)
# followed by its yellow phase (odd code = green code + 1).
PHASE_NS_GREEN = 0  # action 0 code 00, north-south green
PHASE_NS_YELLOW = 1
PHASE_NSL_GREEN = 2  # action 1 code 01, north-south left turn
PHASE_NSL_YELLOW = 3
PHASE_EW_GREEN = 4  # action 2 code 10, east-west
PHASE_EW_YELLOW = 5
PHASE_EWL_GREEN = 6  # action 3 code 11, east-west left turn
PHASE_EWL_YELLOW = 7
16 |
17 |
18 | class Simulation:
19 | def __init__(self, Model, Memory, TrafficGen, sumo_cmd, gamma, max_steps, green_duration, yellow_duration, num_states, num_actions, training_epochs):
20 | self._Model = Model
21 | self._Memory = Memory
22 | self._TrafficGen = TrafficGen
23 | self._gamma = gamma
24 | self._step = 0
25 | self._sumo_cmd = sumo_cmd
26 | self._max_steps = max_steps
27 | self._green_duration = green_duration
28 | self._yellow_duration = yellow_duration
29 | self._num_states = num_states
30 | self._num_actions = num_actions
31 | self._reward_store = []
32 | self._cumulative_wait_store = []
33 | self._avg_queue_length_store = []
34 | self._total_co = []
35 | self._training_epochs = training_epochs
36 |
37 |
def run(self, episode, epsilon):
    """
    Run one training episode in SUMO, updating the model after every decision.

    The reward of an action is 0.6 times the drop in mean waiting time plus
    0.4 times the drop in a weighted emission score
    (0.046 * CO2 + 0.15 * HC + 0.804 * NOx), both measured between two
    consecutive decisions.

    Args:
        episode: Seed passed to the traffic generator for this episode.
        epsilon: Exploration rate forwarded to ``Model.update``.

    Returns:
        (simulation_time, training_time, w, co, co2, reward): elapsed
        seconds for the simulation, elapsed seconds of the (empty) training
        phase, the summed queue length, the summed CO and CO2 readings, and
        the sum of the negative rewards.
    """
    start_time = timeit.default_timer()

    self._TrafficGen.generate_routefile(seed=episode)
    traci.start(self._sumo_cmd)
    print("Simulating...")

    self._step = 0
    self._wait_car_number = 0
    self._waiting_times = {}
    self._co = {}
    self._hc = {}
    self._nox = {}
    self._co2 = {}
    self._sum_neg_reward = 0
    self._sum_queue_length = 0
    self._sum_waiting_time = 0
    self._wait_time = 0
    self._sum_co = 0
    self._sum_co2 = 0
    self.done = False
    old_total_wait = 0
    old_total_emssion = 0
    old_state = None
    old_action = -1  # dummy value until the first action has been taken

    try:
        while self._step < self._max_steps:
            # state of the intersection before the next decision
            current_state = self._get_state()

            # mean waiting time and mean emissions of the cars on the incoming roads
            current_total_wait, current_co2, current_hc, current_nox = self._collect_waiting_times()

            reward_wait = old_total_wait - current_total_wait
            current_total_emssion = (0.046 * current_co2) + (0.15 * current_hc) + (0.804 * current_nox)
            reward_emssion = old_total_emssion - current_total_emssion
            reward = (0.6 * reward_wait) + (0.4 * reward_emssion)

            # choose the phase to activate from the current state
            action = self._Model.pick_action(current_state)

            # a phase change goes through the yellow phase of the old action
            if self._step != 0 and old_action != action:
                self._set_yellow_phase(old_action)
                self._simulate(self._yellow_duration)

            # execute the selected phase
            self._set_green_phase(action)
            self._simulate(self._green_duration)  # green duration = 10, yellow = 4

            # Store the finished transition and train. The first pass has no
            # previous action yet. The old `self._step != 0` guard was always
            # true here, so a bogus transition with action -1 was stored.
            if old_action != -1:
                self._Model.update(old_state, old_action, reward, current_state, self.done, epsilon)

            old_state = current_state
            old_action = action
            old_total_wait = current_total_wait
            old_total_emssion = current_total_emssion

            # only negative rewards are accumulated, to make bad behaviour visible
            if reward < 0:
                self._sum_neg_reward += reward
    finally:
        # always release the TraCI connection, even if a step failed
        traci.close()

    self._save_episode_stats()
    print("Total reward:", self._sum_neg_reward, "- Epsilon:", round(epsilon, 2))
    print("wait time: ", self._sum_waiting_time)
    # the printed value is the CO sum; the label used to say "co2"
    print("sum co: ", self._sum_co)
    w = self._sum_waiting_time
    co = self._sum_co
    co2 = self._sum_co2
    reward = self._sum_neg_reward
    simulation_time = round(timeit.default_timer() - start_time, 1)

    # No separate training phase runs here (the model is updated inside the
    # loop), so this interval is empty; it is kept for the return signature.
    print("Training...")
    start_time = timeit.default_timer()
    training_time = round(timeit.default_timer() - start_time, 1)

    return simulation_time, training_time, w, co, co2, reward
128 |
129 |
def _simulate(self, steps_todo):
    """
    Advance SUMO by up to ``steps_todo`` steps while gathering statistics.

    ``self.done`` is set when the request reaches the episode limit, and the
    request is clipped so the episode never runs past ``self._max_steps``.

    Args:
        steps_todo: Number of simulation steps requested.
    """
    if (self._step + steps_todo) >= self._max_steps:
        self.done = True
        steps_todo = self._max_steps - self._step

    while steps_todo > 0:
        traci.simulationStep()
        self._step += 1  # update the step counter
        steps_todo -= 1
        queue_length, sum_co, sum_co2 = self._get_queue_length()
        self._sum_queue_length += queue_length
        # every queued car waits one second per step, so queue length equals
        # the seconds waited in that step
        self._sum_waiting_time += queue_length
        self._sum_co += sum_co
        # CO2 used to be fetched and dropped, leaving the total at 0
        self._sum_co2 += sum_co2
def _collect_waiting_car(self):
    """
    Return the emission-class-weighted count of halted cars on the incoming roads.

    A car counts as halted when its speed is at most 0.1. Each halted car
    contributes a weight of 1 to 5 depending on its emission class, or 0 for
    a class that is not listed. The total is also stored in
    ``self._wait_car_number``.
    """
    class_weight = {
        'Zero/default': 1,
        "HBEFA3/LDV_G_EU6": 2,
        'HBEFA3/PC_G_EU4': 3,
        'HBEFA3/Bus': 4,
        'HBEFA3/HDV': 5,
    }
    approaches = ("E2TL", "N2TL", "W2TL", "S2TL")

    weighted_total = 0
    for vehicle in traci.vehicle.getIDList():
        if traci.vehicle.getRoadID(vehicle) not in approaches:
            continue
        speed = traci.vehicle.getSpeed(vehicle)
        emission_class = traci.vehicle.getEmissionClass(vehicle)
        if speed <= 0.1:
            weighted_total += class_weight.get(emission_class, 0)

    self._wait_car_number = weighted_total
    return self._wait_car_number
176 |
177 |
178 |
def _collect_waiting_times(self):
    """
    Refresh the per-car records and return per-car averages for the incoming roads.

    Cars on an incoming road get their latest waiting time and emission
    readings stored. A tracked car that is no longer on an incoming road is
    dropped from every record.

    Returns:
        (total_waiting_time, total_co2, total_hc, total_nox): each record
        summed and divided by the number of cars currently on the incoming
        roads (by 1 when there are none).
    """
    approaches = ("E2TL", "N2TL", "W2TL", "S2TL")
    records = (self._waiting_times, self._co, self._hc, self._nox, self._co2)

    cars_on_approach = 0
    for vehicle in traci.vehicle.getIDList():
        # readings are listed in the same order as `records`
        readings = (
            traci.vehicle.getAccumulatedWaitingTime(vehicle),
            traci.vehicle.getCOEmission(vehicle),
            traci.vehicle.getHCEmission(vehicle),
            traci.vehicle.getNOxEmission(vehicle),
            traci.vehicle.getCO2Emission(vehicle),
        )
        if traci.vehicle.getRoadID(vehicle) in approaches:
            for record, value in zip(records, readings):
                record[vehicle] = value
            cars_on_approach += 1
        elif vehicle in self._waiting_times:
            # a car that was tracked has cleared the intersection
            for record in records:
                del record[vehicle]

    # avoid dividing by zero when no car is on an incoming road
    divisor = cars_on_approach if cars_on_approach != 0 else 1

    mean_wait = (sum(self._waiting_times.values())) / divisor
    mean_co2 = sum(self._co2.values()) / divisor
    mean_hc = sum(self._hc.values()) / divisor
    mean_nox = sum(self._nox.values()) / divisor

    return mean_wait, mean_co2, mean_hc, mean_nox
221 |
222 |
223 | def _choose_action(self, state, epsilon):
224 | """
225 | 根据epsilon贪婪策略,决定是否进行探索性或贪婪策略行动
226 | """
227 | if random.random() < epsilon:
228 | return random.randint(0, self._num_actions - 1) # 随机行动
229 | else:
230 | return np.argmax(self._Model.predict_one(state)) # 当前状态下的最佳行动
231 |
232 |
def _set_yellow_phase(self, old_action):
    """
    Switch traffic light "TL" to the yellow phase that follows ``old_action``.
    """
    # yellow codes are the odd numbers right after each action's green code
    # (ref on environment.net.xml)
    traci.trafficlight.setPhase("TL", old_action * 2 + 1)
239 |
240 |
def _set_green_phase(self, action_number):
    """
    Switch traffic light "TL" to the green phase of ``action_number``.

    Action numbers other than 0-3 leave the light unchanged.
    """
    green_phase_of = {
        0: PHASE_NS_GREEN,  # north-south green
        1: PHASE_NSL_GREEN,  # north-south left-turn green
        2: PHASE_EW_GREEN,  # east-west green
        3: PHASE_EWL_GREEN,  # east-west left-turn green
    }
    phase = green_phase_of.get(action_number)
    if phase is not None:
        traci.trafficlight.setPhase("TL", phase)
253 |
254 |
def _get_queue_length(self):
    """
    Read the halting-vehicle count and CO / CO2 readings of the four incoming edges.

    Returns:
        (queue_length, sum_co, sum_co2), each summed over the edges N2TL,
        S2TL, E2TL and W2TL.
    """
    halted, co, co2 = {}, {}, {}
    for edge in ("N2TL", "S2TL", "E2TL", "W2TL"):
        # halting number: vehicles stopped on the edge in the last time step
        halted[edge] = traci.edge.getLastStepHaltingNumber(edge)
        co[edge] = traci.edge.getCOEmission(edge)
        co2[edge] = traci.edge.getCO2Emission(edge)

    # operand order is kept as it was so floating-point sums are unchanged
    queue_length = halted["N2TL"] + halted["S2TL"] + halted["E2TL"] + halted["W2TL"]
    sum_co = co["S2TL"] + co["N2TL"] + co["W2TL"] + co["E2TL"]
    sum_co2 = co2["S2TL"] + co2["E2TL"] + co2["W2TL"] + co2["N2TL"]

    return queue_length, sum_co, sum_co2
276 |
def _get_state(self):
    """
    Build the three 16x16 state grids (position, velocity, emission).

    Each incoming edge owns four rows (one per lane index) and each column
    is a 7-unit cell counted from the stop line, so only cars within 16
    cells of it are recorded. A cell stores 1 in the position grid, the
    speed divided by 14 in the velocity grid, and the sum of the CO, CO2, HC
    and NOx readings in the emission grid.

    Returns:
        [position, velocity, emission], each reshaped to (1, 16, 16, 1).
        The position grid is integer, the other two are float.
    """
    grid_size = 16
    cell_length = 7
    speed_limit = 14

    # (edge id, row of lane index 0, row step per lane index)
    # E2TL fills rows 3..0, W2TL rows 4..7, N2TL rows 8..11, S2TL rows 12..15
    approaches = (
        ('E2TL', 3, -1),
        ('W2TL', 4, 1),
        ('N2TL', 8, 1),
        ('S2TL', 12, 1),
    )

    position = np.zeros((grid_size, grid_size), dtype=int)
    velocity = np.zeros((grid_size, grid_size))
    emission = np.zeros((grid_size, grid_size))

    for edge_id, base_row, row_step in approaches:
        # vehicles that were on this edge in the last simulation step
        for v in traci.edge.getLastStepVehicleIDs(edge_id):
            # distance to the end of the 750-long road, in cells
            col = int(abs(750 - traci.vehicle.getLanePosition(v)) / cell_length)
            if col >= grid_size:
                continue
            row = base_row + row_step * traci.vehicle.getLaneIndex(v)
            position[row][col] = 1
            velocity[row][col] = traci.vehicle.getSpeed(v) / speed_limit
            emission[row][col] = (
                traci.vehicle.getCOEmission(v) + traci.vehicle.getCO2Emission(v)
                + traci.vehicle.getHCEmission(v) + traci.vehicle.getNOxEmission(v)
            )

    shape = (1, grid_size, grid_size, 1)
    return [position.reshape(shape), velocity.reshape(shape), emission.reshape(shape)]
349 | # def _get_state(self):
350 | # positionMatrix = [] # 位置矩阵
351 | # velocityMatrix = [] # 速度矩阵
352 | # emssionMatrix = []
353 | #
354 | # cellLength = 7
355 | # offset = 11
356 | # speedLimit = 14
357 | #
358 | # junctionPosition = traci.junction.getPosition('TL')[0] # 交叉口位置
359 | # vehicles_road1 = traci.edge.getLastStepVehicleIDs('E2TL') # 返回上一个模拟步骤中指定边上的车辆ID列表
360 | # vehicles_road2 = traci.edge.getLastStepVehicleIDs('W2TL')
361 | # vehicles_road3 = traci.edge.getLastStepVehicleIDs('N2TL')
362 | # vehicles_road4 = traci.edge.getLastStepVehicleIDs('S2TL')
363 | # for i in range(16):
364 | # positionMatrix.append([])
365 | # velocityMatrix.append([])
366 | # emssionMatrix.append([])
367 | # for j in range(12):
368 | # positionMatrix[i].append(0)
369 | # velocityMatrix[i].append(0)
370 | # emssionMatrix[i].append(0)
371 | #
372 | # for v in vehicles_road1:
373 | # ind = int(abs(750 - traci.vehicle.getLanePosition(v)) / cellLength)
374 | # # e = traci.vehicle.getEmissionClass(v)
375 | # e = traci.vehicle.getCOEmission(v) + traci.vehicle.getCO2Emission(v) + traci.vehicle.getHCEmission(
376 | # v) + traci.vehicle.getNOxEmission(v)
377 | # if (ind < 12):
378 | # positionMatrix[3 - traci.vehicle.getLaneIndex(v)][ind] = 1
379 | # velocityMatrix[3 - traci.vehicle.getLaneIndex(v)][ind] = traci.vehicle.getSpeed(v) / speedLimit
380 | # emssionMatrix[3 - traci.vehicle.getLaneIndex(v)][ind] = e
381 | #
382 | # for v in vehicles_road2:
383 | #
384 | # ind = int(abs(750 - traci.vehicle.getLanePosition(v)) / cellLength)
385 | # e = traci.vehicle.getCOEmission(v) + traci.vehicle.getCO2Emission(v) + traci.vehicle.getHCEmission(
386 | # v) + traci.vehicle.getNOxEmission(v)
387 | # if (ind < 12):
388 | # positionMatrix[4 + traci.vehicle.getLaneIndex(v)][ind] = 1
389 | # velocityMatrix[4 + traci.vehicle.getLaneIndex(v)][ind] = traci.vehicle.getSpeed(v) / speedLimit
390 | # emssionMatrix[4 + traci.vehicle.getLaneIndex(v)][ind] = e
391 | #
392 | # junctionPosition = traci.junction.getPosition('TL')[1]
393 | # for v in vehicles_road3:
394 | # ind = int(abs(750 - traci.vehicle.getLanePosition(v)) / cellLength)
395 | # e = traci.vehicle.getCOEmission(v) + traci.vehicle.getCO2Emission(v) + traci.vehicle.getHCEmission(
396 | # v) + traci.vehicle.getNOxEmission(v)
397 | # if (ind < 12):
398 | # positionMatrix[8 + traci.vehicle.getLaneIndex(v)][ind] = 1
399 | # velocityMatrix[8 + traci.vehicle.getLaneIndex(v)][ind] = traci.vehicle.getSpeed(v) / speedLimit
400 | # emssionMatrix[8 + traci.vehicle.getLaneIndex(v)][ind] = e
401 | #
402 | # for v in vehicles_road4:
403 | # ind = int(abs(750 - traci.vehicle.getLanePosition(v)) / cellLength)
404 | # e = traci.vehicle.getCOEmission(v) + traci.vehicle.getCO2Emission(v) + traci.vehicle.getHCEmission(
405 | # v) + traci.vehicle.getNOxEmission(v)
406 | # if (ind < 12):
407 | # positionMatrix[12 + traci.vehicle.getLaneIndex(v)][ind] = 1
408 | # velocityMatrix[12 + traci.vehicle.getLaneIndex(v)][ind] = traci.vehicle.getSpeed(v) / speedLimit
409 | # emssionMatrix[12 + traci.vehicle.getLaneIndex(v)][ind] = e
410 | #
411 | # position = np.array(positionMatrix)
412 | # position = position.reshape(1, 16, 12, 1)
413 | #
414 | # velocity = np.array(velocityMatrix)
415 | # velocity = velocity.reshape(1, 16, 12, 1)
416 | #
417 | # emssion = np.array(emssionMatrix)
418 | # emssion = emssion.reshape(1, 16, 12, 1)
419 | #
420 | # return [position, emssion, velocity]
421 |
422 |
423 |
def _replay(self):
    """
    Draw a batch of samples from the replay memory and run one training step.

    Each sample is indexed as ``(state, action, reward, next_state)`` and each
    state is a three-element sequence that is unpacked here, in order, into
    the ``p`` / ``em`` / ``v`` input arrays (position, emission and velocity
    by the local naming).

    For every sample the Q-value of the action that was taken is replaced by
    ``reward + gamma * max(Q(next_state))`` and the network is then fitted on
    the resulting targets.

    The arrays are sized from the number of samples actually returned by the
    memory rather than from a fixed 100, so a short batch is no longer padded
    with all-zero training rows and a larger ``batch_size`` no longer
    overflows the buffers. Nothing is allocated or trained when the memory
    returns no samples.
    """
    batch = self._Memory.get_samples(self._Model.batch_size)
    n_samples = len(batch)
    if n_samples == 0:  # the memory is not full enough yet
        return

    # One row per sample; each row holds a 16 x 12 x 1 grid.
    grid_shape = (n_samples, 16, 12, 1)
    p1, em1, v1 = np.zeros(grid_shape), np.zeros(grid_shape), np.zeros(grid_shape)
    p2, em2, v2 = np.zeros(grid_shape), np.zeros(grid_shape), np.zeros(grid_shape)

    for i, sample in enumerate(batch):
        state, next_state = sample[0], sample[3]
        p1[i], em1[i], v1[i] = state[0], state[1], state[2]
        p2[i], em2[i], v2[i] = next_state[0], next_state[1], next_state[2]

    states = [p1, em1, v1]
    next_states = [p2, em2, v2]

    # prediction
    q_s_a = self._Model.predict_batch(states)  # predict Q(state), for every sample
    q_s_a_d = self._Model.predict_batch(next_states)  # predict Q(next_state), for every sample

    y = np.zeros((n_samples, 1, 4))
    for i, sample in enumerate(batch):
        action, reward = sample[1], sample[2]
        current_q = q_s_a[i]  # get the Q(state) predicted before
        current_q[action] = reward + self._gamma * np.amax(q_s_a_d[i])  # update Q(state, action)
        y[i] = current_q

    # The training inputs are exactly the arrays already assembled for the
    # prediction above, so they are reused instead of being rebuilt.
    self._Model.train_batch(states, y)  # train the NN
468 |
469 |
def _save_episode_stats(self):
    """
    Record this episode's statistics so they can be plotted at the end of the session.

    Appends one value to each of the three history lists: the summed negative
    reward, the summed waiting time, and the summed queue length divided by
    ``_max_steps``.
    """
    # how much negative reward in this episode
    self._reward_store.append(self._sum_neg_reward)

    # total number of seconds waited by cars in this episode
    self._cumulative_wait_store.append(self._sum_waiting_time)

    # average number of queued cars per step, in this episode
    mean_queue_per_step = self._sum_queue_length / self._max_steps
    self._avg_queue_length_store.append(mean_queue_per_step)
477 |
478 |
@property
def reward_store(self):
    """Return the list that `_save_episode_stats` appends each episode's `_sum_neg_reward` to."""
    return self._reward_store
482 |
483 |
@property
def cumulative_wait_store(self):
    """Return the list that `_save_episode_stats` appends each episode's `_sum_waiting_time` to."""
    return self._cumulative_wait_store
487 |
488 |
@property
def avg_queue_length_store(self):
    """Return the list of per-episode average queue lengths filled by `_save_episode_stats`."""
    return self._avg_queue_length_store
492 |
493 |
--------------------------------------------------------------------------------
/generator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 |
class TrafficGenerator:
    """
    Writes the per-episode route file "intersection/episode_routes.rou.xml".

    Departure steps are drawn from a Weibull distribution and rescaled onto
    0..max_steps; every generated car is then assigned a random route
    (straight or turning) and one of five car classes.
    """

    def __init__(self, max_steps, n_cars_generated):
        """Store the episode length (in steps) and the number of cars to generate."""
        self._n_cars_generated = n_cars_generated # how many cars per episode
        self._max_steps = max_steps

    def generate_routefile(self, seed):
        """
        Generate the vehicles for one simulation episode and write them to the route file.

        `seed` is passed to `np.random.seed`, so the same seed reproduces the
        same timings, routes and car classes.
        """
        np.random.seed(seed) # make the generated vehicles identical for a given seed

        # vehicle generation times follow a Weibull distribution
        timings = np.random.weibull(2, self._n_cars_generated)
        timings = np.sort(timings) # sort the timings in ascending order

        # rescale the distribution to fit the interval 0:max_steps
        car_gen_steps = []
        # NOTE(review): this reads the second-smallest sample (index 1), not timings[0];
        # it raises IndexError when fewer than two cars are requested - confirm intent.
        min_old = math.floor(timings[1])
        max_old = math.ceil(timings[-1])
        min_new = 0
        max_new = self._max_steps
        # NOTE(review): np.append returns a new array on every call, so this loop
        # copies the accumulated array once per car.
        for value in timings:
            car_gen_steps = np.append(car_gen_steps, ((max_new - min_new) / (max_old - min_old)) * (value - max_old) + max_new)

        car_gen_steps = np.rint(car_gen_steps) # round every value to int -> effective steps when a car will be generated

        # produce the file for cars generation, one car per line
        # NOTE(review): every string literal printed below is empty or a single space
        # in this view - the markup was presumably lost before review. As written,
        # `' ' % (car_counter, step)` raises TypeError (no conversion specifiers);
        # confirm the literals against the real file before relying on this method.
        with open("intersection/episode_routes.rou.xml", "w") as routes:
            print("""


















""", file=routes)

            for car_counter, step in enumerate(car_gen_steps):
                straight_or_turn = np.random.uniform()
                # car_class picks one of five classes with shares 30% / 30% / 15% / 15% / 10%;
                # a draw exactly equal to a threshold falls through to the final `else`.
                car_class = np.random.uniform()
                if straight_or_turn < 0.75: # choose direction: straight or turn - 75% of times the car goes straight
                    route_straight = np.random.randint(1, 5) # choose a random source & destination (1..4, upper bound exclusive)
                    if route_straight == 1:
                        if 0 < car_class < 0.3:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        else:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                    elif route_straight == 2:
                        if 0 < car_class < 0.3:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        else:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)

                    elif route_straight == 3:
                        if 0 < car_class < 0.3:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        else:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)

                    else:
                        if 0 < car_class < 0.3:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:

                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        else:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)

                else: # car that turn -25% of the time the car turns
                    route_turn = np.random.randint(1, 9) # choose random source source & destination (1..8, upper bound exclusive)
                    if route_turn == 1:
                        if 0 < car_class < 0.3:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        else:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)

                    elif route_turn == 2:
                        if 0 < car_class < 0.3:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        else:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)

                    elif route_turn == 3:
                        if 0 < car_class < 0.3:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        else:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)

                    elif route_turn == 4:
                        if 0 < car_class < 0.3:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        else:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)

                    elif route_turn == 5:
                        if 0 < car_class < 0.3:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        else:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)

                    elif route_turn == 6:
                        if 0 < car_class < 0.3:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        else:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)

                    elif route_turn == 7:
                        if 0 < car_class < 0.3:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        else:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)

                    elif route_turn == 8:
                        if 0 < car_class < 0.3:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.3 < car_class < 0.6:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.6 < car_class < 0.75:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        elif 0.75 < car_class < 0.9:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)
                        else:
                            print(
                                ' ' % (
                                    car_counter, step), file=routes)

            print("", file=routes)
--------------------------------------------------------------------------------
/intersection/episode_routes.rou.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 |
430 |
431 |
432 |
433 |
434 |
435 |
436 |
437 |
438 |
439 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
463 |
464 |
465 |
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 |
475 |
476 |
477 |
478 |
479 |
480 |
481 |
482 |
483 |
484 |
485 |
486 |
487 |
488 |
489 |
490 |
491 |
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 |
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 |
517 |
518 |
519 |
520 |
521 |
522 |
523 |
524 |
525 |
526 |
527 |
528 |
529 |
530 |
531 |
532 |
533 |
534 |
535 |
536 |
537 |
538 |
539 |
540 |
541 |
542 |
543 |
544 |
545 |
546 |
547 |
548 |
549 |
550 |
551 |
552 |
553 |
554 |
555 |
556 |
557 |
558 |
559 |
560 |
561 |
562 |
563 |
564 |
565 |
566 |
567 |
568 |
569 |
570 |
571 |
572 |
573 |
574 |
575 |
576 |
577 |
578 |
579 |
580 |
581 |
582 |
583 |
584 |
585 |
586 |
587 |
588 |
589 |
590 |
591 |
592 |
593 |
594 |
595 |
596 |
597 |
598 |
599 |
600 |
601 |
602 |
603 |
604 |
605 |
606 |
607 |
608 |
609 |
610 |
611 |
612 |
613 |
614 |
615 |
616 |
617 |
618 |
619 |
620 |
621 |
622 |
--------------------------------------------------------------------------------