├── Code ├── Model │ └── FinalModel.hdf5 ├── SensorDataCollection.py ├── main_script1.py └── test_script.py ├── Final Presentaion.pptx ├── README.md ├── Research Papers ├── An Empirical Evaluation of Deep Learning on Highway Driving.pdf ├── CARLA (2).pdf ├── CARMA- A Deep Reinforcement Learning Approach to Autonomous Driving.pdf ├── CONTINUOUS CONTROL WITH DEEP REINFORCEMENT LEARNING.pdf ├── Deep Reinforcement Learning for Simulated Autonomous Vehicle Control.pdf ├── Deep Reinforcement Learning framework for Autonomous Driving.pdf ├── DeepDriving Learning Affordance for Direct Perception in Autonomous Driving.pdf ├── Driverless Car_ Autonomous Driving Using Deep Reinforcement Learning In Urban Environment.pdf ├── Evolving Large-Scale Neural Networks for Vision-Based Reinforcement Learning.pdf ├── High Speed Obstacle Avoidance using Monocular Vision and Reinforcement Learning.pdf ├── Improving Sample Efficiency in Model Free Reinforcement Learning from Images.pdf ├── Learning Driving Styles for Autonomous Vehicles from Demonstration (1).pdf ├── Learning to Drive using Inverse Reinforcement Learning ANd DQN.pdf ├── Navigating Occluded Intersections with Autonomous Vehicles using Deep Reinforcement Learning.pdf ├── Our Research Paper │ └── Teach An Agent To Drive A Car In A Virtual Environment.docx ├── Planning for Autonomous Cars that Leverage Effects on Human Actions.pdf ├── Safe, Multi-Agent, Reinforcement Learning for.pdf ├── VIRTUAL TO REAL REINFORCEMENT LEARNING.pdf └── World Models.pdf └── Results Media ├── Accuracy12000.jpg ├── Agent_preview_1.mp4 ├── Agent_preview_2.mp4 ├── Epsilon12000.jpg ├── Loss12000.jpg ├── RewardAvg12000.jpg ├── RewardMax12000.JPG ├── RewardMin12000.jpg ├── Training_Video_1.mp4 └── Training_video_2.mp4 /Code/Model/FinalModel.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Code/Model/FinalModel.hdf5 -------------------------------------------------------------------------------- /Code/SensorDataCollection.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Teach An Agent To Drive A Car In A Virtual Environment 3 | 4 | @author: Dewang Shah, Anant Vignesh Mahadhevan and Rakesh Ramesh 5 | ''' 6 | 7 | #Importing necessary packages 8 | import glob 9 | import os 10 | import sys 11 | import random 12 | import time 13 | import numpy as np 14 | import cv2 15 | import math 16 | from collections import deque 17 | from keras.applications.xception import Xception 18 | from keras.layers import Dense, GlobalAveragePooling2D 19 | from keras.optimizers import Adam 20 | from keras.models import Model 21 | 22 | #Importing CARLA environment 23 | try: 24 | sys.path.append(glob.glob('../carla/dist/carla-*%d.%d-%s.egg' % ( 25 | sys.version_info.major, 26 | sys.version_info.minor, 27 | 'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0]) 28 | except IndexError: 29 | pass 30 | import carla 31 | 32 | #Setting CARLA environmental parameters 33 | showPreview = False 34 | imageWidth = 640 35 | imageHeight = 480 36 | secondsPerEpisode = 10 37 | replayMemorySize = 5_000 38 | minReplayMemorySize = 1_000 39 | modelName = "Xception" 40 | 41 | #Class to create CARLA environment 42 | class CarlaEnvironment: 43 | showCam = showPreview 44 | steerAmt = 1.0 45 | imageWidth = imageWidth 46 | imageHeight = imageHeight 47 | front_camera = None 48 | 49 | def __init__(self): 50 | 
self.client = carla.Client("localhost", 2000) #Run CARLA as the server in port 2000 51 | self.client.set_timeout(10.0) #Set server environment timeout as 10 seconds 52 | self.world = self.client.get_world() #Initialize CARLA world 53 | self.blueprint_library = self.world.get_blueprint_library() #Initialize CARLA world blueprint 54 | self.model_3 = self.blueprint_library.filter("model3")[0] #Initialize Tesla Model 3 car blueprint 55 | 56 | def reset(self): 57 | self.collision_hist = [] 58 | self.actor_list = [] 59 | 60 | #Get a random spawn point from the map and spawn the vehicle in the spawn point 61 | self.transform = random.choice(self.world.get_map().get_spawn_points()) 62 | self.vehicle = self.world.spawn_actor(self.model_3, self.transform) 63 | #Add the vehicle to the list of actors in the environment 64 | self.actor_list.append(self.vehicle) 65 | 66 | #Set the RGB Camera sensor attributes 67 | self.rgb_cam = self.blueprint_library.find('sensor.camera.rgb') 68 | self.rgb_cam.set_attribute("image_size_x", f"{self.imageWidth}") 69 | self.rgb_cam.set_attribute("image_size_y", f"{self.imageHeight}") 70 | self.rgb_cam.set_attribute("fov", f"110") 71 | 72 | #Add the RGB Camera Sensor to the car 73 | transform = carla.Transform(carla.Location(x=2.5, z=0.7)) 74 | self.sensor = self.world.spawn_actor(self.rgb_cam, transform, attach_to=self.vehicle) 75 | #Add the camera sensor to the list of actors in the environment 76 | self.actor_list.append(self.sensor) 77 | self.sensor.listen(lambda data: self.processImage(data)) 78 | 79 | self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0)) 80 | time.sleep(4) 81 | 82 | #Add the Collision Sensor to the car 83 | colsensor = self.blueprint_library.find("sensor.other.collision") 84 | self.colsensor = self.world.spawn_actor(colsensor, transform, attach_to=self.vehicle) 85 | #Add the collision sensor to the list of actors in the environment 86 | self.actor_list.append(self.colsensor) 87 | self.colsensor.listen(lambda event: self.collisionData(event)) 88 | 89 | while self.front_camera is None: 90 | time.sleep(0.01) 91 | 92 | self.episode_start = time.time() 93 | self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0)) 94 | 95 | return self.front_camera 96 | 97 | def processImage(self, image): 98 | i = np.array(image.raw_data) 99 | i2 = i.reshape((self.imageHeight, self.imageWidth, 4)) 100 | i3 = i2[:, :, :3] 101 | if self.showCam: 102 | cv2.imshow("", i3) 103 | cv2.waitKey(1) 104 | self.front_camera = i3 105 | 106 | def collisionData(self, event): 107 | self.collision_hist.append(event) 108 | 109 | def step(self, action): 110 | if action == 0: 111 | self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=-1*self.steerAmt)) 112 | elif action == 1: 113 | self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer= 0)) 114 | elif action == 2: 115 | self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=1*self.steerAmt)) 116 | 117 | v = self.vehicle.get_velocity() 118 | kmh = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2)) 119 | 120 | if len(self.collision_hist) != 0: 121 | done = True 122 | reward = -200 123 | elif kmh < 50: 124 | done = False 125 | reward = -1 126 | else: 127 | done = False 128 | reward = 1 129 | 130 | if self.episode_start + secondsPerEpisode < time.time(): 131 | done = True 132 | 133 | return self.front_camera, reward, done, None 134 | 135 | 136 | class DQNAgent: 137 | def __init__(self): 138 | self.model = self.create_model() 139 | self.target_model = self.create_model() 140 
| self.target_model.set_weights(self.model.get_weights()) 141 | 142 | self.replay_memory = deque(maxlen=replayMemorySize) 143 | 144 | self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{modelName}-{int(time.time())}") 145 | self.target_update_counter = 0 146 | self.graph = tf.get_default_graph() 147 | 148 | self.terminate = False 149 | self.last_logged_episode = 0 150 | self.training_initialized = False 151 | 152 | def create_model(self): 153 | base_model = Xception(weights=None, include_top=False, input_shape=(imageHeight, imageWidth,3)) 154 | 155 | x = base_model.output 156 | x = GlobalAveragePooling2D()(x) 157 | 158 | predictions = Dense(3, activation="linear")(x) 159 | model = Model(inputs=base_model.input, outputs=predictions) 160 | model.compile(loss="mse", optimizer=Adam(lr=0.001), metrics=["accuracy"]) 161 | return model -------------------------------------------------------------------------------- /Code/main_script1.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Teach An Agent To Drive A Car In A Virtual Environment 3 | 4 | @author: Dewang Shah, Anant Vignesh Mahadhevan and Rakesh Ramesh 5 | ''' 6 | import glob 7 | import os 8 | import sys 9 | import random 10 | import time 11 | import numpy as np 12 | import cv2 13 | import math 14 | from collections import deque 15 | from keras.applications.xception import Xception 16 | from keras.layers import Dense, AveragePooling2D, Flatten 17 | from keras.optimizers import Adam 18 | from keras.models import Model 19 | from keras.callbacks import TensorBoard 20 | from keras.callbacks import ModelCheckpoint 21 | 22 | import tensorflow as tf 23 | from keras.layers import Activation, Dense 24 | import keras.backend.tensorflow_backend as backend 25 | from keras import Sequential 26 | from keras.layers.convolutional import Conv2D 27 | from threading import Thread 28 | 29 | from tqdm import tqdm 30 | 31 | try: 32 | sys.path.append(glob.glob('../carla/dist/carla-*%d.%d-%s.egg' % ( 33 | sys.version_info.major, 34 | sys.version_info.minor, 35 | 'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0]) 36 | except IndexError: 37 | pass 38 | import carla 39 | 40 | 41 | SHOW_PREVIEW = False 42 | IM_WIDTH = 640 43 | IM_HEIGHT = 480 44 | SECONDS_PER_EPISODE = 10 45 | REPLAY_MEMORY_SIZE = 5_000 46 | MIN_REPLAY_MEMORY_SIZE = 1_000 47 | MINIBATCH_SIZE = 16 48 | PREDICTION_BATCH_SIZE = 1 49 | TRAINING_BATCH_SIZE = MINIBATCH_SIZE // 4 50 | UPDATE_TARGET_EVERY = 5 51 | MODEL_NAME = "Xception" 52 | 53 | MEMORY_FRACTION = 0.4 54 | MIN_REWARD = -200 55 | 56 | EPISODES = 10000 57 | 58 | DISCOUNT = 0.99 59 | epsilon = 1 60 | EPSILON_DECAY = 0.95 ## 0.9975 99975 61 | MIN_EPSILON = 0.001 62 | 63 | AGGREGATE_STATS_EVERY = 10 64 | 65 | 66 | # Own Tensorboard class 67 | class ModifiedTensorBoard(TensorBoard): 68 | 69 | # Overriding init to set initial step and writer (we want one log file for all .fit() calls) 70 | def __init__(self, **kwargs): 71 | super().__init__(**kwargs) 72 | self.step = 1 73 | self.writer = tf.summary.FileWriter(self.log_dir) 74 | 75 | # Overriding this method to stop creating default log writer 76 | def set_model(self, model): 77 | pass 78 | 79 | # Overrided, saves logs with our step number 80 | # (otherwise every .fit() will start writing from 0th step) 81 | def on_epoch_end(self, epoch, logs=None): 82 | self.update_stats(**logs) 83 | 84 | # Overrided 85 | # We train for one batch only, no need to save anything at epoch end 86 | def on_batch_end(self, batch, logs=None): 87 | pass 88 | 89 | # 
Overrided, so won't close writer 90 | def on_train_end(self, _): 91 | pass 92 | 93 | # Custom method for saving own metrics 94 | # Creates writer, writes custom metrics and closes writer 95 | def update_stats(self, **stats): 96 | self._write_logs(stats, self.step) 97 | 98 | 99 | class CarEnv: 100 | SHOW_CAM = SHOW_PREVIEW 101 | STEER_AMT = 1.0 102 | im_width = IM_WIDTH 103 | im_height = IM_HEIGHT 104 | front_camera = None 105 | 106 | def __init__(self): 107 | self.client = carla.Client("localhost", 2000) 108 | self.client.set_timeout(10.0) 109 | self.world = self.client.get_world() 110 | self.blueprint_library = self.world.get_blueprint_library() 111 | self.model_3 = self.blueprint_library.filter("model3")[0] 112 | 113 | def reset(self): 114 | self.collision_hist = [] 115 | self.actor_list = [] 116 | 117 | self.transform = random.choice(self.world.get_map().get_spawn_points()) 118 | self.vehicle = self.world.spawn_actor(self.model_3, self.transform) 119 | self.actor_list.append(self.vehicle) 120 | 121 | self.rgb_cam = self.blueprint_library.find('sensor.camera.rgb') 122 | self.rgb_cam.set_attribute("image_size_x", f"{self.im_width}") 123 | self.rgb_cam.set_attribute("image_size_y", f"{self.im_height}") 124 | self.rgb_cam.set_attribute("fov", f"110") 125 | 126 | transform = carla.Transform(carla.Location(x=2.5, z=0.7)) 127 | self.sensor = self.world.spawn_actor(self.rgb_cam, transform, attach_to=self.vehicle) 128 | self.actor_list.append(self.sensor) 129 | self.sensor.listen(lambda data: self.process_img(data)) 130 | 131 | self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0)) 132 | time.sleep(4) 133 | 134 | colsensor = self.blueprint_library.find("sensor.other.collision") 135 | self.colsensor = self.world.spawn_actor(colsensor, transform, attach_to=self.vehicle) 136 | self.actor_list.append(self.colsensor) 137 | self.colsensor.listen(lambda event: self.collision_data(event)) 138 | 139 | while self.front_camera is None: 140 | time.sleep(0.01) 141 | 142 | self.episode_start = time.time() 143 | self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0)) 144 | 145 | return self.front_camera 146 | 147 | def collision_data(self, event): 148 | self.collision_hist.append(event) 149 | 150 | def process_img(self, image): 151 | i = np.array(image.raw_data) 152 | #print(i.shape) 153 | i2 = i.reshape((self.im_height, self.im_width, 4)) 154 | i3 = i2[:, :, :3] 155 | if self.SHOW_CAM: 156 | cv2.imshow("", i3) 157 | cv2.waitKey(1) 158 | self.front_camera = i3 159 | 160 | def step(self, action): 161 | if action == 0: 162 | self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=-1*self.STEER_AMT)) 163 | elif action == 1: 164 | self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer= 0)) 165 | elif action == 2: 166 | self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=1*self.STEER_AMT)) 167 | 168 | v = self.vehicle.get_velocity() 169 | kmh = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2)) 170 | 171 | if len(self.collision_hist) != 0: 172 | done = True 173 | reward = -200 174 | elif kmh < 50: 175 | done = False 176 | reward = -1 177 | else: 178 | done = False 179 | reward = 1 180 | 181 | if self.episode_start + SECONDS_PER_EPISODE < time.time(): 182 | done = True 183 | 184 | return self.front_camera, reward, done, None 185 | 186 | 187 | class DQNAgent: 188 | 189 | def __init__(self): 190 | self.model = self.create_model() 191 | self.target_model = self.create_model() 192 | self.target_model.set_weights(self.model.get_weights()) 193 | 194 
| self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE) 195 | 196 | self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{MODEL_NAME}-{int(time.time())}") 197 | self.target_update_counter = 0 198 | self.graph = tf.get_default_graph() 199 | 200 | self.terminate = False 201 | self.last_logged_episode = 0 202 | self.training_initialized = False 203 | 204 | def create_model(self): 205 | model = Sequential() 206 | 207 | model.add(Conv2D(64, (3, 3), input_shape=(IM_HEIGHT, IM_WIDTH,3), padding='same')) 208 | model.add(Activation('relu')) 209 | model.add(AveragePooling2D(pool_size=(5, 5), strides=(3, 3), padding='same')) 210 | 211 | model.add(Conv2D(64, (3, 3), padding='same')) 212 | model.add(Activation('relu')) 213 | model.add(AveragePooling2D(pool_size=(5, 5), strides=(3, 3), padding='same')) 214 | 215 | model.add(Conv2D(64, (3, 3), padding='same')) 216 | model.add(Activation('relu')) 217 | model.add(AveragePooling2D(pool_size=(5, 5), strides=(3, 3), padding='same')) 218 | 219 | model.add(Flatten()) 220 | 221 | model.add(Dense(3, activation="linear")) 222 | model = Model(inputs=model.input, outputs=model.output) 223 | model.compile(loss="mse", optimizer=Adam(lr=0.001), metrics=["accuracy"]) 224 | 225 | #CHECKPOINT 226 | #filepath="weights-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5" 227 | #checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=False, mode='max') 228 | #callbacks_list = [self.tensorboard,checkpoint] 229 | return model 230 | 231 | def update_replay_memory(self, transition): 232 | # transition = (current_state, action, reward, new_state, done) 233 | self.replay_memory.append(transition) 234 | 235 | def train(self): 236 | if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE: 237 | return 238 | 239 | minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE) 240 | 241 | current_states = np.array([transition[0] for transition in minibatch])/255 242 | with self.graph.as_default(): 243 | current_qs_list = self.model.predict(current_states, PREDICTION_BATCH_SIZE) 244 | 245 | new_current_states = np.array([transition[3] for transition in minibatch])/255 246 | with self.graph.as_default(): 247 | future_qs_list = self.target_model.predict(new_current_states, PREDICTION_BATCH_SIZE) 248 | 249 | X = [] 250 | y = [] 251 | 252 | for index, (current_state, action, reward, new_state, done) in enumerate(minibatch): 253 | if not done: 254 | max_future_q = np.max(future_qs_list[index]) 255 | new_q = reward + DISCOUNT * max_future_q 256 | else: 257 | new_q = reward 258 | 259 | current_qs = current_qs_list[index] 260 | current_qs[action] = new_q 261 | 262 | X.append(current_state) 263 | y.append(current_qs) 264 | 265 | log_this_step = False 266 | if self.tensorboard.step > self.last_logged_episode: 267 | log_this_step = True 268 | self.last_log_episode = self.tensorboard.step 269 | 270 | with self.graph.as_default(): 271 | self.model.fit(np.array(X)/255, np.array(y), batch_size=TRAINING_BATCH_SIZE, verbose=0, shuffle=False, callbacks=[self.tensorboard] if log_this_step else None) 272 | 273 | 274 | if log_this_step: 275 | self.target_update_counter += 1 276 | 277 | if self.target_update_counter > UPDATE_TARGET_EVERY: 278 | self.target_model.set_weights(self.model.get_weights()) 279 | self.target_update_counter = 0 280 | 281 | def get_qs(self, state): 282 | return self.model.predict(np.array(state).reshape(-1, *state.shape)/255)[0] 283 | 284 | def train_in_loop(self): 285 | #filepath="weights-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5" 286 | #checkpoint = 
ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True) 287 | 288 | X = np.random.uniform(size=(1, IM_HEIGHT, IM_WIDTH, 3)).astype(np.float32) 289 | y = np.random.uniform(size=(1, 3)).astype(np.float32) 290 | 291 | #log_this_step = False 292 | #if self.tensorboard.step > self.last_logged_episode: 293 | # log_this_step = True 294 | # self.last_log_episode = self.tensorboard.step 295 | 296 | with self.graph.as_default(): 297 | #self.model.fit(X,y, verbose=False, batch_size=1, callbacks=[self.tensorboard,checkpoint]) 298 | self.model.fit(X,y, verbose=False, batch_size=1, callbacks=[self.tensorboard]) 299 | 300 | #if log_this_step: 301 | # self.target_update_counter += 1 302 | 303 | #if self.target_update_counter > UPDATE_TARGET_EVERY: 304 | # self.target_model.set_weights(self.model.get_weights()) 305 | # self.target_update_counter = 0 306 | 307 | self.training_initialized = True 308 | 309 | while True: 310 | if self.terminate: 311 | return 312 | self.train() 313 | time.sleep(0.01) 314 | 315 | 316 | 317 | if __name__ == '__main__': 318 | FPS = 60 319 | # For stats 320 | ep_rewards = [-200] 321 | 322 | # For more repetitive results 323 | random.seed(1) 324 | np.random.seed(1) 325 | tf.set_random_seed(1) 326 | 327 | # Memory fraction, used mostly when trai8ning multiple agents 328 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=MEMORY_FRACTION) 329 | backend.set_session(tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))) 330 | 331 | # Create models folder 332 | if not os.path.isdir('C:\\Users\\Dewang\\Desktop\\RL Project\\models'): 333 | os.makedirs('C:\\Users\\Dewang\\Desktop\\RL Project\\models') 334 | 335 | #Create checkpoint folder 336 | if not os.path.isdir('C:\\Users\\Dewang\\Desktop\\RL Project\\models\\Checkpoints'): 337 | os.makedirs('C:\\Users\\Dewang\\Desktop\\RL Project\\models\\Checkpoints') 338 | 339 | # Create agent and environment 340 | agent = DQNAgent() 341 | env = CarEnv() 342 | 343 | 344 | # Start training thread and wait for training to be initialized 345 | trainer_thread = Thread(target=agent.train_in_loop, daemon=True) 346 | trainer_thread.start() 347 | while not agent.training_initialized: 348 | time.sleep(0.01) 349 | 350 | # Initialize predictions - forst prediction takes longer as of initialization that has to be done 351 | # It's better to do a first prediction then before we start iterating over episode steps 352 | agent.get_qs(np.ones((env.im_height, env.im_width, 3))) 353 | 354 | # Iterate over episodes 355 | for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'): 356 | #try: 357 | 358 | env.collision_hist = [] 359 | 360 | # Update tensorboard step every episode 361 | agent.tensorboard.step = episode 362 | 363 | # Restarting episode - reset episode reward and step number 364 | episode_reward = 0 365 | step = 1 366 | 367 | # Reset environment and get initial state 368 | current_state = env.reset() 369 | 370 | # Reset flag and start iterating until episode ends 371 | done = False 372 | episode_start = time.time() 373 | 374 | # Play for given number of seconds only 375 | while True: 376 | 377 | # This part stays mostly the same, the change is to query a model for Q values 378 | if np.random.random() > epsilon: 379 | # Get action from Q table 380 | action = np.argmax(agent.get_qs(current_state)) 381 | else: 382 | # Get random action 383 | action = np.random.randint(0, 3) 384 | # This takes no time, so we add a delay matching 60 FPS (prediction above takes longer) 385 | time.sleep(1/FPS) 386 | 387 | 
new_state, reward, done, _ = env.step(action)
388 |
389 |             # Accumulate the reward for this step
390 |             episode_reward += reward
391 |
392 |             # Every step we update replay memory
393 |             agent.update_replay_memory((current_state, action, reward, new_state, done))
394 |
395 |             current_state = new_state
396 |             step += 1
397 |
398 |             if done:
399 |                 break
400 |
401 |         # End of episode - destroy agents
402 |         for actor in env.actor_list:
403 |             actor.destroy()
404 |
405 |         # Append episode reward to a list and log stats (every given number of episodes)
406 |         ep_rewards.append(episode_reward)
407 |         if not episode % AGGREGATE_STATS_EVERY or episode == 1:
408 |             average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:])/len(ep_rewards[-AGGREGATE_STATS_EVERY:])
409 |             min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
410 |             max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
411 |             agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon)
412 |
413 |             # Save model, but only when min reward is greater than or equal to a set value
414 |             if min_reward >= MIN_REWARD:
415 |                 agent.model.save(f'C:/Users/Dewang/Desktop/RLProject/models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.hdf5')
416 |
417 |         # Decay epsilon
418 |         if epsilon > MIN_EPSILON:
419 |             epsilon *= EPSILON_DECAY
420 |             epsilon = max(MIN_EPSILON, epsilon)
421 |
422 |         # Manual checkpoint
423 |         if episode > 0:
424 |             agent.model.save(f'C:/Users/Dewang/Desktop/RLProject/models/Checkpoints/{MODEL_NAME}__{episode}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.hdf5')
425 |
426 |     # Set termination flag for training thread and wait for it to finish
427 |     agent.terminate = True
428 |     trainer_thread.join()
429 |     agent.model.save(f'C:/Users/Dewang/Desktop/RLProject/models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.hdf5')
430 |
--------------------------------------------------------------------------------
/Code/test_script.py:
--------------------------------------------------------------------------------
1 | '''
2 | Teach An Agent To Drive A Car In A Virtual Environment
3 |
4 | @author: Dewang Shah, Anant Vignesh Mahadhevan and Rakesh Ramesh
5 | '''
6 | import random
7 | from collections import deque
8 | import numpy as np
9 | import cv2
10 | import time
11 | import tensorflow as tf
12 | import keras.backend.tensorflow_backend as backend
13 | from keras.models import load_model
14 | from main_script1 import CarEnv, MEMORY_FRACTION  # the training script in this repo is main_script1.py
15 |
16 |
17 | #MODEL_PATH = "C:\\Users\\yashp\\Downloads\\RL Project\\CARLA_0.9.5\\PythonAPI\\examples\\models\\Xception____-4.00max_-103.00avg_-202.00min__1571284603.model"
18 | MODEL_PATH = "C:\\Users\\Dewang\\Desktop\\Xception__3101____38.00max_-110.30avg_-272.00min__1574048139_AVM.hdf5"
19 | if __name__ == '__main__':
20 |
21 |     # Memory fraction
22 |     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=MEMORY_FRACTION)
23 |     backend.set_session(tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)))
24 |
25 |     # Load the model
26 |     model = load_model(MODEL_PATH)
27 |
28 |     # Create environment
29 |     env = CarEnv()
30 |
31 |     # For agent speed measurements - keeps last 60 frametimes
32 |     fps_counter = deque(maxlen=60)
33 |
34 |     # Initialize predictions - the first prediction takes longer because initialization has to be done
35 |     # It's better to do a first prediction before we start iterating over episode steps
36 |     model.predict(np.ones((1, env.im_height, env.im_width, 3)))
37 |
38 |     # Loop over episodes
39 |     while True:
40 |
41 |         print('Restarting episode')
42 |
43 |         # Reset environment and get initial state
44 |         current_state = env.reset()
45 |         env.collision_hist = []
46 |
47 |         done = False
48 |
49 |         # Loop over steps
50 |         while True:
51 |
52 |             # For FPS counter
53 |             step_start = time.time()
54 |
55 |             # Show current frame
56 |             cv2.imshow(f'Agent - preview', current_state)
57 |             cv2.waitKey(1)
58 |
59 |             # Predict an action based on current observation space
60 |             qs = model.predict(np.array(current_state).reshape(-1, *current_state.shape)/255)[0]
61 |             action = np.argmax(qs)
62 |
63 |             # Step the environment with the chosen action
64 |             new_state, reward, done, _ = env.step(action)
65 |
66 |             # Set current step for next loop iteration
67 |             current_state = new_state
68 |
69 |             # If done - the agent crashed, break the episode
70 |             if done:
71 |                 break
72 |
73 |             # Measure step time, append to a deque, then print mean FPS for the last 60 frames, Q values and the taken action
74 |             frame_time = time.time() - step_start
75 |             fps_counter.append(frame_time)
76 |             print(f'Agent: {len(fps_counter)/sum(fps_counter):>4.1f} FPS | Action: [{qs[0]:>5.2f}, {qs[1]:>5.2f}, {qs[2]:>5.2f}] {action}')
77 |
78 |         # Destroy the actors at the end of the episode
79 |         for actor in env.actor_list:
80 |             actor.destroy()
81 |
--------------------------------------------------------------------------------
/Final Presentaion.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Final Presentaion.pptx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Training-Self-Driving-Car-Using-Reinforcement-Learning
2 | Autonomous vehicles are widely expected to become a prominent vehicle category within the next decade. For this to succeed, a vehicle must be safe, reliable and comfortable for its passengers, and it must negotiate right and left turns and push ahead through urban traffic with some sophistication. Reinforcement learning is a natural framework for learning such a driving policy. We propose a deep Q-learning approach that learns to maximize the cumulative reward over a large state space. We use CARLA, an open-source simulator for autonomous driving research, to provide an environment that resembles real-life driving, in which the agent learns to avoid obstacles using data from the virtual sensors attached to it.
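
The training loop in `Code/main_script1.py` follows the standard DQN recipe: one of three steering commands (left, straight, right) is chosen epsilon-greedily, transitions are stored in a replay memory, and the online network is refit toward a Bellman target computed from a separate target network. The sketch below restates that update in isolation as a minimal illustration; the constant values mirror the ones in the script, while the helper names (`choose_action`, `q_target`) are ours and do not exist in the repository code.

```python
import numpy as np

DISCOUNT = 0.99   # discount factor, same value as main_script1.py
N_ACTIONS = 3     # steer left, go straight, steer right

def choose_action(qs, epsilon):
    """Epsilon-greedy selection over the three steering actions."""
    if np.random.random() > epsilon:
        return int(np.argmax(qs))             # exploit: action with the highest predicted Q value
    return np.random.randint(0, N_ACTIONS)    # explore: random steering action

def q_target(reward, future_qs, done, discount=DISCOUNT):
    """Bellman target the online network is refit toward for one replay transition."""
    if done:
        return reward                         # terminal transition: no future reward
    return reward + discount * np.max(future_qs)
```

In the script, `future_qs` come from the target network, whose weights are copied from the online network every few logged training steps (`UPDATE_TARGET_EVERY`). The reward shaping is simple: a collision ends the episode with -200, each step below 50 km/h gives -1, faster steps give +1, and episodes are also capped at `SECONDS_PER_EPISODE` seconds.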
3 | -------------------------------------------------------------------------------- /Research Papers/An Empirical Evaluation of Deep Learning on Highway Driving.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/An Empirical Evaluation of Deep Learning on Highway Driving.pdf -------------------------------------------------------------------------------- /Research Papers/CARLA (2).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/CARLA (2).pdf -------------------------------------------------------------------------------- /Research Papers/CARMA- A Deep Reinforcement Learning Approach to Autonomous Driving.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/CARMA- A Deep Reinforcement Learning Approach to Autonomous Driving.pdf -------------------------------------------------------------------------------- /Research Papers/CONTINUOUS CONTROL WITH DEEP REINFORCEMENT LEARNING.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/CONTINUOUS CONTROL WITH DEEP REINFORCEMENT LEARNING.pdf -------------------------------------------------------------------------------- /Research Papers/Deep Reinforcement Learning for Simulated Autonomous Vehicle Control.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Deep Reinforcement Learning for Simulated Autonomous Vehicle Control.pdf -------------------------------------------------------------------------------- /Research Papers/Deep Reinforcement Learning framework for Autonomous Driving.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Deep Reinforcement Learning framework for Autonomous Driving.pdf -------------------------------------------------------------------------------- /Research Papers/DeepDriving Learning Affordance for Direct Perception in Autonomous Driving.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/DeepDriving Learning Affordance for Direct Perception in Autonomous Driving.pdf -------------------------------------------------------------------------------- /Research Papers/Driverless Car_ Autonomous Driving Using Deep Reinforcement Learning In Urban Environment.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Driverless Car_ Autonomous Driving Using Deep Reinforcement Learning In Urban Environment.pdf -------------------------------------------------------------------------------- /Research Papers/Evolving Large-Scale Neural Networks for Vision-Based Reinforcement Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Evolving Large-Scale Neural Networks for Vision-Based Reinforcement Learning.pdf -------------------------------------------------------------------------------- /Research Papers/High Speed Obstacle Avoidance using Monocular Vision and Reinforcement Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/High Speed Obstacle Avoidance using Monocular Vision and Reinforcement Learning.pdf -------------------------------------------------------------------------------- /Research Papers/Improving Sample Efficiency in Model Free Reinforcement Learning from Images.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Improving Sample Efficiency in Model Free Reinforcement Learning from Images.pdf -------------------------------------------------------------------------------- /Research Papers/Learning Driving Styles for Autonomous Vehicles from Demonstration (1).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Learning Driving Styles for Autonomous Vehicles from Demonstration (1).pdf -------------------------------------------------------------------------------- /Research Papers/Learning to Drive using Inverse Reinforcement Learning ANd DQN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Learning to Drive using Inverse Reinforcement Learning ANd DQN.pdf -------------------------------------------------------------------------------- /Research Papers/Navigating Occluded Intersections with Autonomous Vehicles using Deep Reinforcement Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Navigating Occluded Intersections with Autonomous Vehicles using Deep Reinforcement Learning.pdf -------------------------------------------------------------------------------- /Research Papers/Our Research Paper/Teach An Agent To Drive A Car In A Virtual Environment.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Our Research Paper/Teach An Agent To Drive A Car In A Virtual Environment.docx -------------------------------------------------------------------------------- /Research Papers/Planning for Autonomous Cars that Leverage Effects on Human Actions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Planning for Autonomous Cars that Leverage Effects on Human Actions.pdf -------------------------------------------------------------------------------- /Research Papers/Safe, Multi-Agent, Reinforcement Learning for.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Safe, Multi-Agent, Reinforcement Learning for.pdf -------------------------------------------------------------------------------- /Research Papers/VIRTUAL TO REAL REINFORCEMENT LEARNING.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/VIRTUAL TO REAL REINFORCEMENT LEARNING.pdf -------------------------------------------------------------------------------- /Research Papers/World Models.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/World Models.pdf -------------------------------------------------------------------------------- /Results Media/Accuracy12000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Accuracy12000.jpg -------------------------------------------------------------------------------- /Results Media/Agent_preview_1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Agent_preview_1.mp4 -------------------------------------------------------------------------------- /Results Media/Agent_preview_2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Agent_preview_2.mp4 -------------------------------------------------------------------------------- /Results Media/Epsilon12000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Epsilon12000.jpg -------------------------------------------------------------------------------- /Results 
Media/Loss12000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Loss12000.jpg -------------------------------------------------------------------------------- /Results Media/RewardAvg12000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/RewardAvg12000.jpg -------------------------------------------------------------------------------- /Results Media/RewardMax12000.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/RewardMax12000.JPG -------------------------------------------------------------------------------- /Results Media/RewardMin12000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/RewardMin12000.jpg -------------------------------------------------------------------------------- /Results Media/Training_Video_1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Training_Video_1.mp4 -------------------------------------------------------------------------------- /Results Media/Training_video_2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Training_video_2.mp4 --------------------------------------------------------------------------------