├── Code ├── Model │ └── FinalModel.hdf5 ├── SensorDataCollection.py ├── main_script1.py └── test_script.py ├── Final Presentaion.pptx ├── README.md ├── Research Papers ├── An Empirical Evaluation of Deep Learning on Highway Driving.pdf ├── CARLA (2).pdf ├── CARMA- A Deep Reinforcement Learning Approach to Autonomous Driving.pdf ├── CONTINUOUS CONTROL WITH DEEP REINFORCEMENT LEARNING.pdf ├── Deep Reinforcement Learning for Simulated Autonomous Vehicle Control.pdf ├── Deep Reinforcement Learning framework for Autonomous Driving.pdf ├── DeepDriving Learning Affordance for Direct Perception in Autonomous Driving.pdf ├── Driverless Car_ Autonomous Driving Using Deep Reinforcement Learning In Urban Environment.pdf ├── Evolving Large-Scale Neural Networks for Vision-Based Reinforcement Learning.pdf ├── High Speed Obstacle Avoidance using Monocular Vision and Reinforcement Learning.pdf ├── Improving Sample Efficiency in Model Free Reinforcement Learning from Images.pdf ├── Learning Driving Styles for Autonomous Vehicles from Demonstration (1).pdf ├── Learning to Drive using Inverse Reinforcement Learning ANd DQN.pdf ├── Navigating Occluded Intersections with Autonomous Vehicles using Deep Reinforcement Learning.pdf ├── Our Research Paper │ └── Teach An Agent To Drive A Car In A Virtual Environment.docx ├── Planning for Autonomous Cars that Leverage Effects on Human Actions.pdf ├── Safe, Multi-Agent, Reinforcement Learning for.pdf ├── VIRTUAL TO REAL REINFORCEMENT LEARNING.pdf └── World Models.pdf └── Results Media ├── Accuracy12000.jpg ├── Agent_preview_1.mp4 ├── Agent_preview_2.mp4 ├── Epsilon12000.jpg ├── Loss12000.jpg ├── RewardAvg12000.jpg ├── RewardMax12000.JPG ├── RewardMin12000.jpg ├── Training_Video_1.mp4 └── Training_video_2.mp4 /Code/Model/FinalModel.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Code/Model/FinalModel.hdf5 -------------------------------------------------------------------------------- /Code/SensorDataCollection.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Teach An Agent To Drive A Car In A Virtual Environment 3 | 4 | @author: Dewang Shah, Anant Vignesh Mahadhevan and Rakesh Ramesh 5 | ''' 6 | 7 | #Importing necessary packages 8 | import glob 9 | import os 10 | import sys 11 | import random 12 | import time 13 | import numpy as np 14 | import cv2 15 | import math 16 | from collections import deque 17 | from keras.applications.xception import Xception 18 | from keras.layers import Dense, GlobalAveragePooling2D 19 | from keras.optimizers import Adam 20 | from keras.models import Model 21 | 22 | #Importing CARLA environment 23 | try: 24 | sys.path.append(glob.glob('../carla/dist/carla-*%d.%d-%s.egg' % ( 25 | sys.version_info.major, 26 | sys.version_info.minor, 27 | 'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0]) 28 | except IndexError: 29 | pass 30 | import carla 31 | 32 | #Setting CARLA environmental parameters 33 | showPreview = False 34 | imageWidth = 640 35 | imageHeight = 480 36 | secondsPerEpisode = 10 37 | replayMemorySize = 5_000 38 | minReplayMemorySize = 1_000 39 | modelName = "Xception" 40 | 41 | #Class to create CARLA environment 42 | class CarlaEnvironment: 43 | showCam = showPreview 44 | steerAmt = 1.0 45 | imageWidth = imageWidth 46 | imageHeight = imageHeight 47 | front_camera = None 48 | 49 | def __init__(self): 50 | 
self.client = carla.Client("localhost", 2000) #Run CARLA as the server in port 2000 51 | self.client.set_timeout(10.0) #Set server environment timeout as 10 seconds 52 | self.world = self.client.get_world() #Initialize CARLA world 53 | self.blueprint_library = self.world.get_blueprint_library() #Initialize CARLA world blueprint 54 | self.model_3 = self.blueprint_library.filter("model3")[0] #Initialize Tesla Model 3 car blueprint 55 | 56 | def reset(self): 57 | self.collision_hist = [] 58 | self.actor_list = [] 59 | 60 | #Get a random spawn point from the map and spawn the vehicle in the spawn point 61 | self.transform = random.choice(self.world.get_map().get_spawn_points()) 62 | self.vehicle = self.world.spawn_actor(self.model_3, self.transform) 63 | #Add the vehicle to the list of actors in the environment 64 | self.actor_list.append(self.vehicle) 65 | 66 | #Set the RGB Camera sensor attributes 67 | self.rgb_cam = self.blueprint_library.find('sensor.camera.rgb') 68 | self.rgb_cam.set_attribute("image_size_x", f"{self.imageWidth}") 69 | self.rgb_cam.set_attribute("image_size_y", f"{self.imageHeight}") 70 | self.rgb_cam.set_attribute("fov", f"110") 71 | 72 | #Add the RGB Camera Sensor to the car 73 | transform = carla.Transform(carla.Location(x=2.5, z=0.7)) 74 | self.sensor = self.world.spawn_actor(self.rgb_cam, transform, attach_to=self.vehicle) 75 | #Add the camera sensor to the list of actors in the environment 76 | self.actor_list.append(self.sensor) 77 | self.sensor.listen(lambda data: self.processImage(data)) 78 | 79 | self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0)) 80 | time.sleep(4) 81 | 82 | #Add the Collision Sensor to the car 83 | colsensor = self.blueprint_library.find("sensor.other.collision") 84 | self.colsensor = self.world.spawn_actor(colsensor, transform, attach_to=self.vehicle) 85 | #Add the collision sensor to the list of actors in the environment 86 | self.actor_list.append(self.colsensor) 87 | self.colsensor.listen(lambda event: self.collisionData(event)) 88 | 89 | while self.front_camera is None: 90 | time.sleep(0.01) 91 | 92 | self.episode_start = time.time() 93 | self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0)) 94 | 95 | return self.front_camera 96 | 97 | def processImage(self, image): 98 | i = np.array(image.raw_data) 99 | i2 = i.reshape((self.imageHeight, self.imageWidth, 4)) 100 | i3 = i2[:, :, :3] 101 | if self.showCam: 102 | cv2.imshow("", i3) 103 | cv2.waitKey(1) 104 | self.front_camera = i3 105 | 106 | def collisionData(self, event): 107 | self.collision_hist.append(event) 108 | 109 | def step(self, action): 110 | if action == 0: 111 | self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=-1*self.steerAmt)) 112 | elif action == 1: 113 | self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer= 0)) 114 | elif action == 2: 115 | self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=1*self.steerAmt)) 116 | 117 | v = self.vehicle.get_velocity() 118 | kmh = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2)) 119 | 120 | if len(self.collision_hist) != 0: 121 | done = True 122 | reward = -200 123 | elif kmh < 50: 124 | done = False 125 | reward = -1 126 | else: 127 | done = False 128 | reward = 1 129 | 130 | if self.episode_start + secondsPerEpisode < time.time(): 131 | done = True 132 | 133 | return self.front_camera, reward, done, None 134 | 135 | 136 | class DQNAgent: 137 | def __init__(self): 138 | self.model = self.create_model() 139 | self.target_model = self.create_model() 140 
| self.target_model.set_weights(self.model.get_weights()) 141 | 142 | self.replay_memory = deque(maxlen=replayMemorySize) 143 | 144 | self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{modelName}-{int(time.time())}") 145 | self.target_update_counter = 0 146 | self.graph = tf.get_default_graph() 147 | 148 | self.terminate = False 149 | self.last_logged_episode = 0 150 | self.training_initialized = False 151 | 152 | def create_model(self): 153 | base_model = Xception(weights=None, include_top=False, input_shape=(imageHeight, imageWidth,3)) 154 | 155 | x = base_model.output 156 | x = GlobalAveragePooling2D()(x) 157 | 158 | predictions = Dense(3, activation="linear")(x) 159 | model = Model(inputs=base_model.input, outputs=predictions) 160 | model.compile(loss="mse", optimizer=Adam(lr=0.001), metrics=["accuracy"]) 161 | return model -------------------------------------------------------------------------------- /Code/main_script1.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Teach An Agent To Drive A Car In A Virtual Environment 3 | 4 | @author: Dewang Shah, Anant Vignesh Mahadhevan and Rakesh Ramesh 5 | ''' 6 | import glob 7 | import os 8 | import sys 9 | import random 10 | import time 11 | import numpy as np 12 | import cv2 13 | import math 14 | from collections import deque 15 | from keras.applications.xception import Xception 16 | from keras.layers import Dense, AveragePooling2D, Flatten 17 | from keras.optimizers import Adam 18 | from keras.models import Model 19 | from keras.callbacks import TensorBoard 20 | from keras.callbacks import ModelCheckpoint 21 | 22 | import tensorflow as tf 23 | from keras.layers import Activation, Dense 24 | import keras.backend.tensorflow_backend as backend 25 | from keras import Sequential 26 | from keras.layers.convolutional import Conv2D 27 | from threading import Thread 28 | 29 | from tqdm import tqdm 30 | 31 | try: 32 | sys.path.append(glob.glob('../carla/dist/carla-*%d.%d-%s.egg' % ( 33 | sys.version_info.major, 34 | sys.version_info.minor, 35 | 'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0]) 36 | except IndexError: 37 | pass 38 | import carla 39 | 40 | 41 | SHOW_PREVIEW = False 42 | IM_WIDTH = 640 43 | IM_HEIGHT = 480 44 | SECONDS_PER_EPISODE = 10 45 | REPLAY_MEMORY_SIZE = 5_000 46 | MIN_REPLAY_MEMORY_SIZE = 1_000 47 | MINIBATCH_SIZE = 16 48 | PREDICTION_BATCH_SIZE = 1 49 | TRAINING_BATCH_SIZE = MINIBATCH_SIZE // 4 50 | UPDATE_TARGET_EVERY = 5 51 | MODEL_NAME = "Xception" 52 | 53 | MEMORY_FRACTION = 0.4 54 | MIN_REWARD = -200 55 | 56 | EPISODES = 10000 57 | 58 | DISCOUNT = 0.99 59 | epsilon = 1 60 | EPSILON_DECAY = 0.95 ## 0.9975 99975 61 | MIN_EPSILON = 0.001 62 | 63 | AGGREGATE_STATS_EVERY = 10 64 | 65 | 66 | # Own Tensorboard class 67 | class ModifiedTensorBoard(TensorBoard): 68 | 69 | # Overriding init to set initial step and writer (we want one log file for all .fit() calls) 70 | def __init__(self, **kwargs): 71 | super().__init__(**kwargs) 72 | self.step = 1 73 | self.writer = tf.summary.FileWriter(self.log_dir) 74 | 75 | # Overriding this method to stop creating default log writer 76 | def set_model(self, model): 77 | pass 78 | 79 | # Overrided, saves logs with our step number 80 | # (otherwise every .fit() will start writing from 0th step) 81 | def on_epoch_end(self, epoch, logs=None): 82 | self.update_stats(**logs) 83 | 84 | # Overrided 85 | # We train for one batch only, no need to save anything at epoch end 86 | def on_batch_end(self, batch, logs=None): 87 | pass 88 | 89 | # 
Overrided, so won't close writer 90 | def on_train_end(self, _): 91 | pass 92 | 93 | # Custom method for saving own metrics 94 | # Creates writer, writes custom metrics and closes writer 95 | def update_stats(self, **stats): 96 | self._write_logs(stats, self.step) 97 | 98 | 99 | class CarEnv: 100 | SHOW_CAM = SHOW_PREVIEW 101 | STEER_AMT = 1.0 102 | im_width = IM_WIDTH 103 | im_height = IM_HEIGHT 104 | front_camera = None 105 | 106 | def __init__(self): 107 | self.client = carla.Client("localhost", 2000) 108 | self.client.set_timeout(10.0) 109 | self.world = self.client.get_world() 110 | self.blueprint_library = self.world.get_blueprint_library() 111 | self.model_3 = self.blueprint_library.filter("model3")[0] 112 | 113 | def reset(self): 114 | self.collision_hist = [] 115 | self.actor_list = [] 116 | 117 | self.transform = random.choice(self.world.get_map().get_spawn_points()) 118 | self.vehicle = self.world.spawn_actor(self.model_3, self.transform) 119 | self.actor_list.append(self.vehicle) 120 | 121 | self.rgb_cam = self.blueprint_library.find('sensor.camera.rgb') 122 | self.rgb_cam.set_attribute("image_size_x", f"{self.im_width}") 123 | self.rgb_cam.set_attribute("image_size_y", f"{self.im_height}") 124 | self.rgb_cam.set_attribute("fov", f"110") 125 | 126 | transform = carla.Transform(carla.Location(x=2.5, z=0.7)) 127 | self.sensor = self.world.spawn_actor(self.rgb_cam, transform, attach_to=self.vehicle) 128 | self.actor_list.append(self.sensor) 129 | self.sensor.listen(lambda data: self.process_img(data)) 130 | 131 | self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0)) 132 | time.sleep(4) 133 | 134 | colsensor = self.blueprint_library.find("sensor.other.collision") 135 | self.colsensor = self.world.spawn_actor(colsensor, transform, attach_to=self.vehicle) 136 | self.actor_list.append(self.colsensor) 137 | self.colsensor.listen(lambda event: self.collision_data(event)) 138 | 139 | while self.front_camera is None: 140 | time.sleep(0.01) 141 | 142 | self.episode_start = time.time() 143 | self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0)) 144 | 145 | return self.front_camera 146 | 147 | def collision_data(self, event): 148 | self.collision_hist.append(event) 149 | 150 | def process_img(self, image): 151 | i = np.array(image.raw_data) 152 | #print(i.shape) 153 | i2 = i.reshape((self.im_height, self.im_width, 4)) 154 | i3 = i2[:, :, :3] 155 | if self.SHOW_CAM: 156 | cv2.imshow("", i3) 157 | cv2.waitKey(1) 158 | self.front_camera = i3 159 | 160 | def step(self, action): 161 | if action == 0: 162 | self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=-1*self.STEER_AMT)) 163 | elif action == 1: 164 | self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer= 0)) 165 | elif action == 2: 166 | self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=1*self.STEER_AMT)) 167 | 168 | v = self.vehicle.get_velocity() 169 | kmh = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2)) 170 | 171 | if len(self.collision_hist) != 0: 172 | done = True 173 | reward = -200 174 | elif kmh < 50: 175 | done = False 176 | reward = -1 177 | else: 178 | done = False 179 | reward = 1 180 | 181 | if self.episode_start + SECONDS_PER_EPISODE < time.time(): 182 | done = True 183 | 184 | return self.front_camera, reward, done, None 185 | 186 | 187 | class DQNAgent: 188 | 189 | def __init__(self): 190 | self.model = self.create_model() 191 | self.target_model = self.create_model() 192 | self.target_model.set_weights(self.model.get_weights()) 193 | 194 
| self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE) 195 | 196 | self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{MODEL_NAME}-{int(time.time())}") 197 | self.target_update_counter = 0 198 | self.graph = tf.get_default_graph() 199 | 200 | self.terminate = False 201 | self.last_logged_episode = 0 202 | self.training_initialized = False 203 | 204 | def create_model(self): 205 | model = Sequential() 206 | 207 | model.add(Conv2D(64, (3, 3), input_shape=(IM_HEIGHT, IM_WIDTH,3), padding='same')) 208 | model.add(Activation('relu')) 209 | model.add(AveragePooling2D(pool_size=(5, 5), strides=(3, 3), padding='same')) 210 | 211 | model.add(Conv2D(64, (3, 3), padding='same')) 212 | model.add(Activation('relu')) 213 | model.add(AveragePooling2D(pool_size=(5, 5), strides=(3, 3), padding='same')) 214 | 215 | model.add(Conv2D(64, (3, 3), padding='same')) 216 | model.add(Activation('relu')) 217 | model.add(AveragePooling2D(pool_size=(5, 5), strides=(3, 3), padding='same')) 218 | 219 | model.add(Flatten()) 220 | 221 | model.add(Dense(3, activation="linear")) 222 | model = Model(inputs=model.input, outputs=model.output) 223 | model.compile(loss="mse", optimizer=Adam(lr=0.001), metrics=["accuracy"]) 224 | 225 | #CHECKPOINT 226 | #filepath="weights-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5" 227 | #checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=False, mode='max') 228 | #callbacks_list = [self.tensorboard,checkpoint] 229 | return model 230 | 231 | def update_replay_memory(self, transition): 232 | # transition = (current_state, action, reward, new_state, done) 233 | self.replay_memory.append(transition) 234 | 235 | def train(self): 236 | if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE: 237 | return 238 | 239 | minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE) 240 | 241 | current_states = np.array([transition[0] for transition in minibatch])/255 242 | with self.graph.as_default(): 243 | current_qs_list = self.model.predict(current_states, PREDICTION_BATCH_SIZE) 244 | 245 | new_current_states = np.array([transition[3] for transition in minibatch])/255 246 | with self.graph.as_default(): 247 | future_qs_list = self.target_model.predict(new_current_states, PREDICTION_BATCH_SIZE) 248 | 249 | X = [] 250 | y = [] 251 | 252 | for index, (current_state, action, reward, new_state, done) in enumerate(minibatch): 253 | if not done: 254 | max_future_q = np.max(future_qs_list[index]) 255 | new_q = reward + DISCOUNT * max_future_q 256 | else: 257 | new_q = reward 258 | 259 | current_qs = current_qs_list[index] 260 | current_qs[action] = new_q 261 | 262 | X.append(current_state) 263 | y.append(current_qs) 264 | 265 | log_this_step = False 266 | if self.tensorboard.step > self.last_logged_episode: 267 | log_this_step = True 268 | self.last_log_episode = self.tensorboard.step 269 | 270 | with self.graph.as_default(): 271 | self.model.fit(np.array(X)/255, np.array(y), batch_size=TRAINING_BATCH_SIZE, verbose=0, shuffle=False, callbacks=[self.tensorboard] if log_this_step else None) 272 | 273 | 274 | if log_this_step: 275 | self.target_update_counter += 1 276 | 277 | if self.target_update_counter > UPDATE_TARGET_EVERY: 278 | self.target_model.set_weights(self.model.get_weights()) 279 | self.target_update_counter = 0 280 | 281 | def get_qs(self, state): 282 | return self.model.predict(np.array(state).reshape(-1, *state.shape)/255)[0] 283 | 284 | def train_in_loop(self): 285 | #filepath="weights-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5" 286 | #checkpoint = 
ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True) 287 | 288 | X = np.random.uniform(size=(1, IM_HEIGHT, IM_WIDTH, 3)).astype(np.float32) 289 | y = np.random.uniform(size=(1, 3)).astype(np.float32) 290 | 291 | #log_this_step = False 292 | #if self.tensorboard.step > self.last_logged_episode: 293 | # log_this_step = True 294 | # self.last_log_episode = self.tensorboard.step 295 | 296 | with self.graph.as_default(): 297 | #self.model.fit(X,y, verbose=False, batch_size=1, callbacks=[self.tensorboard,checkpoint]) 298 | self.model.fit(X,y, verbose=False, batch_size=1, callbacks=[self.tensorboard]) 299 | 300 | #if log_this_step: 301 | # self.target_update_counter += 1 302 | 303 | #if self.target_update_counter > UPDATE_TARGET_EVERY: 304 | # self.target_model.set_weights(self.model.get_weights()) 305 | # self.target_update_counter = 0 306 | 307 | self.training_initialized = True 308 | 309 | while True: 310 | if self.terminate: 311 | return 312 | self.train() 313 | time.sleep(0.01) 314 | 315 | 316 | 317 | if __name__ == '__main__': 318 | FPS = 60 319 | # For stats 320 | ep_rewards = [-200] 321 | 322 | # For more repetitive results 323 | random.seed(1) 324 | np.random.seed(1) 325 | tf.set_random_seed(1) 326 | 327 | # Memory fraction, used mostly when trai8ning multiple agents 328 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=MEMORY_FRACTION) 329 | backend.set_session(tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))) 330 | 331 | # Create models folder 332 | if not os.path.isdir('C:\\Users\\Dewang\\Desktop\\RL Project\\models'): 333 | os.makedirs('C:\\Users\\Dewang\\Desktop\\RL Project\\models') 334 | 335 | #Create checkpoint folder 336 | if not os.path.isdir('C:\\Users\\Dewang\\Desktop\\RL Project\\models\\Checkpoints'): 337 | os.makedirs('C:\\Users\\Dewang\\Desktop\\RL Project\\models\\Checkpoints') 338 | 339 | # Create agent and environment 340 | agent = DQNAgent() 341 | env = CarEnv() 342 | 343 | 344 | # Start training thread and wait for training to be initialized 345 | trainer_thread = Thread(target=agent.train_in_loop, daemon=True) 346 | trainer_thread.start() 347 | while not agent.training_initialized: 348 | time.sleep(0.01) 349 | 350 | # Initialize predictions - forst prediction takes longer as of initialization that has to be done 351 | # It's better to do a first prediction then before we start iterating over episode steps 352 | agent.get_qs(np.ones((env.im_height, env.im_width, 3))) 353 | 354 | # Iterate over episodes 355 | for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'): 356 | #try: 357 | 358 | env.collision_hist = [] 359 | 360 | # Update tensorboard step every episode 361 | agent.tensorboard.step = episode 362 | 363 | # Restarting episode - reset episode reward and step number 364 | episode_reward = 0 365 | step = 1 366 | 367 | # Reset environment and get initial state 368 | current_state = env.reset() 369 | 370 | # Reset flag and start iterating until episode ends 371 | done = False 372 | episode_start = time.time() 373 | 374 | # Play for given number of seconds only 375 | while True: 376 | 377 | # This part stays mostly the same, the change is to query a model for Q values 378 | if np.random.random() > epsilon: 379 | # Get action from Q table 380 | action = np.argmax(agent.get_qs(current_state)) 381 | else: 382 | # Get random action 383 | action = np.random.randint(0, 3) 384 | # This takes no time, so we add a delay matching 60 FPS (prediction above takes longer) 385 | time.sleep(1/FPS) 386 | 387 | 
new_state, reward, done, _ = env.step(action)
388 |
389 |             # Accumulate the reward for this step
390 |             episode_reward += reward
391 |
392 |             # Every step we update replay memory
393 |             agent.update_replay_memory((current_state, action, reward, new_state, done))
394 |
395 |             current_state = new_state
396 |             step += 1
397 |
398 |             if done:
399 |                 break
400 |
401 |         # End of episode - destroy agents
402 |         for actor in env.actor_list:
403 |             actor.destroy()
404 |
405 |         # Append episode reward to a list and log stats (every given number of episodes)
406 |         ep_rewards.append(episode_reward)
407 |         if not episode % AGGREGATE_STATS_EVERY or episode == 1:
408 |             average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:])/len(ep_rewards[-AGGREGATE_STATS_EVERY:])
409 |             min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
410 |             max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
411 |             agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon)
412 |
413 |             # Save model, but only when min reward is greater than or equal to a set value
414 |             if min_reward >= MIN_REWARD:
415 |                 agent.model.save(f'C:/Users/Dewang/Desktop/RLProject/models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.hdf5')
416 |
417 |         # Decay epsilon
418 |         if epsilon > MIN_EPSILON:
419 |             epsilon *= EPSILON_DECAY
420 |             epsilon = max(MIN_EPSILON, epsilon)
421 |
422 |         # Manual checkpoint
423 |         if episode > 0:
424 |             agent.model.save(f'C:/Users/Dewang/Desktop/RLProject/models/Checkpoints/{MODEL_NAME}__{episode}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.hdf5')
425 |
426 |     # Set termination flag for training thread and wait for it to finish
427 |     agent.terminate = True
428 |     trainer_thread.join()
429 |     agent.model.save(f'C:/Users/Dewang/Desktop/RLProject/models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.hdf5')
430 |
--------------------------------------------------------------------------------
/Code/test_script.py:
--------------------------------------------------------------------------------
1 | '''
2 | Teach An Agent To Drive A Car In A Virtual Environment
3 |
4 | @author: Dewang Shah, Anant Vignesh Mahadhevan and Rakesh Ramesh
5 | '''
6 | import random
7 | from collections import deque
8 | import numpy as np
9 | import cv2
10 | import time
11 | import tensorflow as tf
12 | import keras.backend.tensorflow_backend as backend
13 | from keras.models import load_model
14 | from main_script1 import CarEnv, MEMORY_FRACTION  # the training script in this repo is main_script1.py
15 |
16 |
17 | #MODEL_PATH = "C:\\Users\\yashp\\Downloads\\RL Project\\CARLA_0.9.5\\PythonAPI\\examples\\models\\Xception____-4.00max_-103.00avg_-202.00min__1571284603.model"
18 | MODEL_PATH = "C:\\Users\\Dewang\\Desktop\\Xception__3101____38.00max_-110.30avg_-272.00min__1574048139_AVM.hdf5"
19 | if __name__ == '__main__':
20 |
21 |     # Memory fraction
22 |     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=MEMORY_FRACTION)
23 |     backend.set_session(tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)))
24 |
25 |     # Load the model
26 |     model = load_model(MODEL_PATH)
27 |
28 |     # Create environment
29 |     env = CarEnv()
30 |
31 |     # For agent speed measurements - keeps last 60 frametimes
32 |     fps_counter = deque(maxlen=60)
33 |
34 |     # Initialize predictions - the first prediction takes longer because initialization has to be done
35 |     # It's better to do a first prediction before we start iterating over episode steps
36 |     model.predict(np.ones((1, env.im_height, env.im_width, 3)))
37 |
38 |     # Loop over episodes
39 |     while True:
40 |
41 |         print('Restarting episode')
42 |
43 |         # Reset environment and get initial state
44 |         current_state = env.reset()
45 |         env.collision_hist = []
46 |
47 |         done = False
48 |
49 |         # Loop over steps
50 |         while True:
51 |
52 |             # For FPS counter
53 |             step_start = time.time()
54 |
55 |             # Show current frame
56 |             cv2.imshow(f'Agent - preview', current_state)
57 |             cv2.waitKey(1)
58 |
59 |             # Predict an action based on current observation space
60 |             qs = model.predict(np.array(current_state).reshape(-1, *current_state.shape)/255)[0]
61 |             action = np.argmax(qs)
62 |
63 |             # Step the environment with the chosen action
64 |             new_state, reward, done, _ = env.step(action)
65 |
66 |             # Set current step for next loop iteration
67 |             current_state = new_state
68 |
69 |             # If done - the agent crashed, break the episode
70 |             if done:
71 |                 break
72 |
73 |             # Measure step time, append to a deque, then print mean FPS for the last 60 frames, Q values and the taken action
74 |             frame_time = time.time() - step_start
75 |             fps_counter.append(frame_time)
76 |             print(f'Agent: {len(fps_counter)/sum(fps_counter):>4.1f} FPS | Action: [{qs[0]:>5.2f}, {qs[1]:>5.2f}, {qs[2]:>5.2f}] {action}')
77 |
78 |         # Destroy the actors at the end of the episode
79 |         for actor in env.actor_list:
80 |             actor.destroy()
81 |
--------------------------------------------------------------------------------
/Final Presentaion.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Final Presentaion.pptx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Training-Self-Driving-Car-Using-Reinforcement-Learning
2 | Autonomous vehicles are widely expected to become a prominent vehicle category within the next decade. For this to succeed, a vehicle must be safe, reliable and comfortable for its passengers, and it must negotiate right and left turns and push ahead through urban traffic with some sophistication. Reinforcement learning is a natural framework for learning such a driving policy. We propose a deep Q-learning approach that learns to maximize the cumulative reward over a large state space. We use CARLA, an open-source simulator for autonomous driving research, to provide an environment that resembles real-life driving, in which the agent learns to avoid obstacles using data from the virtual sensors attached to it.
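
The training loop in `Code/main_script1.py` follows the standard DQN recipe: one of three steering commands (left, straight, right) is chosen epsilon-greedily, transitions are stored in a replay memory, and the online network is refit toward a Bellman target computed from a separate target network. The sketch below restates that update in isolation as a minimal illustration; the constant values mirror the ones in the script, while the helper names (`choose_action`, `q_target`) are ours and do not exist in the repository code.

```python
import numpy as np

DISCOUNT = 0.99   # discount factor, same value as main_script1.py
N_ACTIONS = 3     # steer left, go straight, steer right

def choose_action(qs, epsilon):
    """Epsilon-greedy selection over the three steering actions."""
    if np.random.random() > epsilon:
        return int(np.argmax(qs))             # exploit: action with the highest predicted Q value
    return np.random.randint(0, N_ACTIONS)    # explore: random steering action

def q_target(reward, future_qs, done, discount=DISCOUNT):
    """Bellman target the online network is refit toward for one replay transition."""
    if done:
        return reward                         # terminal transition: no future reward
    return reward + discount * np.max(future_qs)
```

In the script, `future_qs` come from the target network, whose weights are copied from the online network every few logged training steps (`UPDATE_TARGET_EVERY`). The reward shaping is simple: a collision ends the episode with -200, each step below 50 km/h gives -1, faster steps give +1, and episodes are also capped at `SECONDS_PER_EPISODE` seconds.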
3 | -------------------------------------------------------------------------------- /Research Papers/An Empirical Evaluation of Deep Learning on Highway Driving.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/An Empirical Evaluation of Deep Learning on Highway Driving.pdf -------------------------------------------------------------------------------- /Research Papers/CARLA (2).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/CARLA (2).pdf -------------------------------------------------------------------------------- /Research Papers/CARMA- A Deep Reinforcement Learning Approach to Autonomous Driving.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/CARMA- A Deep Reinforcement Learning Approach to Autonomous Driving.pdf -------------------------------------------------------------------------------- /Research Papers/CONTINUOUS CONTROL WITH DEEP REINFORCEMENT LEARNING.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/CONTINUOUS CONTROL WITH DEEP REINFORCEMENT LEARNING.pdf -------------------------------------------------------------------------------- /Research Papers/Deep Reinforcement Learning for Simulated Autonomous Vehicle Control.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Deep Reinforcement Learning for Simulated Autonomous Vehicle Control.pdf -------------------------------------------------------------------------------- /Research Papers/Deep Reinforcement Learning framework for Autonomous Driving.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Deep Reinforcement Learning framework for Autonomous Driving.pdf -------------------------------------------------------------------------------- /Research Papers/DeepDriving Learning Affordance for Direct Perception in Autonomous Driving.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/DeepDriving Learning Affordance for Direct Perception in Autonomous Driving.pdf -------------------------------------------------------------------------------- /Research Papers/Driverless Car_ Autonomous Driving Using Deep Reinforcement Learning In Urban Environment.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Driverless Car_ Autonomous Driving Using Deep Reinforcement Learning In Urban Environment.pdf -------------------------------------------------------------------------------- /Research Papers/Evolving Large-Scale Neural Networks for Vision-Based Reinforcement Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Evolving Large-Scale Neural Networks for Vision-Based Reinforcement Learning.pdf -------------------------------------------------------------------------------- /Research Papers/High Speed Obstacle Avoidance using Monocular Vision and Reinforcement Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/High Speed Obstacle Avoidance using Monocular Vision and Reinforcement Learning.pdf -------------------------------------------------------------------------------- /Research Papers/Improving Sample Efficiency in Model Free Reinforcement Learning from Images.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Improving Sample Efficiency in Model Free Reinforcement Learning from Images.pdf -------------------------------------------------------------------------------- /Research Papers/Learning Driving Styles for Autonomous Vehicles from Demonstration (1).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Learning Driving Styles for Autonomous Vehicles from Demonstration (1).pdf -------------------------------------------------------------------------------- /Research Papers/Learning to Drive using Inverse Reinforcement Learning ANd DQN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Learning to Drive using Inverse Reinforcement Learning ANd DQN.pdf -------------------------------------------------------------------------------- /Research Papers/Navigating Occluded Intersections with Autonomous Vehicles using Deep Reinforcement Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Navigating Occluded Intersections with Autonomous Vehicles using Deep Reinforcement Learning.pdf -------------------------------------------------------------------------------- /Research Papers/Our Research Paper/Teach An Agent To Drive A Car In A Virtual Environment.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Our Research Paper/Teach An Agent To Drive A Car In A Virtual Environment.docx -------------------------------------------------------------------------------- /Research Papers/Planning for Autonomous Cars that Leverage Effects on Human Actions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Planning for Autonomous Cars that Leverage Effects on Human Actions.pdf -------------------------------------------------------------------------------- /Research Papers/Safe, Multi-Agent, Reinforcement Learning for.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/Safe, Multi-Agent, Reinforcement Learning for.pdf -------------------------------------------------------------------------------- /Research Papers/VIRTUAL TO REAL REINFORCEMENT LEARNING.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/VIRTUAL TO REAL REINFORCEMENT LEARNING.pdf -------------------------------------------------------------------------------- /Research Papers/World Models.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Research Papers/World Models.pdf -------------------------------------------------------------------------------- /Results Media/Accuracy12000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Accuracy12000.jpg -------------------------------------------------------------------------------- /Results Media/Agent_preview_1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Agent_preview_1.mp4 -------------------------------------------------------------------------------- /Results Media/Agent_preview_2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Agent_preview_2.mp4 -------------------------------------------------------------------------------- /Results Media/Epsilon12000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Epsilon12000.jpg -------------------------------------------------------------------------------- /Results 
Media/Loss12000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Loss12000.jpg -------------------------------------------------------------------------------- /Results Media/RewardAvg12000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/RewardAvg12000.jpg -------------------------------------------------------------------------------- /Results Media/RewardMax12000.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/RewardMax12000.JPG -------------------------------------------------------------------------------- /Results Media/RewardMin12000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/RewardMin12000.jpg -------------------------------------------------------------------------------- /Results Media/Training_Video_1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Training_Video_1.mp4 -------------------------------------------------------------------------------- /Results Media/Training_video_2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anantvignesh/Training-Self-Driving-Car-Using-Reinforcement-Learning/fd4343b5440fe8aa6fe11637ba82fd7806a95a60/Results Media/Training_video_2.mp4 --------------------------------------------------------------------------------