├── .gitignore ├── 1.png ├── 2.GIF ├── 2.png ├── DRLCar.py ├── README.md └── environment.yml /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.local 3 | -------------------------------------------------------------------------------- /1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fdevmsy/Reinforcement-Learning-Based-Self-Driving-Car/e362ea7b3b84f7e8eb3040c000bd74b4234fb5be/1.png -------------------------------------------------------------------------------- /2.GIF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fdevmsy/Reinforcement-Learning-Based-Self-Driving-Car/e362ea7b3b84f7e8eb3040c000bd74b4234fb5be/2.GIF -------------------------------------------------------------------------------- /2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fdevmsy/Reinforcement-Learning-Based-Self-Driving-Car/e362ea7b3b84f7e8eb3040c000bd74b4234fb5be/2.png -------------------------------------------------------------------------------- /DRLCar.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import base64 4 | import json 5 | 6 | import socketio 7 | import eventlet 8 | import eventlet.wsgi 9 | import time 10 | from PIL import Image 11 | from PIL import ImageOps 12 | from flask import Flask, render_template 13 | from io import BytesIO 14 | 15 | # Modules for DQN 16 | import tensorflow as tf 17 | import math 18 | import cv2 19 | import random 20 | import numpy as np 21 | import copy 22 | import matplotlib.pyplot as plt 23 | import datetime 24 | import os 25 | 26 | # Unity connection 27 | sio = socketio.Server() 28 | app = Flask(__name__) 29 | 30 | # DQN Parameters 31 | algorithm = 'DQN' 32 | 33 | Num_action = 5 34 | Gamma = 0.99 35 | Learning_rate = 0.00025 36 | 37 | First_epsilon = 1.0 38 | Final_epsilon = 0.01 39 | Epsilon = First_epsilon 40 | 41 | Num_replay_memory = 50000 42 | Num_start_training = 25000 43 | Num_training = 500000 44 | Num_update = 5000 45 | Num_batch = 32 46 | Num_skipFrame = 4 47 | Num_stackFrame = 4 48 | Num_colorChannel = 1 49 | Num_MapChannel = 1 50 | 51 | img_size = 80 52 | map_size = 81 53 | 54 | Num_step_save = 50000 55 | Num_step_plot = 100 56 | 57 | # Parameters for Network 58 | first_conv_img = [8,8, Num_colorChannel * Num_stackFrame * 2,32] 59 | first_conv_map = [8, 8, Num_stackFrame, 32] 60 | second_conv = [4,4,32,64] 61 | third_conv = [3,3,64,64] 62 | first_dense_img = [10*10*64, 1024] 63 | first_dense_map = [11*11*64, 1024] 64 | # first_dense = [10*10*64 + 11*11*64, 512] 65 | first_dense = [10*10*64, 512] 66 | second_dense = [512, 256] 67 | third_dense = [256, Num_action] 68 | 69 | # Initialize weights and bias 70 | def weight_variable(shape): 71 | return tf.Variable(xavier_initializer(shape)) 72 | 73 | def bias_variable(shape): 74 | return tf.Variable(xavier_initializer(shape)) 75 | 76 | # Xavier Weights initializer 77 | def xavier_initializer(shape): 78 | dim_sum = np.sum(shape) 79 | if len(shape) == 1: 80 | dim_sum += 1 81 | bound = np.sqrt(2.0 / dim_sum) 82 | return tf.random_uniform(shape, minval=-bound, maxval=bound) 83 | 84 | # Convolution and pooling 85 | def conv2d(x,w, stride): 86 | return tf.nn.conv2d(x,w,strides=[1, stride, stride, 1], padding='SAME') 87 | 88 | def max_pool_2x2(x): 89 | return tf.nn.max_pool(x, ksize=[1,2,2,1], 
strides=[1,2,2,1], padding='SAME') 90 | 91 | # Assign network variables to target networks 92 | def assign_network_to_target(): 93 | # Get trainable variables 94 | trainable_variables = tf.trainable_variables() 95 | # network lstm variables 96 | trainable_variables_network = [var for var in trainable_variables if var.name.startswith('network')] 97 | 98 | # target lstm variables 99 | trainable_variables_target = [var for var in trainable_variables if var.name.startswith('target')] 100 | 101 | for i in range(len(trainable_variables_network)): 102 | sess.run(tf.assign(trainable_variables_target[i], trainable_variables_network[i])) 103 | 104 | # Code for tensorboard 105 | def setup_summary(): 106 | episode_score = tf.Variable(0.) 107 | 108 | tf.summary.scalar('Total Reward/' + str(Num_step_plot) + ' steps', episode_score) 109 | 110 | summary_vars = [episode_score] 111 | summary_placeholders = [tf.placeholder(tf.float32) for _ in range(len(summary_vars))] 112 | update_ops = [summary_vars[i].assign(summary_placeholders[i]) for i in range(len(summary_vars))] 113 | summary_op = tf.summary.merge_all() 114 | return summary_placeholders, update_ops, summary_op 115 | 116 | # Input 117 | x_img = tf.placeholder(tf.float32, shape = [None, img_size, img_size, 2 * Num_colorChannel * Num_stackFrame]) 118 | # x_map = tf.placeholder(tf.float32, shape = [None, map_size, map_size, Num_stackFrame]) 119 | 120 | # Normalize input 121 | x_img = (x_img - (255.0/2)) / (255.0/2) 122 | ###################################### Image Network ###################################### 123 | with tf.variable_scope('network'): 124 | # Convolution variables 125 | w_conv1_img = weight_variable(first_conv_img) 126 | b_conv1_img = bias_variable([first_conv_img[3]]) 127 | 128 | w_conv2_img = weight_variable(second_conv) 129 | b_conv2_img = bias_variable([second_conv[3]]) 130 | 131 | w_conv3_img = weight_variable(third_conv) 132 | b_conv3_img = bias_variable([third_conv[3]]) 133 | 134 | w_fc1 = weight_variable(first_dense) 135 | b_fc1 = bias_variable([first_dense[1]]) 136 | 137 | w_fc2 = weight_variable(second_dense) 138 | b_fc2 = bias_variable([second_dense[1]]) 139 | 140 | w_fc3 = weight_variable(third_dense) 141 | b_fc3 = bias_variable([third_dense[1]]) 142 | 143 | ###################################### Image Network ###################################### 144 | h_conv1_img = tf.nn.relu(conv2d(x_img, w_conv1_img, 4) + b_conv1_img) 145 | h_conv2_img = tf.nn.relu(conv2d(h_conv1_img, w_conv2_img, 2) + b_conv2_img) 146 | h_conv3_img = tf.nn.relu(conv2d(h_conv2_img, w_conv3_img, 1) + b_conv3_img) 147 | 148 | h_pool3_flat_img = tf.reshape(h_conv3_img, [-1, first_dense_img[0]]) 149 | 150 | h_flat = h_pool3_flat_img 151 | h_fc1 = tf.nn.relu(tf.matmul(h_flat, w_fc1)+b_fc1) 152 | h_fc2 = tf.nn.relu(tf.matmul(h_fc1, w_fc2)+b_fc2) 153 | 154 | output = tf.matmul(h_fc2, w_fc3) + b_fc3 155 | 156 | ###################################### Image Target Network ###################################### 157 | with tf.variable_scope('target'): 158 | # Convolution variables target 159 | w_conv1_target_img = weight_variable(first_conv_img) 160 | b_conv1_target_img = bias_variable([first_conv_img[3]]) 161 | 162 | w_conv2_target_img = weight_variable(second_conv) 163 | b_conv2_target_img = bias_variable([second_conv[3]]) 164 | 165 | w_conv3_target_img = weight_variable(third_conv) 166 | b_conv3_target_img = bias_variable([third_conv[3]]) 167 | 168 | # Densely connect layer variables target 169 | w_fc1_target = weight_variable(first_dense) 170 | b_fc1_target = 
bias_variable([first_dense[1]])
171 |
172 |     w_fc2_target = weight_variable(second_dense)
173 |     b_fc2_target = bias_variable([second_dense[1]])
174 |
175 |     w_fc3_target = weight_variable(third_dense)
176 |     b_fc3_target = bias_variable([third_dense[1]])
177 |
178 |     # img Target Network
179 |     h_conv1_target_img = tf.nn.relu(conv2d(x_img, w_conv1_target_img, 4) + b_conv1_target_img)
180 |     h_conv2_target_img = tf.nn.relu(conv2d(h_conv1_target_img, w_conv2_target_img, 2) + b_conv2_target_img)
181 |     h_conv3_target_img = tf.nn.relu(conv2d(h_conv2_target_img, w_conv3_target_img, 1) + b_conv3_target_img)
182 |
183 |     h_pool3_flat_target_img = tf.reshape(h_conv3_target_img, [-1, first_dense_img[0]])
184 |
185 |     h_flat_target = h_pool3_flat_target_img  # use the target network's own conv features
186 |     h_fc1_target = tf.nn.relu(tf.matmul(h_flat_target, w_fc1_target)+b_fc1_target)
187 |     h_fc2_target = tf.nn.relu(tf.matmul(h_fc1_target, w_fc2_target)+b_fc2_target)
188 |
189 |     output_target = tf.matmul(h_fc2_target, w_fc3_target) + b_fc3_target
190 |
191 | ###################################### Calculate Loss & Train ######################################
192 | # Loss function and Train
193 | action_target = tf.placeholder(tf.float32, shape = [None, Num_action])
194 | y_prediction = tf.placeholder(tf.float32, shape = [None])
195 |
196 | y_target = tf.reduce_sum(tf.multiply(output, action_target), reduction_indices = 1)
197 | Loss = tf.reduce_mean(tf.square(y_prediction - y_target))
198 | train_step = tf.train.AdamOptimizer(learning_rate = Learning_rate, epsilon = 1e-02).minimize(Loss)
199 |
200 | # Initialize variables
201 | config = tf.ConfigProto()
202 | config.gpu_options.per_process_gpu_memory_fraction = 0.4
203 |
204 | sess = tf.InteractiveSession(config=config)
205 |
206 | # date - hour - minute of training time
207 | date_time = str(datetime.date.today()) + '_' + str(datetime.datetime.now().hour) + '_' + str(datetime.datetime.now().minute)
208 |
209 | # Make folder for save data
210 | os.makedirs('saved_networks/' + date_time)
211 |
212 | # Summary for tensorboard
213 | summary_placeholders, update_ops, summary_op = setup_summary()
214 | summary_writer = tf.summary.FileWriter('saved_networks/' + date_time, sess.graph)
215 |
216 | init = tf.global_variables_initializer()
217 | sess.run(init)
218 |
219 | # Load the file if the saved file exists
220 | saver = tf.train.Saver()
221 | # check_save = 1
222 | check_save = int(input('Is there any saved data?(1=y/2=n): '))
223 |
224 | if check_save == 1:
225 |     checkpoint = tf.train.get_checkpoint_state('saved_networks/' + date_time)
226 |     if checkpoint and checkpoint.model_checkpoint_path:
227 |         saver.restore(sess, checkpoint.model_checkpoint_path)
228 |         print("Successfully loaded:", checkpoint.model_checkpoint_path)
229 |     else:
230 |         print("Could not find old network weights")
231 |
232 | # Initial parameters
233 | Replay_memory = []
234 | step = 1
235 | Init = 0
236 | state = 'Observing'
237 | episode = 0
238 | score = 0
239 |
240 | observation_in_img = 0
241 | observation_in_map = 0
242 | img_front_old = 0
243 |
244 | Is_connect = False
245 | terminal_connect = 0
246 |
247 | reward_x = []
248 | reward_y = []
249 |
250 | observation_set_img = []
251 | observation_set_map = []
252 |
253 | action_old = np.array([1, 0, 0, 0, 0])
254 | speed_old = 20
255 | Was_left_changing = False
256 | Was_right_changing = False
257 |
258 | Vehicle_z_old = 0
259 | # Communication with Unity
260 | @sio.on('telemetry')
261 | def telemetry(sid, data):
262 |     if data:
263 |         # print("received!\n")
264 |         global step, Replay_memory,
observation_in_img, observation_in_map, Epsilon, terminal_connect, img_front_old, reward_x, reward_y, \ 265 | observation_set_img, observation_set_map, TD_list, action_old, speed_old, Init, Was_left_changing, Was_right_changing, Vehicle_z_old,\ 266 | episode, score 267 | # print(data) 268 | current_time = time.time() 269 | 270 | Is_right_lane_changing = float(data["user/angle"])>0 271 | Is_left_lane_changing = float(data["user/angle"])<0 272 | 273 | Is_lane_changing = False 274 | 275 | if Is_right_lane_changing == 1 or Is_left_lane_changing == 1: 276 | Is_lane_changing = True 277 | else: 278 | Is_lane_changing = False 279 | 280 | Vehicle_z = float(data["collide"]) 281 | 282 | # The current image from the camera of the car (front) 283 | imgString_front = data["camforward/image_array"] 284 | image_front = Image.open(BytesIO(base64.b64decode(imgString_front))) 285 | image_array_front = np.asarray(image_front) 286 | # ---------------------- Image transformation ---------------------- 287 | #image_array_front = image_array_front[55:130, 60:260,:] 288 | image_trans_front = cv2.resize(image_array_front, (img_size, img_size)) 289 | 290 | if Num_colorChannel == 1: 291 | image_trans_front = cv2.cvtColor(image_trans_front, cv2.COLOR_RGB2GRAY) 292 | image_trans_front = np.reshape(image_trans_front, (img_size, img_size, 1)) 293 | 294 | #image_trans_front = (image_trans_front - (255./2.)) / (255./2.) 295 | 296 | # ------------------------------------------------------------------ 297 | # The current image from the camera of the car (rear) 298 | imgString_rear = data["camback/image_array"] 299 | image_rear = Image.open(BytesIO(base64.b64decode(imgString_rear))) 300 | image_array_rear = np.asarray(image_rear) 301 | # ---------------------- Image transformation ---------------------- 302 | # image_array_rear = image_array_rear[55:130, 60:260,:] 303 | 304 | image_trans_rear = cv2.resize(image_array_rear, (img_size, img_size)) 305 | 306 | if Num_colorChannel == 1: 307 | image_trans_rear = cv2.cvtColor(image_trans_rear, cv2.COLOR_RGB2GRAY) 308 | image_trans_rear = np.reshape(image_trans_rear, (img_size, img_size, 1)) 309 | 310 | # image_trans_rear = (image_trans_rear - (255./2.)) / (255./2.) 
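        # At this point image_trans_front / image_trans_rear hold the current
        # grayscale 80x80 frames from the front and rear cameras. Below, they are
        # pushed into a rolling buffer: the network input stacks Num_stackFrame (4)
        # front/rear frame pairs, sampled every Num_skipFrame (4) steps, giving an
        # observation tensor of shape (img_size, img_size, 2 * Num_stackFrame) = (80, 80, 8).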
311 |         # ------------------------------------------------------------------
312 |
313 |         # Initialization
314 |         if Init == 0:
315 |             observation_next_img = np.zeros([img_size, img_size, 2])
316 |
317 |             observation_in_img = np.zeros([img_size, img_size, 1])
318 |
319 |             for i in range(Num_stackFrame):
320 |                 observation_in_img = np.insert(observation_in_img, [1], image_trans_front, axis = 2)
321 |                 observation_in_img = np.insert(observation_in_img, [1], image_trans_rear , axis = 2)
322 |
323 |             observation_in_img = np.delete(observation_in_img, [0], axis = 2)
324 |
325 |             # Making observation set for img
326 |             for i in range(Num_skipFrame * Num_stackFrame):
327 |                 observation_set_img.insert(0, observation_in_img[:,:,:2])
328 |
329 |             Vehicle_z_old = Vehicle_z
330 |
331 |
332 |             Init = 1
333 |             print('Initialization is Finished!')
334 |
335 |         # Processing input data
336 |         observation_next_img = np.zeros([img_size, img_size, 1])
337 |         observation_next_img = np.insert(observation_next_img, [1], image_trans_front, axis = 2)
338 |         observation_next_img = np.insert(observation_next_img, [1], image_trans_rear , axis = 2)
339 |         observation_next_img = np.delete(observation_next_img, [0], axis = 2)
340 |         # print("mark1")
341 |
342 |         del observation_set_img[0]
343 |         observation_set_img.append(observation_next_img)
344 |         observation_next_in_img = np.zeros([img_size, img_size, 1])
345 |
346 |         for stack_frame in range(Num_stackFrame):
347 |             observation_next_in_img = np.insert(observation_next_in_img, [1], observation_set_img[-1 - (Num_skipFrame * stack_frame)], axis = 2)
348 |
349 |         observation_next_in_img = np.delete(observation_next_in_img, [0], axis = 2)
350 |
351 |         # Get data from Unity
352 |         # reward = float(data["reward"])
353 |         ###### Q MARK #########
354 |         # print("mark2")
355 |         throttle = float(data['user/throttle'])
356 |         angle = float(data['user/angle'])
357 |         dist_path = float(data['distance/path'])
358 |         # dist_path = float(data['distance/path'])
359 |         print("distance: ", dist_path)
360 |
361 |         action_vehicle = 0  # 0: nothing, 1: acc, 2: dec, 3: left, 4: right
362 |
363 |         if throttle > 0:
364 |             action_vehicle = 1.00
365 |         if throttle < 0:
366 |             action_vehicle = 2.00
367 |         if angle > 0:
368 |             action_vehicle = 4.00
369 |         if angle < 0:
370 |             action_vehicle = 3.00
371 |
372 |         # action_vehicle = float(data["Action_vehicle"])
373 |         speed_vehicle = float(data["speed"])
374 |
375 |         # According to the last action, get reward.
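        # Reward shaping used below: + speed_vehicle / 10, minus 10 * |distance from path|,
        # +1 for accelerating, -5 for decelerating, -1 for steering left or right,
        # and reward_bad (-500) replaces the reward when a collision ends the episode.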
376 | action_old_index = np.argmax(action_old) 377 | 378 | reward = speed_vehicle / 10 379 | reward_bad = -500 380 | reward -= abs(dist_path) * 10 381 | ###### Q Mark ########## 382 | if action_old_index == 1: 383 | reward += 1 384 | elif action_old_index == 2: 385 | reward -= 5 386 | elif action_old_index == 3: 387 | reward -= 1 388 | elif action_old_index == 4: 389 | reward -= 1 390 | 391 | # Get action with string 392 | action_str = '' 393 | 394 | if action_old_index == 0: 395 | action_str = 'Nothing' 396 | elif action_old_index == 1: 397 | action_str = 'Acc' 398 | elif action_old_index == 2: 399 | action_str = 'Dec' 400 | elif action_old_index == 3: 401 | action_str = 'Left' 402 | elif action_old_index == 4: 403 | action_str = 'Right' 404 | 405 | # If terminal is 1 ( = Collision), then reward is -100 406 | # terminal = terminal_connect 407 | terminal = 0 408 | # print("mark3") 409 | # print(data["collide"]) 410 | ####### terminal ############## 411 | if data["collide"] == "1.0000": 412 | # if abs(Vehicle_z - Vehicle_z_old) > 1 and Vehicle_z_old < 21: 413 | print('Terminal!!') 414 | terminal = 1 415 | 416 | # send_control(2) 417 | # print("Going Back") 418 | # send_control(2) 419 | 420 | if terminal == 1 and step != 1: 421 | reward = reward_bad 422 | 423 | if len(Replay_memory) > 15: 424 | # Replay_memory[-1][3] = reward_bad 425 | 426 | RM_index = list(range(-15, 0)) 427 | RM_index.reverse() 428 | RM_index_crash = -1 429 | 430 | right_action = np.zeros([5]) 431 | right_action[4] = 1 432 | 433 | left_action = np.zeros([5]) 434 | left_action[3] = 1 435 | 436 | if Was_right_changing == 1: 437 | for i_RM in RM_index: 438 | if np.argmax(Replay_memory[i_RM][2]) == 4: 439 | RM_index_crash = i_RM 440 | break 441 | 442 | Replay_memory[RM_index_crash][3] = reward_bad 443 | 444 | if Was_left_changing == 1: 445 | for i_RM in RM_index: 446 | if np.argmax(Replay_memory[i_RM][2]) == 4: 447 | RM_index_crash = i_RM 448 | break 449 | 450 | Replay_memory[RM_index_crash][3] = reward_bad 451 | 452 | # It shows action which is decided by random or Q network while training 453 | Action_from = '' 454 | 455 | # If step is less than Num_start_training, store replay memory 456 | if step <= Num_start_training: 457 | state = 'Observing' 458 | print("observing") 459 | 460 | action = np.zeros([Num_action]) 461 | action[random.randint(0, Num_action - 1)] = 1.0 462 | 463 | elif step <= Num_start_training + Num_training: 464 | state = 'Training' 465 | print("training") 466 | 467 | # Get action 468 | if random.random() < Epsilon: 469 | # print("using random") 470 | action = np.zeros([Num_action]) 471 | action[random.randint(0, Num_action - 1)] = 1.0 472 | Action_from = 'Random' 473 | else: 474 | Q_value = output.eval(feed_dict={x_img: [observation_in_img]}) 475 | # print("using prediction") 476 | action = np.zeros([Num_action]) 477 | action[np.argmax(Q_value)] = 1 478 | Action_from = 'Q_network' 479 | 480 | # Select minibatch 481 | minibatch = random.sample(Replay_memory, Num_batch) 482 | 483 | # Save the each batch data 484 | observation_batch_img = [batch[0] for batch in minibatch] 485 | action_batch = [batch[1] for batch in minibatch] 486 | reward_batch = [batch[2] for batch in minibatch] 487 | observation_next_batch_img = [batch[3] for batch in minibatch] 488 | terminal_batch = [batch[4] for batch in minibatch] 489 | 490 | # Update target network according to the Num_update value 491 | if step % Num_update == 0: 492 | assign_network_to_target() 493 | 494 | # Get Target value 495 | y_batch = [] 496 | # 
print(len(observation_next_batch_img)) 497 | # print('end') 498 | # Q_batch = output_target.eval(feed_dict = {x_img: observation_next_batch_img, x_map: observation_next_batch_map}) 499 | try: 500 | Q_batch = output_target.eval(feed_dict = {x_img: observation_next_batch_img}) 501 | # print("got q batch") 502 | 503 | 504 | for i in range(len(minibatch)): 505 | if terminal_batch[i] == True: 506 | y_batch.append(reward_batch[i]) 507 | else: 508 | y_batch.append(reward_batch[i] + Gamma * np.max(Q_batch[i])) 509 | 510 | train_step.run(feed_dict = {action_target: action_batch, y_prediction: y_batch, x_img: observation_batch_img}) 511 | 512 | # save progress every certain steps 513 | if step % Num_step_save == 0: 514 | saver.save(sess, 'saved_networks/' + date_time + '/' + algorithm) 515 | print('Model is saved!!!') 516 | except: 517 | # print("error") 518 | pass 519 | else: 520 | print("testing") 521 | # Testing code 522 | state = 'Testing' 523 | Q_value = output.eval(feed_dict={x_img: [observation_in_img]}) 524 | print(Q_value) 525 | action = np.zeros([Num_action]) 526 | action[np.argmax(Q_value)] = 1 527 | 528 | Epsilon = 0 529 | # print("mark4") 530 | ## Saving the camera image 531 | # i_front = Image.fromarray(image_array_front, mode='RGB') 532 | # i_front.save("./Image_front/" + str(step) + '.jpg') 533 | 534 | # i_rear = Image.fromarray(image_array_rear, mode='RGB') 535 | # i_rear.save("./Image_rear/" + str(step) + '.jpg') 536 | 537 | # If replay memory is more than Num_replay_memory than erase one 538 | if state != 'Testing': 539 | if len(Replay_memory) > Num_replay_memory: 540 | del Replay_memory[0] 541 | 542 | observation_in_img = np.uint8(observation_in_img) 543 | # observation_in_map = np.int8(observation_in_map) 544 | observation_next_in_img = np.uint8(observation_next_in_img) 545 | # observation_next_in_map = np.int8(observation_next_in_map) 546 | 547 | # Save experience to the Replay memory and TD_list 548 | Replay_memory.append([observation_in_img, action_old, reward, \ 549 | observation_next_in_img, terminal]) 550 | # Send action to Unity 551 | # print("action: ", action) 552 | action_in = np.argmax(action) 553 | # print("action_in: ", action_in) 554 | send_control(action_in) 555 | # print("mark5") 556 | if state != 'Observing': 557 | score += reward 558 | 559 | if step % Num_step_plot == 0 and step != Num_start_training: 560 | tensorboard_info = [score / Num_step_plot] 561 | for i in range(len(tensorboard_info)): 562 | sess.run(update_ops[i], feed_dict = {summary_placeholders[i]: float(tensorboard_info[i])}) 563 | summary_str = sess.run(summary_op) 564 | summary_writer.add_summary(summary_str, step) 565 | score = 0 566 | 567 | # Print information mark 568 | # print('Step: ' + str(step) + ' / ' + 'Episode: ' + str(episode) + ' / ' + 'State: ' + state + ' / ' + 'Action: ' + action_str + ' / ' + 569 | # 'Reward: ' + str(reward) + ' / ' + 'Epsilon: ' + str(Epsilon) + ' / ' + 'Action from: ' + Action_from + '\n') 570 | 571 | if terminal == 1: 572 | if state != 'Observing': 573 | episode += 1 574 | 575 | # Get current variables to old vatiables 576 | observation_in_img = observation_next_in_img 577 | # observation_in_map = observation_next_in_map 578 | # print('mark5.1') 579 | action_old = action 580 | speed_old = speed_vehicle 581 | img_front_old = image_array_front 582 | Was_left_changing = Is_left_lane_changing 583 | Was_right_changing = Is_right_lane_changing 584 | 585 | Vehicle_z_old = Vehicle_z 586 | # Update step number and decrease epsilon 587 | step += 1 588 | if Epsilon > 
Final_epsilon and state == 'Training':
589 |             Epsilon -= First_epsilon / Num_training
590 |         # print('mark5.2')
591 |     else:
592 |         # NOTE: DON'T EDIT THIS.
593 |         # self.sio.emit('manual', data={}, skip_sid=True)
594 |         print("nothing received!")
595 |         # print("mark6")
596 | # Connection with Unity
597 | @sio.on('connect')
598 | def connect(sid, environ):
599 |     print("connect ", sid)
600 |     send_control(-1)
601 |
602 | # # Disconnect with Unity
603 | # @sio.on('disconnect')
604 | # def disconnect(sid):
605 | #     print('Client disconnected')
606 |
607 | # Send control to Unity
608 | num_connection = 0
609 | def send_control(action):
610 |     global num_connection
611 |     data = {}
612 |     if action == -1 or action == 0:
613 |         steering_angle_a = 0
614 |         throttle_a = 0
615 |         data = {'steering_angle':steering_angle_a.__str__(), 'throttle': throttle_a.__str__()}
616 |
617 |     # if action == 1:
618 |     #     data = {'user/throttle':'0.8'}
619 |     # if action == 2:
620 |     #     data = {'user/throttle':'-0.8'}
621 |     # if action == 3:
622 |     #     data = {'user/angle':'-8'}
623 |     # if action == 4:
624 |     #     data = {'user/angle':'8'}
625 |     elif action == 1:
626 |         data = {'throttle':'0.8', 'steering_angle':'0'}
627 |     elif action == 2:
628 |         data = {'throttle':'-0.8', 'steering_angle':'0'}
629 |     elif action == 3:
630 |         data = {'steering_angle':'-15', 'throttle':'0.8'}
631 |     elif action == 4:
632 |         data = {'steering_angle':'15', 'throttle':'0.8'}
633 |
634 |
635 |     if num_connection > 500:
636 |         num_connection = 0
637 |
638 |     sio.emit("steer", data, skip_sid=True)
639 |
640 |     # sio.emit("onsteer", data={
641 |     #     'action': action.__str__()
642 |     #     'num_connection': num_connection.__str__()
643 |
644 |
645 |
646 | if __name__ == '__main__':
647 |     # wrap Flask application with engineio's middleware
648 |     app = socketio.Middleware(sio, app)
649 |
650 |     # deploy as an eventlet WSGI server
651 |     eventlet.wsgi.server(eventlet.listen(('', 9090)), app)
652 |
653 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Reinforcement Learning Self-Driving Car
2 |
3 | ## Introduction
4 |
5 | This project implements a self-driving car in our simulator using reinforcement learning. The car is able to drive freely and stably in different scenes, with or without random barriers. We did not collect any dataset or train the model in a supervised way; instead, we defined actions and corresponding rewards for the car and let it learn by itself through exploration.
6 |
7 | ![Demo](https://github.com/Fdevmsy/Reinforcement-Learning-Based-Self-Driving-Car/blob/master/2.GIF)
8 |
9 | The simulator is being upgraded every day, with more scenes and functions being added. Localization and navigation will be added soon.
10 |
11 | ![](1.png)
12 |
13 | ## Installation
14 |
15 | ### Homebrew
16 | `ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"`
17 | ### OpenAI Gym dependencies
18 | `brew install cmake boost boost-python sdl2 swig wget`
19 | ### noti
20 | `(curl -L https://github.com/variadico/noti/releases/download/v2.5.0/noti2.5.0.darwin-amd64.tar.gz | tar -xz); sudo mv noti /usr/local/bin/`
21 | ### Node >= v7.0
22 | `brew install node`
23 |
24 | ### Project Dependencies
25 | `./bin/copy-config
26 | npm install; sudo npm install -g grunt-cli`
27 |
28 | `conda env create -f environment.yml
29 | source activate DRL`
30 |
31 |
32 | ## Background
33 |
34 | Reinforcement learning develops control patterns by providing feedback on a model’s selected actions, which encourages the model to select better actions in the future. At each time step, given some state s, the model selects an action a, and then observes the new state s' and a reward r based on some optimality criterion.
35 |
36 | We specifically used a method known as Q-learning, which approximates the maximum expected return for performing an action at a given state using an action-value (Q) function. Specifically, the return is the sum of the rewards until the game terminates, where the reward is discounted by a factor of γ at each time step. We formally define this as:
37 |
38 | ![alt-text](http://imgur.com/h7MJxSJ.png "(1)")
39 |
40 | We then define the action-value function:
41 |
42 | ![alt-text](http://imgur.com/05MxGxk.png "(2)")
43 |
44 | Note that if the optimal Q function is known for state s', we can write the optimal Q function at the preceding state s as the maximum expected value of ![alt-text](http://imgur.com/1RSOCHo.png "Sorry, no alt-text for this one"). This identity is known as the Bellman equation:
45 |
46 | ![alt-text](http://imgur.com/BERyjr2.png "(3)")
47 |
48 | The intuition behind reinforcement learning is to continually update the action-value function based on observations using the Bellman equation. It has been shown by Sutton et al. 1998 [2] that such update algorithms will converge on the optimal action-value function as time approaches infinity. Based on this, we can define Q as the output of a neural network with weights θ, and train this network by minimizing the following loss function at each iteration i:
49 |
50 | ![alt-text](http://imgur.com/3gFka35.png "(4)")
51 |
52 | where y_i represents the target value we want to approach during each iteration. It is defined as:
53 |
54 | ![alt-text](http://imgur.com/gKcXJfi.png "(5)")
55 |
56 | Note that when i is equal to the final iteration of an episode (colloquially, the end of a game), the Q function should be 0, since it is impossible to attain additional reward after the game has ended. Therefore, when i equals the terminal frame of an episode, we can simply write:
57 |
58 | ![alt-text](http://imgur.com/nU8qRJM.png "(6)")
59 |
60 | ## Definition
61 |
62 | Actions:
63 |
64 | - 0: Do nothing
65 | - 1: Accelerate
66 | - 2: Decelerate
67 | - 3: Turn Left
68 | - 4: Turn Right
69 |
70 | We train the network with these actions. To communicate with the simulator, we convert them to the car's throttle and steering-angle commands, as shown below.
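To make the two previous sections concrete, here is a minimal, self-contained sketch of the two ideas they describe: forming the Q-learning target y_i = r + γ·max Q(s', a') from the Background section, and picking one of the five actions ε-greedily during training. The discount factor (0.99) and the five-action layout match DRLCar.py, but the helper functions themselves are only illustrative and are not part of the project code.

~~~python
import random
import numpy as np

Num_action = 5   # [nothing, accelerate, decelerate, left, right]
Gamma = 0.99     # discount factor, as in DRLCar.py

def td_target(reward, next_q_values, terminal):
    """Q-learning target: y = r if the episode ended, else r + Gamma * max_a' Q(s', a')."""
    if terminal:
        return reward
    return reward + Gamma * np.max(next_q_values)

def choose_action(q_values, epsilon):
    """Epsilon-greedy selection, returning a one-hot action vector."""
    action = np.zeros(Num_action)
    if random.random() < epsilon:
        action[random.randint(0, Num_action - 1)] = 1.0   # explore
    else:
        action[np.argmax(q_values)] = 1.0                 # exploit the Q-network
    return action

# Example: target for a non-terminal transition, and one sampled action
print(td_target(2.0, np.array([0.5, 1.2, -0.3, 0.1, 0.9]), terminal=False))  # 2.0 + 0.99 * 1.2
print(choose_action(np.array([0.1, 0.9, -0.2, 0.3, 0.0]), epsilon=0.05))
~~~

The index of the chosen one-hot action is what gets converted to throttle and steering commands in the mapping below: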
71 |
72 | ~~~python
73 | if action == -1 or action == 0:
74 |     steering_angle_a = 0
75 |     throttle_a = 0
76 |     data = {'steering_angle':steering_angle_a.__str__(), 'throttle': throttle_a.__str__()}
77 |
78 | elif action == 1:
79 |     data = {'throttle':'0.8', 'steering_angle':'0'}
80 | elif action == 2:
81 |     data = {'throttle':'-0.8', 'steering_angle':'0'}
82 | elif action == 3:
83 |     data = {'steering_angle':'-15', 'throttle':'0.8'}
84 | elif action == 4:
85 |     data = {'steering_angle':'15', 'throttle':'0.8'}
86 |
87 | # Reward:
88 |
89 | reward = speed_vehicle / 10
90 | reward_bad = -500
91 | reward -= abs(dist_path) * 10
92 |
93 | ###### Q Mark ##########
94 | if action_old_index == 1:
95 |     reward += 1
96 | elif action_old_index == 2:
97 |     reward -= 5
98 | elif action_old_index == 3:
99 |     reward -= 1
100 | elif action_old_index == 4:
101 |     reward -= 1
102 |
103 |
104 | # If terminated (collision):
105 | reward = reward_bad
106 | ~~~
107 |
108 | Our goal is to build a self-driving car, but we want more than just driving without hitting the wall: stable and comfortable driving is also preferred. So every action such as turning or decelerating gets a small negative reward, while accelerating is preferred and gets +1. Hitting the wall is the most important thing to avoid, so it receives the large penalty reward_bad (-500).
109 |
110 | State:
111 | The state is a stack of the most recent front- and rear-camera images (converted to grayscale and resized to 80x80); the vehicle speed is used in the reward rather than as a network input.
112 |
113 | ## Usage
114 |
115 | 1. Set up the simulator
116 | 2. `python DRLCar.py`
117 |
118 | ## Reference
119 |
120 | This project is greatly inspired by MLJejuCamp2017's project:
121 | [https://github.com/MLJejuCamp2017/DRL_based_SelfDrivingCarControl](https://github.com/MLJejuCamp2017/DRL_based_SelfDrivingCarControl)
122 |
123 |
124 | ## Contact
125 |
126 | MakerColider
127 |
128 | Shiyu Mou
129 | shiyumou@usc.edu
130 |
131 |
132 |
133 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: DRL
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - python>=3.5
6 |   - anaconda
7 |   - six
8 |   - h5py
9 |   - matplotlib==1.4.3
10 |   - seaborn>=0.7.1
11 |   - Pillow>=3.3.1
12 |   - PyOpenGL>=3.1.0
13 |   - glances>=2.6.2
14 |   - pytest-cov>=2.3.1
15 |   - pytest-xdist>=1.15.0
16 |   - pip:
17 |     - codacy-coverage>=1.3.3
18 |     - mem_top==0.1.5
19 |     - atari_py>=0.0.18
20 |     - cmake==0.6.0
21 |     - tensorflow>=1.0.0
22 |     - Keras>=1.2.2,<2.0.0
23 |     - "--editable=git+https://github.com/openai/gym.git#egg=gym[all]"
24 |
--------------------------------------------------------------------------------