├── README.md
├── LICENSE
├── datagen.py
├── train.py
├── model.py
└── augmentation.py

/README.md:
--------------------------------------------------------------------------------
# SDC-Udacity-Challenge-2 ([Simulation video](https://www.youtube.com/watch?v=YVmlioP3qqY))
This repo contains my work on Udacity's Challenge 2, which uses end-to-end learning to predict steering angles for a self-driving car from front-camera images.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 Gautam Sharma

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/datagen.py:
--------------------------------------------------------------------------------
import random

import augmentation as aug
import cv2
import pandas as pd


# Pointers to the end of the last batch served
train_batch_pointer = 0
val_batch_pointer = 0

# Get data: keep only center-camera frames from the interpolated steering log
FDIR = '/Volumes/Untitled/Udacity datasets/challenge 2 training/Ch2_002/images & interpolated data/'
df = pd.read_csv(FDIR + 'interpolated.csv')
df = df[df.frame_id == 'center_camera']
xs = [FDIR + s for s in df.filename]
ys = df.angle

# Get number of images
num_images = len(xs)

# Shuffle the (image, angle) pairs together
c = list(zip(xs, ys))
random.shuffle(c)
xs, ys = zip(*c)

# Split into train and validation sets (80/20, non-overlapping)
split = int(num_images * 0.8)
train_xs = xs[:split]
train_ys = ys[:split]

val_xs = xs[split:]
val_ys = ys[split:]

num_train_images = len(train_xs)
num_val_images = len(val_xs)
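
# Sanity-check sketch (illustrative, not part of the original pipeline): the
# split above partitions the shuffled list exactly, with no overlap:
#
#   assert num_train_images + num_val_images == num_images
#   assert set(train_xs).isdisjoint(val_xs)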

def LoadTrainBatch(batch_size):
    global train_batch_pointer
    x_out = []
    y_out = []
    for i in range(batch_size):
        img = cv2.imread(train_xs[(train_batch_pointer + i) % num_train_images])
        # Random shifts and rotations
        distorted, _, _ = aug.random_distortion(img)
        # Resize to the model's 200x66 input, normalize to [0, 1] and append
        x_out.append(cv2.resize(distorted, (200, 66)) / 255.0)
        y_out.append([train_ys[(train_batch_pointer + i) % num_train_images]])
    train_batch_pointer += batch_size
    return x_out, y_out


def LoadValBatch(batch_size):
    global val_batch_pointer
    x_out = []
    y_out = []
    for i in range(batch_size):
        x_out.append(cv2.resize(cv2.imread(val_xs[(val_batch_pointer + i) % num_val_images]), (200, 66)) / 255.0)
        y_out.append([val_ys[(val_batch_pointer + i) % num_val_images]])
    val_batch_pointer += batch_size
    return x_out, y_out
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
'''
To view the TensorBoard train and validation loss:
In the 1st terminal, run the command line:
    --> tensorboard --logdir=./train_logs
Then open http://0.0.0.0:6006/ in your web browser.
In the 2nd terminal, run the command line:
    --> tensorboard --logdir=./val_logs --port=8008
Then open http://0.0.0.0:8008/ in your web browser.
'''
import os
import os.path

import tensorflow as tf

import datagen
import model

LOGDIR = './save'
CKPT_FILE = './save/model.ckpt'
TRAIN_TENSORBOARD_LOG = './train_logs'
VAL_TENSORBOARD_LOG = './val_logs'

sess = tf.InteractiveSession()

# Mean squared error between recorded and predicted steering angles
loss = tf.reduce_mean(tf.square(tf.sub(model.y_, model.y)))
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

saver = tf.train.Saver()

train_summary = tf.scalar_summary("train_loss", loss)
val_summary = tf.scalar_summary("val_loss", loss)

if not os.path.exists(LOGDIR):
    os.makedirs(LOGDIR)

# Resume from an existing checkpoint if there is one
if os.path.isfile(CKPT_FILE):
    saver.restore(sess, CKPT_FILE)
else:
    sess.run(tf.initialize_all_variables())

if not os.path.exists(TRAIN_TENSORBOARD_LOG):
    os.makedirs(TRAIN_TENSORBOARD_LOG)
if not os.path.exists(VAL_TENSORBOARD_LOG):
    os.makedirs(VAL_TENSORBOARD_LOG)

train_summary_writer = tf.train.SummaryWriter(TRAIN_TENSORBOARD_LOG, graph=tf.get_default_graph())
val_summary_writer = tf.train.SummaryWriter(VAL_TENSORBOARD_LOG, graph=tf.get_default_graph())

batch_size = 100

for i in range(int(datagen.num_images * 3)):
    xs_train, ys_train = datagen.LoadTrainBatch(batch_size)
    train_step.run(feed_dict={model.x: xs_train, model.y_: ys_train, model.keep_prob: 0.5})

    if i % 10 == 0:
        xs_val, ys_val = datagen.LoadValBatch(batch_size)
        # Write summaries every 10 iterations, with dropout disabled for evaluation
        train_summary_str = train_summary.eval(feed_dict={model.x: xs_train, model.y_: ys_train, model.keep_prob: 1.0})
        val_summary_str = val_summary.eval(feed_dict={model.x: xs_val, model.y_: ys_val, model.keep_prob: 1.0})
        train_summary_writer.add_summary(train_summary_str, i)
        val_summary_writer.add_summary(val_summary_str, i)
        train_loss = loss.eval(feed_dict={model.x: xs_train, model.y_: ys_train, model.keep_prob: 1.0})
        val_loss = loss.eval(feed_dict={model.x: xs_val, model.y_: ys_val, model.keep_prob: 1.0})
        print("step: %d, loss: %g, val loss: %g" % (i, train_loss, val_loss))

    if i % 100 == 0:
        checkpoint_path = os.path.join(LOGDIR, "model.ckpt")
        filename = saver.save(sess, checkpoint_path)
        print("Model saved in file: %s" % filename)
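
# To restore the saved checkpoint for inference (a minimal sketch, assuming
# the same graph is rebuilt by importing model; 'frame' is a hypothetical
# preprocessed 66x200x3 image):
#
#   saver = tf.train.Saver()
#   with tf.Session() as sess:
#       saver.restore(sess, CKPT_FILE)
#       angle = model.y.eval(feed_dict={model.x: [frame], model.keep_prob: 1.0})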
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf


def flatten(x_tensor):
    flat_dim = np.prod(x_tensor.get_shape().as_list()[1:])
    return tf.reshape(x_tensor, shape=(-1, flat_dim))

def fully_conn(x_tensor, num_outputs):
    weights = tf.Variable(tf.truncated_normal(
        [x_tensor.get_shape().as_list()[1], num_outputs],
        stddev=0.1))
    biases = tf.Variable(tf.constant(0.0, shape=[num_outputs]))
    return tf.nn.relu(tf.matmul(x_tensor, weights) + biases)

def output(x_tensor, num_outputs):
    weights = tf.Variable(tf.truncated_normal(
        [x_tensor.get_shape().as_list()[1], num_outputs],
        stddev=0.1))
    biases = tf.Variable(tf.constant(0.0, shape=[num_outputs]))
    return tf.matmul(x_tensor, weights) + biases

def conv2d(x_tensor, conv_num_outputs, conv_ksize, conv_strides):
    weights = tf.Variable(tf.truncated_normal(
        [conv_ksize[0], conv_ksize[1], x_tensor.get_shape().as_list()[3], conv_num_outputs],
        stddev=0.1))
    biases = tf.Variable(tf.constant(0.0, shape=[conv_num_outputs]))
    x = tf.nn.conv2d(x_tensor, weights, [1, conv_strides[0], conv_strides[1], 1], padding='SAME')
    return tf.nn.relu(x + biases)

def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides):
    weights = tf.Variable(tf.truncated_normal(
        [conv_ksize[0], conv_ksize[1], x_tensor.get_shape().as_list()[3], conv_num_outputs],
        stddev=0.1))
    biases = tf.Variable(tf.constant(0.0, shape=[conv_num_outputs]))
    x = tf.nn.conv2d(x_tensor, weights, [1, conv_strides[0], conv_strides[1], 1], padding='SAME')
    x = tf.nn.relu(x + biases)

    x = tf.nn.max_pool(x, [1, pool_ksize[0], pool_ksize[1], 1], [1, pool_strides[0], pool_strides[1], 1], padding='SAME')
    return x

# Placeholders for model input, target steering angle and dropout keep probability
x = tf.placeholder(tf.float32, shape=[None, 66, 200, 3])
y_ = tf.placeholder(tf.float32, shape=[None, 1])

keep_prob = tf.placeholder(tf.float32)

# Model architecture
conv = conv2d(x, 3, (1, 1), (1, 1))

conv = conv2d(conv, 32, (3, 3), (1, 1))
conv = conv2d_maxpool(conv, 32, (3, 3), (1, 1), (2, 2), (2, 2))

conv = conv2d(conv, 64, (3, 3), (1, 1))
conv = conv2d_maxpool(conv, 64, (3, 3), (1, 1), (2, 2), (2, 2))

conv = conv2d(conv, 128, (3, 3), (1, 1))
conv = conv2d_maxpool(conv, 128, (3, 3), (1, 1), (2, 2), (2, 2))
conv = tf.nn.dropout(conv, keep_prob)

flat = flatten(conv)

dense = fully_conn(flat, 512)
dense = tf.nn.dropout(dense, keep_prob)
dense = fully_conn(dense, 64)

# A single output unit to match the [None, 1] shape of the y_ placeholder
# (the original had 10 here, which broadcasts silently against y_ in the loss).
# Note: tanh bounds the prediction to (-1, 1).
y = tf.nn.tanh(output(dense, 1))
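
# Shape sanity check (illustrative sketch): with SAME padding, the three
# stride-2 max-pools reduce the 66x200 input to ceil(66/8) x ceil(200/8) =
# 9x25, so the flattened feature vector has 9 * 25 * 128 = 28800 values:
#
#   print(flatten(conv).get_shape())   # -> (?, 28800)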
--------------------------------------------------------------------------------
/augmentation.py:
--------------------------------------------------------------------------------
import cv2
import math
import matplotlib.pyplot as plt
import numpy as np


def get_horizon_y(img, draw=False, min_y=200, max_y=300, hough_threshold=150):
    ''' Estimate the horizon y coordinate using the Canny edge detector and the Hough transform. '''

    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    if draw:
        fig = plt.figure()
        plt.imshow(gray, cmap='gray')

    edges = cv2.Canny(gray, 20, 150, apertureSize=3)

    if draw:
        fig = plt.figure()
        plt.imshow(edges, cmap='gray')

    lines = None
    horizon = None
    horizon_y = 1000

    # Lower the Hough threshold until a horizontal line within [min_y, max_y] is found
    while lines is None or horizon is None:

        # Give up before the threshold reaches zero, to avoid looping forever
        if hough_threshold <= 0:
            break

        lines = cv2.HoughLines(edges, 1, np.pi / 180, hough_threshold)

        if lines is None:
            hough_threshold = hough_threshold - 10
            continue

        for i, line in enumerate(lines):
            for rho, theta in line:

                # keep just the horizontal lines
                if np.sin(theta) > 0.9999:

                    if rho < horizon_y and rho >= min_y and rho <= max_y:
                        horizon_y = rho
                        horizon = line

        if horizon is None:
            hough_threshold = hough_threshold - 10

    if draw and horizon is not None:

        for rho, theta in horizon:
            a = np.cos(theta)
            b = np.sin(theta)

            x0 = a * rho
            y0 = b * rho
            x1 = int(x0 + 1000 * (-b))
            y1 = int(y0 + 1000 * (a))
            x2 = int(x0 - 1000 * (-b))
            y2 = int(y0 - 1000 * (a))

            cv2.line(gray, (x1, y1), (x2, y2), (255, 255, 255), 2)

        fig = plt.figure()
        plt.imshow(gray, cmap='gray')

    if horizon is None:
        print('Horizon not found. Returning the default estimate.')
        return min_y

    return int(horizon_y)


def eulerToRotation(theta):
    ''' Calculates a rotation matrix from Euler angles (x, y, z). '''

    R_x = np.array([
        [1, 0, 0],
        [0, math.cos(theta[0]), -math.sin(theta[0])],
        [0, math.sin(theta[0]), math.cos(theta[0])]
    ])

    R_y = np.array([
        [math.cos(theta[1]), 0, math.sin(theta[1])],
        [0, 1, 0],
        [-math.sin(theta[1]), 0, math.cos(theta[1])]
    ])

    R_z = np.array([
        [math.cos(theta[2]), -math.sin(theta[2]), 0],
        [math.sin(theta[2]), math.cos(theta[2]), 0],
        [0, 0, 1]
    ])

    R = np.dot(R_z, np.dot(R_y, R_x))

    return R


def translation(t):
    ''' Returns a 2D translation matrix in homogeneous coordinates. '''

    T = np.array([[1, 0, t[0]],
                  [0, 1, t[1]],
                  [0, 0, 1]])
    return T
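
# Worked example (illustrative): a pure 90-degree yaw maps the x-axis onto the
# y-axis, and conjugating with translations rotates about an arbitrary pivot
# (cx, cy are hypothetical pivot coordinates):
#
#   R = eulerToRotation([0., 0., math.pi / 2.])
#   np.allclose(R.dot([1., 0., 0.]), [0., 1., 0.])   # -> True
#   T = np.dot(translation([cx, cy]), np.dot(R, translation([-cx, -cy])))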

def apply_distortion(img, rotation, shift, pixel_per_meter=160,
                     crop_x=10, crop_y=240, draw=False):
    '''
    Applies shift and rotation distortion to an image, assuming all points below the
    horizon lie on flat ground and all points above the horizon are infinitely far away.
    The distorted image is also cropped to match the proportions used in "End to End Learning for Self-Driving Cars".
    Parameters:
        img - source image
        rotation - 'yaw' rotation angle in radians.
        shift - shift in meters.
        pixel_per_meter - conversion factor from meters to pixels.
        crop_x - number of pixels to be cropped from each side of the distorted image.
        crop_y - number of pixels to be cropped from the upper portion of the distorted image.
        draw - enables/disables drawing using matplotlib (useful for debugging)
    '''

    # convert the shift from meters to pixels
    shift = shift * pixel_per_meter

    if draw:
        fig = plt.figure(figsize=(12, 12))
        fig.add_subplot(3, 2, 1, title="Original")
        plt.imshow(img)

    # The crop line is treated as the horizon (get_horizon_y(img) could estimate it instead)
    below_horizon = img[crop_y:, :]

    pts = np.array([[0, 0], [below_horizon.shape[1] - 1, 0],
                    [below_horizon.shape[1] - 1, below_horizon.shape[0] - 1],
                    [0, below_horizon.shape[0] - 1]], dtype=np.float32)

    # Vertical stretch factor for the intermediate bird's-eye view
    birds_eye_magic_number = 20

    dst = np.array([
        [0, 0],
        [below_horizon.shape[1] - 1, 0],
        [below_horizon.shape[1] - 1, (below_horizon.shape[0] * birds_eye_magic_number) - 1],
        [0, (below_horizon.shape[0] * birds_eye_magic_number) - 1]], dtype=np.float32)

    # compute the perspective transform matrix into the bird's-eye view
    M = cv2.getPerspectiveTransform(pts, dst)

    # rotate and shift about the bottom center of the bird's-eye view
    T1 = translation([-(below_horizon.shape[1] / 2 + shift), -(below_horizon.shape[0] * birds_eye_magic_number)])
    T2 = translation([below_horizon.shape[1] / 2, below_horizon.shape[0] * birds_eye_magic_number])
    T = np.dot(T2, np.dot(eulerToRotation([0., 0., rotation]), T1))

    # conjugate by M so the whole distortion is applied in the original image space
    T = np.dot(T, M)
    T = np.dot(np.linalg.inv(M), T)

    warped = cv2.warpPerspective(below_horizon, T, (below_horizon.shape[1], below_horizon.shape[0]))

    img = warped[:, crop_x:img.shape[1] - crop_x]

    if draw:
        fig.add_subplot(3, 2, 2, title="Final result after cropping")
        plt.imshow(img)

    return img
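
# Usage sketch (hypothetical path and values, assuming a 640x480 source frame
# so that crop_y=240 leaves the ground portion of the image):
#
#   img = cv2.imread('center_frame.jpg')
#   out = apply_distortion(img, rotation=0.01, shift=0.2, draw=True)
#   plt.show()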

def random_distortion(image, rotation=None, shift=None, rotation_mean=0, rotation_std=0.002,
                      shift_mean=0, shift_std=0.1):
    '''
    Applies random shift and rotation distortion to an image.
    Parameters:
        image - source image
        rotation - 'yaw' rotation angle in radians. If None, the value is sampled from a normal distribution.
        shift - shift in meters. If None, the value is sampled from a normal distribution.
        rotation_mean - rotation distribution mean
        rotation_std - rotation distribution standard deviation
        shift_mean - shift distribution mean
        shift_std - shift distribution standard deviation
    '''

    if rotation is None:
        rotation = np.random.normal(rotation_mean, rotation_std)

    if shift is None:
        shift = np.random.normal(shift_mean, shift_std)

    return apply_distortion(image, rotation, shift), rotation, shift


def get_steer_back_angle(steering_wheel_angle, speed, rotation, shift, steer_back_time=2., fps=20,
                         wheel_base=2.84988, steering_ratio=14.8):
    ''' Computes the steering wheel angle that steers the (virtually shifted and
    rotated) vehicle back onto its original trajectory. '''

    dt = 1. / fps
    theta = math.pi / 2. + rotation
    # true vehicle velocity
    v = speed
    vx = math.cos(theta) * v

    # assume constant acceleration
    ax = (-shift - vx * steer_back_time) * 2. / (steer_back_time * steer_back_time)

    # calculate velocity x and shift after dt
    vx = vx + ax * dt
    shift = shift + vx * dt + ax * dt * dt / 2.

    # steer-back angular velocity
    vtheta = (math.acos(vx / v) - theta) / dt

    # calculate theta after dt
    theta = math.acos(vx / v)

    # true angular velocity from the recorded steering wheel angle (bicycle model)
    vtheta_truth = math.tan(steering_wheel_angle / steering_ratio) * v / wheel_base

    # we have to add the "steer back" and true angular velocities to calculate the final steering angle
    vtheta = vtheta + vtheta_truth

    wheel_angle = math.atan(vtheta * wheel_base / v)
    steering_wheel_angle = wheel_angle * steering_ratio

    rotation = -(math.pi / 2. - theta)
    return rotation, shift, steering_wheel_angle


def steer_back_distortion(image, steering_wheel_angle, speed, rotation=None, shift=None,
                          initial_rotation=0, initial_shift=0):
    ''' Utility function to easily generate new labeled images with random rotation and shift. '''

    distorted, rotation, shift = random_distortion(image, rotation=rotation, shift=shift)
    rotation, shift, steering_wheel_angle = get_steer_back_angle(steering_wheel_angle, speed,
                                                                 rotation + initial_rotation,
                                                                 shift + initial_shift)

    return distorted, steering_wheel_angle, rotation, shift
--------------------------------------------------------------------------------