├── README.md
├── LICENSE
├── datagen.py
├── train.py
├── model.py
└── augmentation.py

/README.md:
--------------------------------------------------------------------------------
# SDC-Udacity-Challenge-2 ([Simulation video](https://www.youtube.com/watch?v=YVmlioP3qqY))
This repo contains my work on Udacity's Challenge 2, which uses end-to-end learning to predict steering angles for a self-driving car from front-camera images.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 Gautam Sharma

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/datagen.py:
--------------------------------------------------------------------------------
import random

import augmentation as aug
import cv2
import pandas as pd


# Pointers to the end of the last batch served
train_batch_pointer = 0
val_batch_pointer = 0

# Get data: keep only center-camera frames from the interpolated steering log
FDIR = '/Volumes/Untitled/Udacity datasets/challenge 2 training/Ch2_002/images & interpolated data/'
df = pd.read_csv(FDIR + 'interpolated.csv')
df = df[df.frame_id == 'center_camera']
xs = [FDIR + s for s in df.filename]
ys = df.angle

# Get number of images
num_images = len(xs)

# Shuffle the (image, angle) pairs together
c = list(zip(xs, ys))
random.shuffle(c)
xs, ys = zip(*c)

# Split into train and validation sets (80/20, non-overlapping)
split = int(num_images * 0.8)
train_xs = xs[:split]
train_ys = ys[:split]

val_xs = xs[split:]
val_ys = ys[split:]

num_train_images = len(train_xs)
num_val_images = len(val_xs)
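
# Sanity-check sketch (illustrative, not part of the original pipeline): the
# split above partitions the shuffled list exactly, with no overlap:
#
#   assert num_train_images + num_val_images == num_images
#   assert set(train_xs).isdisjoint(val_xs)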

def LoadTrainBatch(batch_size):
    global train_batch_pointer
    x_out = []
    y_out = []
    for i in range(batch_size):
        img = cv2.imread(train_xs[(train_batch_pointer + i) % num_train_images])
        # Random shifts and rotations
        distorted, _, _ = aug.random_distortion(img)
        # Resize to the model's 200x66 input, normalize to [0, 1] and append
        x_out.append(cv2.resize(distorted, (200, 66)) / 255.0)
        y_out.append([train_ys[(train_batch_pointer + i) % num_train_images]])
    train_batch_pointer += batch_size
    return x_out, y_out


def LoadValBatch(batch_size):
    global val_batch_pointer
    x_out = []
    y_out = []
    for i in range(batch_size):
        x_out.append(cv2.resize(cv2.imread(val_xs[(val_batch_pointer + i) % num_val_images]), (200, 66)) / 255.0)
        y_out.append([val_ys[(val_batch_pointer + i) % num_val_images]])
    val_batch_pointer += batch_size
    return x_out, y_out
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
'''
To view the TensorBoard train and validation loss:
In the 1st terminal, run the command line:
    --> tensorboard --logdir=./train_logs
Then open http://0.0.0.0:6006/ in your web browser.
In the 2nd terminal, run the command line:
    --> tensorboard --logdir=./val_logs --port=8008
Then open http://0.0.0.0:8008/ in your web browser.
'''
import os
import os.path

import tensorflow as tf

import datagen
import model

LOGDIR = './save'
CKPT_FILE = './save/model.ckpt'
TRAIN_TENSORBOARD_LOG = './train_logs'
VAL_TENSORBOARD_LOG = './val_logs'

sess = tf.InteractiveSession()

# Mean squared error between recorded and predicted steering angles
loss = tf.reduce_mean(tf.square(tf.sub(model.y_, model.y)))
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

saver = tf.train.Saver()

train_summary = tf.scalar_summary("train_loss", loss)
val_summary = tf.scalar_summary("val_loss", loss)

if not os.path.exists(LOGDIR):
    os.makedirs(LOGDIR)

# Resume from an existing checkpoint if there is one
if os.path.isfile(CKPT_FILE):
    saver.restore(sess, CKPT_FILE)
else:
    sess.run(tf.initialize_all_variables())

if not os.path.exists(TRAIN_TENSORBOARD_LOG):
    os.makedirs(TRAIN_TENSORBOARD_LOG)
if not os.path.exists(VAL_TENSORBOARD_LOG):
    os.makedirs(VAL_TENSORBOARD_LOG)

train_summary_writer = tf.train.SummaryWriter(TRAIN_TENSORBOARD_LOG, graph=tf.get_default_graph())
val_summary_writer = tf.train.SummaryWriter(VAL_TENSORBOARD_LOG, graph=tf.get_default_graph())

batch_size = 100

for i in range(int(datagen.num_images * 3)):
    xs_train, ys_train = datagen.LoadTrainBatch(batch_size)
    train_step.run(feed_dict={model.x: xs_train, model.y_: ys_train, model.keep_prob: 0.5})

    if i % 10 == 0:
        xs_val, ys_val = datagen.LoadValBatch(batch_size)
        # Write summaries every 10 iterations, with dropout disabled for evaluation
        train_summary_str = train_summary.eval(feed_dict={model.x: xs_train, model.y_: ys_train, model.keep_prob: 1.0})
        val_summary_str = val_summary.eval(feed_dict={model.x: xs_val, model.y_: ys_val, model.keep_prob: 1.0})
        train_summary_writer.add_summary(train_summary_str, i)
        val_summary_writer.add_summary(val_summary_str, i)
        train_loss = loss.eval(feed_dict={model.x: xs_train, model.y_: ys_train, model.keep_prob: 1.0})
        val_loss = loss.eval(feed_dict={model.x: xs_val, model.y_: ys_val, model.keep_prob: 1.0})
        print("step: %d, loss: %g, val loss: %g" % (i, train_loss, val_loss))

    if i % 100 == 0:
        checkpoint_path = os.path.join(LOGDIR, "model.ckpt")
        filename = saver.save(sess, checkpoint_path)
        print("Model saved in file: %s" % filename)
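
# To restore the saved checkpoint for inference (a minimal sketch, assuming
# the same graph is rebuilt by importing model; 'frame' is a hypothetical
# preprocessed 66x200x3 image):
#
#   saver = tf.train.Saver()
#   with tf.Session() as sess:
#       saver.restore(sess, CKPT_FILE)
#       angle = model.y.eval(feed_dict={model.x: [frame], model.keep_prob: 1.0})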
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf


def flatten(x_tensor):
    flat_dim = np.prod(x_tensor.get_shape().as_list()[1:])
    return tf.reshape(x_tensor, shape=(-1, flat_dim))

def fully_conn(x_tensor, num_outputs):
    weights = tf.Variable(tf.truncated_normal(
        [x_tensor.get_shape().as_list()[1], num_outputs],
        stddev=0.1))
    biases = tf.Variable(tf.constant(0.0, shape=[num_outputs]))
    return tf.nn.relu(tf.matmul(x_tensor, weights) + biases)

def output(x_tensor, num_outputs):
    weights = tf.Variable(tf.truncated_normal(
        [x_tensor.get_shape().as_list()[1], num_outputs],
        stddev=0.1))
    biases = tf.Variable(tf.constant(0.0, shape=[num_outputs]))
    return tf.matmul(x_tensor, weights) + biases

def conv2d(x_tensor, conv_num_outputs, conv_ksize, conv_strides):
    weights = tf.Variable(tf.truncated_normal(
        [conv_ksize[0], conv_ksize[1], x_tensor.get_shape().as_list()[3], conv_num_outputs],
        stddev=0.1))
    biases = tf.Variable(tf.constant(0.0, shape=[conv_num_outputs]))
    x = tf.nn.conv2d(x_tensor, weights, [1, conv_strides[0], conv_strides[1], 1], padding='SAME')
    return tf.nn.relu(x + biases)

def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides):
    weights = tf.Variable(tf.truncated_normal(
        [conv_ksize[0], conv_ksize[1], x_tensor.get_shape().as_list()[3], conv_num_outputs],
        stddev=0.1))
    biases = tf.Variable(tf.constant(0.0, shape=[conv_num_outputs]))
    x = tf.nn.conv2d(x_tensor, weights, [1, conv_strides[0], conv_strides[1], 1], padding='SAME')
    x = tf.nn.relu(x + biases)

    x = tf.nn.max_pool(x, [1, pool_ksize[0], pool_ksize[1], 1], [1, pool_strides[0], pool_strides[1], 1], padding='SAME')
    return x

# Placeholders for model input, target steering angle and dropout keep probability
x = tf.placeholder(tf.float32, shape=[None, 66, 200, 3])
y_ = tf.placeholder(tf.float32, shape=[None, 1])

keep_prob = tf.placeholder(tf.float32)

# Model architecture
conv = conv2d(x, 3, (1, 1), (1, 1))

conv = conv2d(conv, 32, (3, 3), (1, 1))
conv = conv2d_maxpool(conv, 32, (3, 3), (1, 1), (2, 2), (2, 2))

conv = conv2d(conv, 64, (3, 3), (1, 1))
conv = conv2d_maxpool(conv, 64, (3, 3), (1, 1), (2, 2), (2, 2))

conv = conv2d(conv, 128, (3, 3), (1, 1))
conv = conv2d_maxpool(conv, 128, (3, 3), (1, 1), (2, 2), (2, 2))
conv = tf.nn.dropout(conv, keep_prob)

flat = flatten(conv)

dense = fully_conn(flat, 512)
dense = tf.nn.dropout(dense, keep_prob)
dense = fully_conn(dense, 64)

# A single output unit to match the [None, 1] shape of the y_ placeholder
# (the original had 10 here, which broadcasts silently against y_ in the loss).
# Note: tanh bounds the prediction to (-1, 1).
y = tf.nn.tanh(output(dense, 1))
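
# Shape sanity check (illustrative sketch): with SAME padding, the three
# stride-2 max-pools reduce the 66x200 input to ceil(66/8) x ceil(200/8) =
# 9x25, so the flattened feature vector has 9 * 25 * 128 = 28800 values:
#
#   print(flatten(conv).get_shape())   # -> (?, 28800)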
--------------------------------------------------------------------------------
/augmentation.py:
--------------------------------------------------------------------------------
import cv2
import math
import matplotlib.pyplot as plt
import numpy as np


def get_horizon_y(img, draw=False, min_y=200, max_y=300, hough_threshold=150):
    ''' Estimate the horizon y coordinate using the Canny edge detector and the Hough transform. '''

    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    if draw:
        fig = plt.figure()
        plt.imshow(gray, cmap='gray')

    edges = cv2.Canny(gray, 20, 150, apertureSize=3)

    if draw:
        fig = plt.figure()
        plt.imshow(edges, cmap='gray')

    lines = None
    horizon = None
    horizon_y = 1000

    # Lower the Hough threshold until a horizontal line within [min_y, max_y] is found
    while lines is None or horizon is None:

        # Give up before the threshold reaches zero, to avoid looping forever
        if hough_threshold <= 0:
            break

        lines = cv2.HoughLines(edges, 1, np.pi / 180, hough_threshold)

        if lines is None:
            hough_threshold = hough_threshold - 10
            continue

        for i, line in enumerate(lines):
            for rho, theta in line:

                # keep just the horizontal lines
                if np.sin(theta) > 0.9999:

                    if rho < horizon_y and rho >= min_y and rho <= max_y:
                        horizon_y = rho
                        horizon = line

        if horizon is None:
            hough_threshold = hough_threshold - 10

    if draw and horizon is not None:

        for rho, theta in horizon:
            a = np.cos(theta)
            b = np.sin(theta)

            x0 = a * rho
            y0 = b * rho
            x1 = int(x0 + 1000 * (-b))
            y1 = int(y0 + 1000 * (a))
            x2 = int(x0 - 1000 * (-b))
            y2 = int(y0 - 1000 * (a))

            cv2.line(gray, (x1, y1), (x2, y2), (255, 255, 255), 2)

        fig = plt.figure()
        plt.imshow(gray, cmap='gray')

    if horizon is None:
        print('Horizon not found. Returning the default estimate.')
        return min_y

    return int(horizon_y)


def eulerToRotation(theta):
    ''' Calculates a rotation matrix from Euler angles (x, y, z). '''

    R_x = np.array([
        [1, 0, 0],
        [0, math.cos(theta[0]), -math.sin(theta[0])],
        [0, math.sin(theta[0]), math.cos(theta[0])]
    ])

    R_y = np.array([
        [math.cos(theta[1]), 0, math.sin(theta[1])],
        [0, 1, 0],
        [-math.sin(theta[1]), 0, math.cos(theta[1])]
    ])

    R_z = np.array([
        [math.cos(theta[2]), -math.sin(theta[2]), 0],
        [math.sin(theta[2]), math.cos(theta[2]), 0],
        [0, 0, 1]
    ])

    R = np.dot(R_z, np.dot(R_y, R_x))

    return R


def translation(t):
    ''' Returns a 2D translation matrix in homogeneous coordinates. '''

    T = np.array([[1, 0, t[0]],
                  [0, 1, t[1]],
                  [0, 0, 1]])
    return T
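
# Worked example (illustrative): a pure 90-degree yaw maps the x-axis onto the
# y-axis, and conjugating with translations rotates about an arbitrary pivot
# (cx, cy are hypothetical pivot coordinates):
#
#   R = eulerToRotation([0., 0., math.pi / 2.])
#   np.allclose(R.dot([1., 0., 0.]), [0., 1., 0.])   # -> True
#   T = np.dot(translation([cx, cy]), np.dot(R, translation([-cx, -cy])))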

def apply_distortion(img, rotation, shift, pixel_per_meter=160,
                     crop_x=10, crop_y=240, draw=False):
    '''
    Applies shift and rotation distortion to an image, assuming all points below the
    horizon lie on flat ground and all points above the horizon are infinitely far away.
    The distorted image is also cropped to match the proportions used in "End to End Learning for Self-Driving Cars".
    Parameters:
        img - source image
        rotation - 'yaw' rotation angle in radians.
        shift - shift in meters.
        pixel_per_meter - conversion factor from meters to pixels.
        crop_x - number of pixels to be cropped from each side of the distorted image.
        crop_y - number of pixels to be cropped from the upper portion of the distorted image.
        draw - enables/disables drawing using matplotlib (useful for debugging)
    '''

    # convert the shift from meters to pixels
    shift = shift * pixel_per_meter

    if draw:
        fig = plt.figure(figsize=(12, 12))
        fig.add_subplot(3, 2, 1, title="Original")
        plt.imshow(img)

    # The crop line is treated as the horizon (get_horizon_y(img) could estimate it instead)
    below_horizon = img[crop_y:, :]

    pts = np.array([[0, 0], [below_horizon.shape[1] - 1, 0],
                    [below_horizon.shape[1] - 1, below_horizon.shape[0] - 1],
                    [0, below_horizon.shape[0] - 1]], dtype=np.float32)

    # Vertical stretch factor for the intermediate bird's-eye view
    birds_eye_magic_number = 20

    dst = np.array([
        [0, 0],
        [below_horizon.shape[1] - 1, 0],
        [below_horizon.shape[1] - 1, (below_horizon.shape[0] * birds_eye_magic_number) - 1],
        [0, (below_horizon.shape[0] * birds_eye_magic_number) - 1]], dtype=np.float32)

    # compute the perspective transform matrix into the bird's-eye view
    M = cv2.getPerspectiveTransform(pts, dst)

    # rotate and shift about the bottom center of the bird's-eye view
    T1 = translation([-(below_horizon.shape[1] / 2 + shift), -(below_horizon.shape[0] * birds_eye_magic_number)])
    T2 = translation([below_horizon.shape[1] / 2, below_horizon.shape[0] * birds_eye_magic_number])
    T = np.dot(T2, np.dot(eulerToRotation([0., 0., rotation]), T1))

    # conjugate by M so the whole distortion is applied in the original image space
    T = np.dot(T, M)
    T = np.dot(np.linalg.inv(M), T)

    warped = cv2.warpPerspective(below_horizon, T, (below_horizon.shape[1], below_horizon.shape[0]))

    img = warped[:, crop_x:img.shape[1] - crop_x]

    if draw:
        fig.add_subplot(3, 2, 2, title="Final result after cropping")
        plt.imshow(img)

    return img
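
# Usage sketch (hypothetical path and values, assuming a 640x480 source frame
# so that crop_y=240 leaves the ground portion of the image):
#
#   img = cv2.imread('center_frame.jpg')
#   out = apply_distortion(img, rotation=0.01, shift=0.2, draw=True)
#   plt.show()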

def random_distortion(image, rotation=None, shift=None, rotation_mean=0, rotation_std=0.002,
                      shift_mean=0, shift_std=0.1):
    '''
    Applies random shift and rotation distortion to an image.
    Parameters:
        image - source image
        rotation - 'yaw' rotation angle in radians. If None, the value is sampled from a normal distribution.
        shift - shift in meters. If None, the value is sampled from a normal distribution.
        rotation_mean - rotation distribution mean
        rotation_std - rotation distribution standard deviation
        shift_mean - shift distribution mean
        shift_std - shift distribution standard deviation
    '''

    if rotation is None:
        rotation = np.random.normal(rotation_mean, rotation_std)

    if shift is None:
        shift = np.random.normal(shift_mean, shift_std)

    return apply_distortion(image, rotation, shift), rotation, shift


def get_steer_back_angle(steering_wheel_angle, speed, rotation, shift, steer_back_time=2., fps=20,
                         wheel_base=2.84988, steering_ratio=14.8):
    ''' Computes the steering wheel angle that steers the (virtually shifted and
    rotated) vehicle back onto its original trajectory. '''

    dt = 1. / fps
    theta = math.pi / 2. + rotation
    # true vehicle velocity
    v = speed
    vx = math.cos(theta) * v

    # assume constant acceleration
    ax = (-shift - vx * steer_back_time) * 2. / (steer_back_time * steer_back_time)

    # calculate velocity x and shift after dt
    vx = vx + ax * dt
    shift = shift + vx * dt + ax * dt * dt / 2.

    # steer-back angular velocity
    vtheta = (math.acos(vx / v) - theta) / dt

    # calculate theta after dt
    theta = math.acos(vx / v)

    # true angular velocity from the recorded steering wheel angle (bicycle model)
    vtheta_truth = math.tan(steering_wheel_angle / steering_ratio) * v / wheel_base

    # we have to add the "steer back" and true angular velocities to calculate the final steering angle
    vtheta = vtheta + vtheta_truth

    wheel_angle = math.atan(vtheta * wheel_base / v)
    steering_wheel_angle = wheel_angle * steering_ratio

    rotation = -(math.pi / 2. - theta)
    return rotation, shift, steering_wheel_angle


def steer_back_distortion(image, steering_wheel_angle, speed, rotation=None, shift=None,
                          initial_rotation=0, initial_shift=0):
    ''' Utility function to easily generate new labeled images with random rotation and shift. '''

    distorted, rotation, shift = random_distortion(image, rotation=rotation, shift=shift)
    rotation, shift, steering_wheel_angle = get_steer_back_angle(steering_wheel_angle, speed,
                                                                 rotation + initial_rotation,
                                                                 shift + initial_shift)

    return distorted, steering_wheel_angle, rotation, shift
--------------------------------------------------------------------------------