├── main.py
└── readme.md

/main.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import glob
4 | import os
5 | import pickle
6 | import matplotlib.pyplot as plt
7 | import pylab
8 | import time
9 | import imageio
10 | from imageio import imread, imsave  # scipy.misc.imread/imsave were removed in SciPy >= 1.2
11 | 
12 | out_examples = 0
13 | MOV_AVG_LENGTH = 5
14 | 
15 | 
16 | def main():
17 | 
18 |     # ------------------------ Camera Calibration ------------------------
19 |     # As calibration may take some time, save the calibration data into a pickle file to speed up testing
20 |     if not os.path.exists('calibration.p'):
21 |         # Read all jpg files from the calibration image folder
22 |         images = glob.glob('camera_cal/*.jpg')
23 | 
24 |         with open('calibration.p', mode='wb') as f:
25 |             ret, mtx, dist, rvecs, tvecs = calibrate_camera(images, nx=9, ny=6)
26 |             pickle.dump([ret, mtx, dist, rvecs, tvecs], f)
27 |             f.close()
28 |     else:
29 |         with open('calibration.p', mode='rb') as f:
30 |             ret, mtx, dist, rvecs, tvecs = pickle.load(f)
31 |             f.close()
32 | 
33 |     if out_examples:
34 |         # Output an undistorted image to the output_images folder
35 |         to_calibrate = imread('camera_cal/calibration3.jpg')
36 |         imsave('output_images/calibration3_calibrated.jpg', cv2.undistort(to_calibrate, mtx, dist, None, mtx))
37 | 
38 |     vid = imageio.get_reader('project_video.mp4', 'ffmpeg')
39 | 
40 |     for i, img in enumerate(vid):
41 | 
42 |         t_dist0 = time.time()
43 |         t_fps0 = t_dist0
44 |         img = cv2.undistort(cv2.cvtColor(img, cv2.COLOR_RGB2BGR), mtx, dist, None, mtx)
45 |         t_dist = time.time() - t_dist0
46 | 
47 | 
48 |         # --------------------------- Binary Thresholding ----------------------------
49 |         #
50 |         # if out_examples:
51 |         #     test_images = glob.glob('test_images/*.jpg')
52 |         #     plt.figure(figsize=(14, 10))
53 |         #     for i, img in enumerate(test_images):
54 |         #         img_b = image_binary(cv2.undistort(cv2.imread(img), mtx, dist, None, mtx))
55 |         #         plt.subplot(3, 3, i + 1)
56 |         #         plt.axis('off')
57 |         #         plt.title('%s' % str(img))
58 |         #         plt.imshow(img_b, cmap='gray')
59 |         #     plt.show()
60 | 
61 |         t_bin0 = time.time()
62 |         img_b = image_binary(img)
63 |         t_bin = time.time() - t_bin0
64 | 
65 |         # ---------------------------- Perspective Transform --------------------------
66 | 
67 |         t_warp0 = time.time()
68 |         # src = [585, 457], [700, 457], [1110, img_b.shape[0]], [220, img_b.shape[0]]
69 | 
70 |         line_dst_offset = 200
71 |         src = [595, 452], \
72 |               [685, 452], \
73 |               [1110, img_b.shape[0]], \
74 |               [220, img_b.shape[0]]
75 | 
76 |         dst = [src[3][0] + line_dst_offset, 0], \
77 |               [src[2][0] - line_dst_offset, 0], \
78 |               [src[2][0] - line_dst_offset, src[2][1]], \
79 |               [src[3][0] + line_dst_offset, src[3][1]]
80 | 
81 |         img_w = warp(img_b, src, dst)
82 |         t_warp = time.time() - t_warp0
83 | 
84 |         if out_examples:
85 |             # Histogram of the bottom half of the frame
86 |             histogram = np.sum(img_w[int(img_w.shape[0] / 2):, :], axis=0)
87 |             plt.plot(histogram)
88 |             plt.savefig('histogram.jpg')
89 |             plt.close()
90 | 
91 |             plt.figure(figsize=(21, 15))
92 |             for idx, im in enumerate([img, img_b, img_w, imread('histogram.jpg')]):  # renamed to avoid shadowing the frame variables
93 |                 plt.subplot(2, 2, idx + 1)
94 |                 plt.imshow(im, cmap='gray')
95 |                 if idx == 3:
96 |                     plt.axis('off')
97 |             plt.show()
98 | 
99 | 
100 |         t_fit0 = time.time()
101 |         try:
102 |             left_fit, right_fit = fit_from_lines(left_fit, right_fit, img_w)
103 | 
104 |             mov_avg_left = np.append(mov_avg_left, np.array([left_fit]), axis=0)
105 |             mov_avg_right = np.append(mov_avg_right, np.array([right_fit]), axis=0)
106 | 
107 |         except (NameError, TypeError, ValueError):  # no previous fit yet (first frame) or a failed fit
108 |             left_fit, right_fit = sliding_windown(img_w)
109 | 
110 |             mov_avg_left = np.array([left_fit])
111 |             mov_avg_right = np.array([right_fit])
112 | 
113 |         left_fit = np.array([np.mean(mov_avg_left[::-1][:, 0][0:MOV_AVG_LENGTH]),
114 |                              np.mean(mov_avg_left[::-1][:, 1][0:MOV_AVG_LENGTH]),
115 |                              np.mean(mov_avg_left[::-1][:, 2][0:MOV_AVG_LENGTH])])
116 |         right_fit = np.array([np.mean(mov_avg_right[::-1][:, 0][0:MOV_AVG_LENGTH]),
117 |                               np.mean(mov_avg_right[::-1][:, 1][0:MOV_AVG_LENGTH]),
118 |                               np.mean(mov_avg_right[::-1][:, 2][0:MOV_AVG_LENGTH])])
119 | 
120 |         if mov_avg_left.shape[0] > 1000:
121 |             mov_avg_left = mov_avg_left[-MOV_AVG_LENGTH:]  # keep the most recent fits when trimming (keeping the oldest was a bug)
122 |         if mov_avg_right.shape[0] > 1000:
123 |             mov_avg_right = mov_avg_right[-MOV_AVG_LENGTH:]
124 | 
125 | 
126 |         t_fit = time.time() - t_fit0
127 | 
128 |         t_draw0 = time.time()
129 |         final = draw_lines(img, img_w, left_fit, right_fit, perspective=[src, dst])
130 |         t_draw = time.time() - t_draw0
131 | 
132 |         # print('fps: %d' % int((1. / (t1 - t0))))
133 |         print('undist: %f [ms] | bin: %f [ms] | warp: %f [ms] | fit: %f [ms] | draw: %f [ms] | fps %f'
134 |               % (t_dist * 1000, t_bin * 1000, t_warp * 1000, t_fit * 1000, t_draw * 1000, 1. / (time.time() - t_fps0)))
135 |         cv2.imshow('final', final)
136 | 
137 |         if cv2.waitKey(1) & 0xFF == ord('q'):
138 |             break
139 | 
140 | 
141 | def calibrate_camera(image_files, nx, ny):
142 |     objpoints = []
143 |     imgpoints = []
144 | 
145 |     objp = np.zeros(shape=(nx * ny, 3), dtype=np.float32)
146 |     objp[:, :2] = np.mgrid[0:nx, 0:ny].T.reshape(-1, 2)
147 | 
148 |     for i in image_files:
149 |         img = cv2.imread(i)
150 |         if img.shape[0] != 720:
151 |             img = cv2.resize(img, (1280, 720))
152 |         cv2.imshow('image', img)
153 | 
154 |         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
155 |         ret, corners = cv2.findChessboardCorners(gray, (nx, ny))
156 | 
157 |         if ret:
158 |             print("Chessboard corners found: %s" % i)
159 |             imgpoints.append(corners)
160 |             objpoints.append(objp)
161 | 
162 |     return cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
163 | 
164 | 
165 | def image_binary(img, sobel_kernel=7, mag_thresh=(3, 255), s_thresh=(170, 255)):
166 |     # --------------------------- Binary Thresholding ----------------------------
167 |     # Binary thresholding is an intermediate step to improve lane line perception.
168 |     # It includes a grayscale transform so that a Sobel transform can be applied,
169 |     # and binary slicing to output 0/1 images according to pre-defined thresholds.
170 |     #
171 |     # An RGB to HLS transformation (not HSV) is also performed to get the S
172 |     # (saturation) channel, which intensifies lane line detection.
173 |     #
174 |     # The output is a binary image combining the best of both the S-channel
175 |     # threshold and the gradient magnitude threshold.
176 | 
177 |     hls = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
178 |     gray = hls[:, :, 1]
179 |     s_channel = hls[:, :, 2]
180 | 
181 | 
182 |     # Binary matrices creation
183 |     sobel_binary = np.zeros(shape=gray.shape, dtype=bool)
184 |     s_binary = np.zeros(shape=gray.shape, dtype=bool)  # separate array (aliasing sobel_binary was a bug)
185 |     combined_binary = s_binary.astype(np.float32)
186 | 
187 |     # Sobel Transform
188 |     sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=sobel_kernel)
189 |     sobely = 0  # cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=sobel_kernel)
190 | 
191 |     sobel_abs = np.abs(sobelx**2 + sobely**2)  # squared gradient magnitude (no sqrt; the thresholds are tuned for it)
192 |     sobel_abs = np.uint8(255 * sobel_abs / np.max(sobel_abs))
193 | 
194 |     sobel_binary[(sobel_abs > mag_thresh[0]) & (sobel_abs <= mag_thresh[1])] = 1
195 | 
196 |     # Threshold color channel
197 |     s_binary[(s_channel >= s_thresh[0]) & (s_channel <= s_thresh[1])] = 1
198 | 
199 |     # Combine the two binary thresholds
200 | 
201 |     combined_binary[(s_binary == 1) | (sobel_binary == 1)] = 1
202 |     combined_binary = np.uint8(255 * combined_binary / np.max(combined_binary))
203 | 
204 |     # plt.imshow(combined_binary, cmap='gray')
205 |     # plt.show()
206 | 
207 |     # ---------------- MASKED IMAGE --------------------
208 |     offset = 100
209 |     mask_polyg = np.array([[(0 + offset, img.shape[0]),
210 |                             (img.shape[1] / 2.5, img.shape[0] / 1.65),
211 |                             (img.shape[1] / 1.8, img.shape[0] / 1.65),
212 |                             (img.shape[1], img.shape[0])]],
213 |                           dtype=np.int32)  # np.int was removed in NumPy 1.24
214 | 
215 |     # mask_polyg = np.concatenate((mask_polyg, mask_polyg, mask_polyg))
216 | 
217 |     # Next we'll create a masked edges image using cv2.fillPoly()
218 |     mask_img = np.zeros_like(combined_binary)
219 |     ignore_mask_color = 255
220 | 
221 |     # This time we are defining a four sided polygon to mask
222 |     # Applying polygon
223 |     cv2.fillPoly(mask_img, mask_polyg, ignore_mask_color)
224 |     masked_edges = cv2.bitwise_and(combined_binary, mask_img)
225 | 
226 |     return masked_edges
227 | 
228 | 
229 | def warp(img, src, dst):
230 | 
231 |     src = np.float32([src])
232 |     dst = np.float32([dst])
233 | 
234 |     return cv2.warpPerspective(img, cv2.getPerspectiveTransform(src, dst),
235 |                                dsize=img.shape[0:2][::-1], flags=cv2.INTER_LINEAR)
236 | 
237 | 
238 | def sliding_windown(img_w):
239 | 
240 |     histogram = np.sum(img_w[int(img_w.shape[0] / 2):, :], axis=0)
241 |     # Create an output image to draw on and visualize the result
242 |     out_img = np.dstack((img_w, img_w, img_w)) * 255
243 |     # Find the peak of the left and right halves of the histogram
244 |     # These will be the starting point for the left and right lines
245 |     midpoint = int(histogram.shape[0] / 2)  # int(); np.int was removed in NumPy 1.24
246 |     leftx_base = np.argmax(histogram[:midpoint])
247 |     rightx_base = np.argmax(histogram[midpoint:]) + midpoint
248 | 
249 |     # Choose the number of sliding windows
250 |     nwindows = 9
251 |     # Set height of windows
252 |     window_height = int(img_w.shape[0] / nwindows)
253 |     # Identify the x and y positions of all nonzero pixels in the image
254 |     nonzero = img_w.nonzero()
255 |     nonzeroy = np.array(nonzero[0])
256 |     nonzerox = np.array(nonzero[1])
257 |     # Current positions to be updated for each window
258 |     leftx_current = leftx_base
259 |     rightx_current = rightx_base
260 |     # Set the width of the windows +/- margin
261 |     margin = 100
262 |     # Set minimum number of pixels found to recenter window
263 |     minpix = 50
264 |     # Create empty lists to receive left and right lane pixel indices
265 |     left_lane_inds = []
266 |     right_lane_inds = []
267 | 
268 |     # Step through the windows one by one
269 |     for window in range(nwindows):
270 |         # Identify window boundaries in x and y (and right and left)
271 |         win_y_low = img_w.shape[0] - (window + 1) * window_height
272 |         win_y_high = img_w.shape[0] - window * window_height
273 |         win_xleft_low = leftx_current - margin
274 |         win_xleft_high = leftx_current + margin
275 |         win_xright_low = rightx_current - margin
276 |         win_xright_high = rightx_current + margin
277 |         # Draw the windows on the visualization image
278 |         cv2.rectangle(out_img, (win_xleft_low, win_y_low), (win_xleft_high, win_y_high), (0, 255, 0), 2)
279 |         cv2.rectangle(out_img, (win_xright_low, win_y_low), (win_xright_high, win_y_high), (0, 255, 0), 2)
280 |         # Identify the nonzero pixels in x and y within the window
281 |         good_left_inds = ((nonzeroy >= win_y_low) & (nonzeroy < win_y_high) & (nonzerox >= win_xleft_low) & (
282 |             nonzerox < win_xleft_high)).nonzero()[0]
283 |         good_right_inds = ((nonzeroy >= win_y_low) & (nonzeroy < win_y_high) & (nonzerox >= win_xright_low) & (
284 |             nonzerox < win_xright_high)).nonzero()[0]
285 |         # Append these indices to the lists
286 |         left_lane_inds.append(good_left_inds)
287 |         right_lane_inds.append(good_right_inds)
288 |         # If you found > minpix pixels, recenter next window on their mean position
289 |         if len(good_left_inds) > minpix:
290 |             leftx_current = int(np.mean(nonzerox[good_left_inds]))  # int(); np.int was removed in NumPy 1.24
291 |         if len(good_right_inds) > minpix:
292 |             rightx_current = int(np.mean(nonzerox[good_right_inds]))
293 | 
294 |     # Concatenate the arrays of indices
295 |     left_lane_inds = np.concatenate(left_lane_inds)
296 |     right_lane_inds = np.concatenate(right_lane_inds)
297 | 
298 |     # Extract left and right line pixel positions
299 |     leftx = nonzerox[left_lane_inds]
300 |     lefty = nonzeroy[left_lane_inds]
301 |     rightx = nonzerox[right_lane_inds]
302 |     righty = nonzeroy[right_lane_inds]
303 | 
304 |     # Fit a second order polynomial to each
305 |     left_fit = np.polyfit(lefty, leftx, 2)
306 |     right_fit = np.polyfit(righty, rightx, 2)
307 | 
308 |     # Generate x and y values for plotting
309 |     # ploty = np.linspace(0, img_w.shape[0] - 1, img_w.shape[0])
310 |     # left_fitx = left_fit[0] * ploty ** 2 + left_fit[1] * ploty + left_fit[2]
311 |     # right_fitx = right_fit[0] * ploty ** 2 + right_fit[1] * ploty + right_fit[2]
312 |     #
313 |     # out_img[nonzeroy[left_lane_inds], nonzerox[left_lane_inds]] = [255, 0, 0]
314 |     # out_img[nonzeroy[right_lane_inds], nonzerox[right_lane_inds]] = [0, 0, 255]
315 |     # plt.imshow(out_img)
316 |     # plt.plot(left_fitx, ploty, color='yellow')
317 |     # plt.plot(right_fitx, ploty, color='yellow')
318 |     # plt.xlim(0, 1280)
319 |     # plt.ylim(720, 0)
320 | 
321 |     return left_fit, right_fit
322 | 
323 | 
324 | def fit_from_lines(left_fit, right_fit, img_w):
325 |     # Assume you now have a new warped binary image
326 |     # from the next frame of video (also called "binary_warped")
327 |     # It's now much easier to find line pixels!
328 |     nonzero = img_w.nonzero()
329 |     nonzeroy = np.array(nonzero[0])
330 |     nonzerox = np.array(nonzero[1])
331 |     margin = 100
332 |     left_lane_inds = ((nonzerox > (left_fit[0] * (nonzeroy ** 2) + left_fit[1] * nonzeroy + left_fit[2] - margin)) & (
333 |         nonzerox < (left_fit[0] * (nonzeroy ** 2) + left_fit[1] * nonzeroy + left_fit[2] + margin)))
334 |     right_lane_inds = (
335 |         (nonzerox > (right_fit[0] * (nonzeroy ** 2) + right_fit[1] * nonzeroy + right_fit[2] - margin)) & (
336 |             nonzerox < (right_fit[0] * (nonzeroy ** 2) + right_fit[1] * nonzeroy + right_fit[2] + margin)))
337 | 
338 |     # Again, extract left and right line pixel positions
339 |     leftx = nonzerox[left_lane_inds]
340 |     lefty = nonzeroy[left_lane_inds]
341 |     rightx = nonzerox[right_lane_inds]
342 |     righty = nonzeroy[right_lane_inds]
343 |     # Fit a second order polynomial to each
344 |     left_fit = np.polyfit(lefty, leftx, 2)
345 |     right_fit = np.polyfit(righty, rightx, 2)
346 | 
347 |     return left_fit, right_fit
348 | 
349 | 
350 | def draw_lines(img, img_w, left_fit, right_fit, perspective):
351 |     # Create an image to draw the lines on
352 |     warp_zero = np.zeros_like(img_w).astype(np.uint8)
353 |     color_warp = np.dstack((warp_zero, warp_zero, warp_zero))
354 |     # color_warp_center = np.dstack((warp_zero, warp_zero, warp_zero))
355 | 
356 |     ploty = np.linspace(0, img.shape[0] - 1, img.shape[0])
357 | 
358 |     left_fitx = left_fit[0] * ploty ** 2 + left_fit[1] * ploty + left_fit[2]
359 |     right_fitx = right_fit[0] * ploty ** 2 + right_fit[1] * ploty + right_fit[2]
360 | 
361 |     # Recast the x and y points into usable format for cv2.fillPoly()
362 |     pts_left = np.array([np.transpose(np.vstack([left_fitx, ploty]))])
363 |     pts_right = np.array([np.flipud(np.transpose(np.vstack([right_fitx, ploty])))])
364 |     pts = np.hstack((pts_left, pts_right))
365 | 
366 |     # Draw the lane onto the warped blank image
367 |     # cv2.fillPoly(color_warp_center, np.int_([pts]), (0, 255, 0))
368 |     cv2.fillPoly(color_warp, np.int_([pts]), (0, 255, 0))
369 | 
370 |     # Warp the blank back to original image space using the inverse perspective matrix (Minv)
371 |     newwarp = warp(color_warp, perspective[1], perspective[0])
372 |     # Combine the result with the original image
373 |     result = cv2.addWeighted(img, 1, newwarp, 0.2, 0)
374 | 
375 |     color_warp_lines = np.dstack((warp_zero, warp_zero, warp_zero))
376 |     cv2.polylines(color_warp_lines, np.int_([pts_right]), isClosed=False, color=(255, 255, 0), thickness=25)
377 |     cv2.polylines(color_warp_lines, np.int_([pts_left]), isClosed=False, color=(0, 0, 255), thickness=25)
378 |     newwarp_lines = warp(color_warp_lines, perspective[1], perspective[0])
379 | 
380 |     result = cv2.addWeighted(result, 1, newwarp_lines, 1, 0)
381 | 
382 |     # ----- Radius Calculation ------ #
383 | 
384 |     img_height = img.shape[0]
385 |     y_eval = img_height
386 | 
387 |     ym_per_pix = 30 / 720.  # meters per pixel in y dimension
388 |     xm_per_pix = 3.7 / 700  # meters per pixel in x dimension
389 | 
390 |     ploty = np.linspace(0, img_height - 1, img_height)
391 |     # Fit new polynomials to x,y in world space
392 |     left_fit_cr = np.polyfit(ploty * ym_per_pix, left_fitx * xm_per_pix, 2)
393 |     right_fit_cr = np.polyfit(ploty * ym_per_pix, right_fitx * xm_per_pix, 2)
394 | 
395 |     # Calculate the new radii of curvature
396 |     left_curverad = ((1 + (2 * left_fit_cr[0] * y_eval * ym_per_pix + left_fit_cr[1]) ** 2) ** 1.5) / np.absolute(
397 |         2 * left_fit_cr[0])
398 | 
399 |     right_curverad = (
400 |         (1 + (2 * right_fit_cr[0] * y_eval * ym_per_pix + right_fit_cr[1]) ** 2) ** 1.5) / np.absolute(
401 |         2 * right_fit_cr[0])
402 | 
403 |     radius = round((float(left_curverad) + float(right_curverad)) / 2., 2)
404 | 
405 |     # ----- Off Center Calculation ------ #
406 | 
407 |     lane_width = (right_fit[2] - left_fit[2]) * xm_per_pix
408 |     center = (left_fit[2] + right_fit[2]) / 2  # lane center: midpoint between the line intercepts (the original used their half-difference)
409 |     off_left = (center - left_fit[2]) * xm_per_pix
410 |     off_right = -(right_fit[2] - center) * xm_per_pix
411 |     off_center = round((center - img.shape[1] / 2.) * xm_per_pix, 2)  # shape[1] is the image width; shape[0] (the height) was a bug
412 | 
413 |     # --- Print text on screen ------ #
414 |     # if radius < 5000.0:
415 |     text = "radius = %s [m]\noffcenter = %s [m]" % (str(radius), str(off_center))
416 |     # text = "radius = -- [m]\noffcenter = %s [m]" % (str(off_center))
417 | 
418 |     for i, line in enumerate(text.split('\n')):
419 |         i = 50 + 20 * i
420 |         cv2.putText(result, line, (0, i), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
421 |     return result
422 | 
423 | 
424 | 
425 | if __name__ == '__main__':
426 |     main()
427 | 
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# Advanced Lane Finding

## Goals

* Compute the camera calibration matrix and distortion coefficients given a set of chessboard images.
* Apply a distortion correction to raw images.
* Use color transforms, gradients, etc., to create a thresholded binary image.
* Apply a perspective transform to rectify the binary image ("birds-eye view").
* Detect lane pixels and fit to find the lane boundary.
* Determine the curvature of the lane and the vehicle position with respect to center.
* Warp the detected lane boundaries back onto the original image.
* Output a visual display of the lane boundaries and numerical estimations of lane curvature and vehicle position.

| Raw Input | Final Output |
| ------------- | ------------- |
| ![Raw](http://i.imgur.com/bc60myS.png) | ![Undistorted](http://i.imgur.com/W6sxa33.png) |


### Camera Calibration

This is a fundamental step of the project: without calibration, the image analysis may yield incorrect results.

Camera calibration is performed with OpenCV in two steps:

1. Find Chess Board Corners
   * This function returns the pixel coordinates of the detected inner chessboard corners
   * [OpenCV Chess Board Corners](http://docs.opencv.org/2.4/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html#drawchessboardcorners)
2. Camera Calibration
   * The Calibrate Camera function takes the detected corners plus the corresponding object points and returns the camera matrix along with the radial and tangential distortion coefficients.
   * [OpenCV Calibrate Camera](http://docs.opencv.org/2.4/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html#calibratecamera)

**Undistorted Image Example:**

| Before | After |
| ------------- | ------------- |
| ![Distorted](http://i.imgur.com/HAcV7QF.jpg) | ![Undistorted](http://i.imgur.com/HBHPVhC.jpg) |

***Find in the code: main.py:44***

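As a usage sketch (not part of main.py): once `calibration.p` exists, a frame can be undistorted directly, or, faster for video, through remap tables precomputed once with `cv2.initUndistortRectifyMap`, which is the idea behind the article linked in the Discussion section. The test image path is only an assumption for illustration.

    # Hedged sketch: applying the saved calibration to a single frame.
    import pickle
    import cv2

    with open('calibration.p', 'rb') as f:
        ret, mtx, dist, rvecs, tvecs = pickle.load(f)

    frame = cv2.imread('test_images/test1.jpg')          # hypothetical 1280x720 frame
    undist = cv2.undistort(frame, mtx, dist, None, mtx)  # same call as main.py:44

    # Faster for video: build the undistortion maps once, then only remap per frame.
    h, w = frame.shape[:2]
    map1, map2 = cv2.initUndistortRectifyMap(mtx, dist, None, mtx, (w, h), cv2.CV_32FC1)
    undist_fast = cv2.remap(frame, map1, map2, interpolation=cv2.INTER_LINEAR)
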
### Thresholded Binary Image

Thresholding is applied after the image undergoes a [Sobel](http://docs.opencv.org/3.1.0/d5/d0f/tutorial_py_gradients.html) gradient transform along the x axis, which means we are mainly interested in vertical lines. Before the Sobel transform is applied, the image is converted to the HLS color space: the L channel is used as a grayscale image and the S (saturation) channel is thresholded separately. Changing color spaces makes it easier to detect lane lines of different colors, which gives a better result than the Sobel transform alone.

    hls = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
    gray = hls[:, :, 1]
    s_channel = hls[:, :, 2]


    # Binary matrices creation
    sobel_binary = np.zeros(shape=gray.shape, dtype=bool)
    s_binary = np.zeros(shape=gray.shape, dtype=bool)  # separate array (aliasing sobel_binary was a bug)
    combined_binary = s_binary.astype(np.float32)

    # Sobel Transform
    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=sobel_kernel)
    sobely = 0  # cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=sobel_kernel)

    sobel_abs = np.abs(sobelx**2 + sobely**2)  # squared gradient magnitude (no sqrt; the thresholds are tuned for it)
    sobel_abs = np.uint8(255 * sobel_abs / np.max(sobel_abs))

    sobel_binary[(sobel_abs > mag_thresh[0]) & (sobel_abs <= mag_thresh[1])] = 1

    # Threshold color channel
    s_binary[(s_channel >= s_thresh[0]) & (s_channel <= s_thresh[1])] = 1

    # Combine the two binary thresholds

    combined_binary[(s_binary == 1) | (sobel_binary == 1)] = 1
    combined_binary = np.uint8(255 * combined_binary / np.max(combined_binary))

**Binary Output**
![](http://i.imgur.com/NZuNecy.jpg)

#### Masking
To reduce the number of undesired points during image processing, the thresholded image is masked so that only the region of interest is kept. This improves the reliability of the detection.

***Find in the code:***
1. Binary thresholding: main.py:62 - image_binary():177-202
2. Image masking: main.py:62 - image_binary():208:224


### Perspective Transform
A perspective transform compensates for the difference between the real and perceived distances of objects far from the camera. Without it, the lane lines could not be drawn precisely.


**Source and Destination Points**
To achieve a precise transformation, we specify source and destination points in a clockwise sequence starting from the first quadrant.
![](https://fivedots.coe.psu.ac.th/~ad/jg/ch139/persp2.jpg)

**Offset**
The offset indicated below is a trick that brings the left and right lines closer to each other, so that line curvature information is not lost.

This trick increases the number of points that are later fit to the second order polynomial function representing each line.

| Point | Source Points (x,y) | Destination Points (x,y) |
|------| ------------- | ------------- |
| A | (595, 452) | ((Source Xd + offset), 0) |
| B | (685, 452) | ((Source Xc - offset), 0) |
| C | (1110, y_size) | ((Source Xc - offset), y_size) |
| D | (220, y_size) | ((Source Xd + offset), y_size) |

    line_dst_offset = 200
    src = [595, 452], \
          [685, 452], \
          [1110, img_b.shape[0]], \
          [220, img_b.shape[0]]

    dst = [src[3][0] + line_dst_offset, 0], \
          [src[2][0] - line_dst_offset, 0], \
          [src[2][0] - line_dst_offset, src[2][1]], \
          [src[3][0] + line_dst_offset, src[3][1]]


***Find in the code:***
1. Perspective transform: main.py:65:81
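For reference, the transform itself reduces to two OpenCV calls; this is what `warp()` (main.py:229:235) does with the `src`/`dst` points above:

    M = cv2.getPerspectiveTransform(np.float32([src]), np.float32([dst]))
    # dsize expects (width, height), hence the reversed image shape
    img_w = cv2.warpPerspective(img_b, M, dsize=img_b.shape[0:2][::-1], flags=cv2.INTER_LINEAR)

Swapping `src` and `dst` in the first call yields the inverse transform used later to project the lane back onto the camera view.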

### Detecting Lane Pixels

The binary image, combined with the perspective-transformed image, allows us to determine where the lane lines are placed.

To start the search for the lane positions, a histogram of the binary image is generated: for each x position, it counts the nonzero pixels in the bottom half of the image, so the peaks of the histogram point to the lane lines. The whole process, from the undistorted image to its histogram, is shown below.

![](http://i.imgur.com/I8f3dfs.jpg)


* Upper left: Undistorted image
* Upper right: Binary masked image
* Lower left: Perspective transform of the binary masked image
* Lower right: Histogram of the transformed binary image

#### Fitting lines to a polynomial function
Now that the main concentrations of pixels can be detected, a technique called sliding windows is used to map the groups of pixels that form each lane line, and a second order polynomial is then fit to them.
![](https://d17h27t6h515a5.cloudfront.net/topher/2017/January/588cf5e0_screen-shot-2017-01-28-at-11.49.20-am/screen-shot-2017-01-28-at-11.49.20-am.png)

    # Choose the number of sliding windows
    nwindows = 9
    # Set height of windows
    window_height = int(img_w.shape[0] / nwindows)
    # Identify the x and y positions of all nonzero pixels in the image
    nonzero = img_w.nonzero()
    nonzeroy = np.array(nonzero[0])
    nonzerox = np.array(nonzero[1])
    # Current positions to be updated for each window
    leftx_current = leftx_base
    rightx_current = rightx_base
    # Set the width of the windows +/- margin
    margin = 100
    # Set minimum number of pixels found to recenter window
    minpix = 50
    # Create empty lists to receive left and right lane pixel indices
    left_lane_inds = []
    right_lane_inds = []

    # Step through the windows one by one
    for window in range(nwindows):
        # Identify window boundaries in x and y (and right and left)
        win_y_low = img_w.shape[0] - (window + 1) * window_height
        win_y_high = img_w.shape[0] - window * window_height
        win_xleft_low = leftx_current - margin
        win_xleft_high = leftx_current + margin
        win_xright_low = rightx_current - margin
        win_xright_high = rightx_current + margin
        # Draw the windows on the visualization image
        cv2.rectangle(out_img, (win_xleft_low, win_y_low), (win_xleft_high, win_y_high), (0, 255, 0), 2)
        cv2.rectangle(out_img, (win_xright_low, win_y_low), (win_xright_high, win_y_high), (0, 255, 0), 2)
        # Identify the nonzero pixels in x and y within the window
        good_left_inds = ((nonzeroy >= win_y_low) & (nonzeroy < win_y_high) & (nonzerox >= win_xleft_low) & (
            nonzerox < win_xleft_high)).nonzero()[0]
        good_right_inds = ((nonzeroy >= win_y_low) & (nonzeroy < win_y_high) & (nonzerox >= win_xright_low) & (
            nonzerox < win_xright_high)).nonzero()[0]
        # Append these indices to the lists
        left_lane_inds.append(good_left_inds)
        right_lane_inds.append(good_right_inds)
        # If you found > minpix pixels, recenter next window on their mean position
        if len(good_left_inds) > minpix:
            leftx_current = int(np.mean(nonzerox[good_left_inds]))
        if len(good_right_inds) > minpix:
            rightx_current = int(np.mean(nonzerox[good_right_inds]))

    # Concatenate the arrays of indices
    left_lane_inds = np.concatenate(left_lane_inds)
    right_lane_inds = np.concatenate(right_lane_inds)

    # Extract left and right line pixel positions
    leftx = nonzerox[left_lane_inds]
    lefty = nonzeroy[left_lane_inds]
    rightx = nonzerox[right_lane_inds]
    righty = nonzeroy[right_lane_inds]

    # Fit a second order polynomial to each
    left_fit = np.polyfit(lefty, leftx, 2)
    right_fit = np.polyfit(righty, rightx, 2)
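Between frames, the fitted coefficients are also smoothed with a moving average over the last `MOV_AVG_LENGTH` fits (item 3 below). The reversed-slice indexing in main.py:110:118 is dense; the following sketch, with a hypothetical coefficient history, computes the same thing:

    # Hedged sketch of the averaging in main.py:110:118.
    # mov_avg_left holds one [a, b, c] polynomial per processed frame, shape (N, 3).
    import numpy as np

    MOV_AVG_LENGTH = 5

    def smooth_fit(mov_avg):
        # Mean of each coefficient over the most recent MOV_AVG_LENGTH frames;
        # the order of the averaged rows does not matter for the mean.
        return mov_avg[-MOV_AVG_LENGTH:].mean(axis=0)

    # Hypothetical history of three frames of left-line fits:
    history = np.array([[2.1e-4, -0.31, 420.0],
                        [2.0e-4, -0.30, 421.5],
                        [2.2e-4, -0.32, 419.0]])
    print(smooth_fit(history))  # averaged [a, b, c]
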

***Find in the code:***
1. Fit lines - fit from an already found fit - main.py:102
2. Fit lines - fit from unknown (sliding_windown) - main.py:108
3. Averaging fits - main.py:110:118


### Lane Curvature and Off-Center Distance


Lane curvature is calculated through the following relation:

$ Radius = \frac{\left[1 + \left(\frac{dx}{dy}\right)^2\right]^{3/2}}{\left|\frac{d^2x}{dy^2}\right|} $

Another important input for the calculation is the real-world distance each pixel represents on the image:

> Pixel x axis = 3.7 / 700 m
> Pixel y axis = 30 / 720 m


    ploty = np.linspace(0, img_height - 1, img_height)
    # Fit new polynomials to x,y in world space
    left_fit_cr = np.polyfit(ploty * ym_per_pix, left_fitx * xm_per_pix, 2)
    right_fit_cr = np.polyfit(ploty * ym_per_pix, right_fitx * xm_per_pix, 2)

    # Calculate the new radii of curvature
    left_curverad = ((1 + (2 * left_fit_cr[0] * y_eval * ym_per_pix + left_fit_cr[1]) ** 2) ** 1.5) / np.absolute(
        2 * left_fit_cr[0])

    right_curverad = (
        (1 + (2 * right_fit_cr[0] * y_eval * ym_per_pix + right_fit_cr[1]) ** 2) ** 1.5) / np.absolute(
        2 * right_fit_cr[0])

    radius = round((float(left_curverad) + float(right_curverad)) / 2., 2)


#### Off-Center Distance

Since we know the pixel/meter relation on the x axis and the distance in pixels between the left and right lane lines, we can calculate how far the car sits from the lane center, assuming the camera is mounted at the horizontal center of the image.

**Center between lane lines**

$ line_{center} = (left_{fit}[2] + right_{fit}[2]) / 2 $, where:

* $left_{fit}[2]$ and $right_{fit}[2]$ are the constant coefficients of the fit functions; each gives the x position where its polynomial crosses y = 0.

**Distance in pixels between car center and lane center**

$ offcenter_{pixels} = line_{center} - car_{center} $, where $ car_{center} $ is half the image width.

In meters:

$ offcenter_{meters} = offcenter_{pixels} \cdot (3.7 / 700) $


***Find in the code:***
1. main.py:129
2. draw_lines():382:421

### Inverse Perspective Transformation

An important step, performed even before the lane radius calculation, is the inverse perspective transform, which projects the detected lane back onto the image in its real shape.

The function is the same as for the first perspective transform, but the source and destination points are swapped in the call, as the sketch below shows.


***Find in the code:***
1. main.py:129
2. draw_lines():371, 378
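A minimal sketch of that swap, mirroring draw_lines() (main.py:371 and :378), where `perspective = [src, dst]`:

    img_w   = warp(img_b, src, dst)        # forward: camera view -> bird's-eye view
    newwarp = warp(color_warp, dst, src)   # inverse: bird's-eye overlay -> camera view
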

### Video Demonstration

[![ ](http://img.youtube.com/vi/iFhYH4QPJ9A/0.jpg)](http://www.youtube.com/watch?v=iFhYH4QPJ9A "Advanced Lane Detection")

### Discussion

After completing the basic requirements, two main subjects stand out.

1. Performance:
The final pipeline is not as fast as I wanted it to be; roughly 12 fps is not enough for real-time scenarios. Two processing steps alone take almost 50 ms per frame, which caps the pipeline at a starting point of about 20 fps, already a low frame rate. Improvements must therefore target the undistortion function and the binary transform. An interesting approach would be porting this development from Python to C++, which is reported to speed up the undistortion about 5x:

[How to speed up image undistortion for a video/image sequence?](https://shiyuzhao1.wordpress.com/2013/11/21/how-to-speed-up-image-undistortion-for-a-videoimage-sequence/)

2. Robustness:
There is a lot to be done to make this framework robust in different environments. New videos are available to support that work, and from this point on it should take much less effort than bringing the development from zero to its current state.

This was a very interesting project that built important computer vision skills.


--------------------------------------------------------------------------------