├── README.md ├── Vehicle_Detection.ipynb ├── examples ├── mode_yolo_plot.jpg └── net_output.png ├── output_images ├── .DS_Store ├── output1.png ├── output2.jpg ├── output3.jpg ├── resized.png └── save_output_here.txt ├── project_video.mp4 ├── project_video_output.mp4 ├── test_images ├── .DS_Store ├── test1.jpg ├── test2.jpg ├── test3.jpg ├── test4.jpg ├── test5.jpg └── test6.jpg ├── test_video.mp4 └── utils ├── __init__.py └── utils.py /README.md: -------------------------------------------------------------------------------- 1 | # Vehicle Detection 2 | The goal of the project is to detect and draw squares around cars in dashcam footage. I used a YOLO image detection network to predict the boxes and labels that I put on the image. I choose to use a one-shot approach to this instead of the Udacity recommended approach for a few reasons: 3 | 4 | 1. I love deep learning and I’ll have much more fun playing with Neural networks than I will with more traditional approaches 5 | 2. I’ve heard models like this are more commonly used in the industry than the HOG/SVM style approach Udacity teaches 6 | 7 | I also choose to implement it in Keras because it's accessable and makes the nice and small. On that note, I've tried to include a more detailed description of YOLO's output than you can find anywhere else by googling. I spent a long time figuring out what each number meant and hopefully you won't have to do so. I've also put some effort into commenting profusely throughout the [`utils.py`](utils/utils.py) file to make it easy to understand. Feel free to use any parts of this code you want. 8 | 9 | ## What is YOLO? 10 | "YOLO" or "You Only Look Once" (hah ಠ_ಠ), is a massive Convolutional Nueral network for object detection and classification. As a quote from its [website](https://pjreddie.com/darknet/yolo/) explains “This network divides the image into regions and predicts bounding boxes and probabilities for each region. 
These bounding boxes are weighted by the predicted probabilities.” 11 | 12 | ![](https://pjreddie.com/media/image/model2.png) 13 | 14 | YOLO is a great network and on the cutting edge of object detection, but… it’s really big and it’s really slow. So since I’m not looking to put this network in a car, I decided to use the smaller [Tiny-YOLO](https://pjreddie.com/darknet/tiny-darknet/) instead. 15 | 16 | ## tiny-YOLO architecture 17 | 18 | 19 | ![model](./examples/mode_yolo_plot.jpg) 20 | 21 | 22 | 23 | 24 | ____________________________________________________________________________________________________ 25 | Layer (type) Output Shape Param # Connected to 26 | ==================================================================================================== 27 | convolution2d_1 (Convolution2D) (None, 16, 448, 448) 448 convolution2d_input_1[0][0] 28 | ____________________________________________________________________________________________________ 29 | leakyrelu_1 (LeakyReLU) (None, 16, 448, 448) 0 convolution2d_1[0][0] 30 | ____________________________________________________________________________________________________ 31 | maxpooling2d_1 (MaxPooling2D) (None, 16, 224, 224) 0 leakyrelu_1[0][0] 32 | ____________________________________________________________________________________________________ 33 | convolution2d_2 (Convolution2D) (None, 32, 224, 224) 4640 maxpooling2d_1[0][0] 34 | ____________________________________________________________________________________________________ 35 | leakyrelu_2 (LeakyReLU) (None, 32, 224, 224) 0 convolution2d_2[0][0] 36 | ____________________________________________________________________________________________________ 37 | maxpooling2d_2 (MaxPooling2D) (None, 32, 112, 112) 0 leakyrelu_2[0][0] 38 | ____________________________________________________________________________________________________ 39 | convolution2d_3 (Convolution2D) (None, 64, 112, 112) 18496 maxpooling2d_2[0][0] 40 | 
____________________________________________________________________________________________________ 41 | leakyrelu_3 (LeakyReLU) (None, 64, 112, 112) 0 convolution2d_3[0][0] 42 | ____________________________________________________________________________________________________ 43 | maxpooling2d_3 (MaxPooling2D) (None, 64, 56, 56) 0 leakyrelu_3[0][0] 44 | ____________________________________________________________________________________________________ 45 | convolution2d_4 (Convolution2D) (None, 128, 56, 56) 73856 maxpooling2d_3[0][0] 46 | ____________________________________________________________________________________________________ 47 | leakyrelu_4 (LeakyReLU) (None, 128, 56, 56) 0 convolution2d_4[0][0] 48 | ____________________________________________________________________________________________________ 49 | maxpooling2d_4 (MaxPooling2D) (None, 128, 28, 28) 0 leakyrelu_4[0][0] 50 | ____________________________________________________________________________________________________ 51 | convolution2d_5 (Convolution2D) (None, 256, 28, 28) 295168 maxpooling2d_4[0][0] 52 | ____________________________________________________________________________________________________ 53 | leakyrelu_5 (LeakyReLU) (None, 256, 28, 28) 0 convolution2d_5[0][0] 54 | ____________________________________________________________________________________________________ 55 | maxpooling2d_5 (MaxPooling2D) (None, 256, 14, 14) 0 leakyrelu_5[0][0] 56 | ____________________________________________________________________________________________________ 57 | convolution2d_6 (Convolution2D) (None, 512, 14, 14) 1180160 maxpooling2d_5[0][0] 58 | ____________________________________________________________________________________________________ 59 | leakyrelu_6 (LeakyReLU) (None, 512, 14, 14) 0 convolution2d_6[0][0] 60 | ____________________________________________________________________________________________________ 61 | maxpooling2d_6 (MaxPooling2D) (None, 512, 7, 7) 0 
leakyrelu_6[0][0] 62 | ____________________________________________________________________________________________________ 63 | convolution2d_7 (Convolution2D) (None, 1024, 7, 7) 4719616 maxpooling2d_6[0][0] 64 | ____________________________________________________________________________________________________ 65 | leakyrelu_7 (LeakyReLU) (None, 1024, 7, 7) 0 convolution2d_7[0][0] 66 | ____________________________________________________________________________________________________ 67 | convolution2d_8 (Convolution2D) (None, 1024, 7, 7) 9438208 leakyrelu_7[0][0] 68 | ____________________________________________________________________________________________________ 69 | leakyrelu_8 (LeakyReLU) (None, 1024, 7, 7) 0 convolution2d_8[0][0] 70 | ____________________________________________________________________________________________________ 71 | convolution2d_9 (Convolution2D) (None, 1024, 7, 7) 9438208 leakyrelu_8[0][0] 72 | ____________________________________________________________________________________________________ 73 | leakyrelu_9 (LeakyReLU) (None, 1024, 7, 7) 0 convolution2d_9[0][0] 74 | ____________________________________________________________________________________________________ 75 | flatten_1 (Flatten) (None, 50176) 0 leakyrelu_9[0][0] 76 | ____________________________________________________________________________________________________ 77 | dense_1 (Dense) (None, 256) 12845312 flatten_1[0][0] 78 | ____________________________________________________________________________________________________ 79 | dense_2 (Dense) (None, 4096) 1052672 dense_1[0][0] 80 | ____________________________________________________________________________________________________ 81 | leakyrelu_10 (LeakyReLU) (None, 4096) 0 dense_2[0][0] 82 | ____________________________________________________________________________________________________ 83 | dense_3 (Dense) (None, 1470) 6022590 leakyrelu_10[0][0] 84 | 
==================================================================================================== 85 | Total params: 45,089,374 86 | Trainable params: 45,089,374 87 | Non-trainable params: 0 88 | ____________________________________________________________________________________________________ 89 | 90 | 91 | As you can see, tiny-YOLO has 9 convolutional layers and is only tiny when compared to the normal YOLO which has 24. 92 | 93 | ## YOLO Output: Overview 94 | 95 | 96 | These next four sections get into the nitty gritty of what the network produces. I’d recommend skipping them if you don’t want to implement this yourself. These outputs are the same for YOLO and Tiny-YOLO. 97 | 98 | This network outputs a vector of 1470 numbers that are divided into three sections. 99 | 100 | YOLO's sorts it's output by the 49 grid cells (7x7) that it divides the image into. The cells are represented in this array left to right, then top to bottom. The data is organized into three parts which I will describe below. 101 | 102 | ### YOLO Output: Probability 103 | This is the simplest one, there are 20 classes that the network can predict. 20 classes * 49 cells = the 980 numbers that are in this part of the vector. So 0-19 of the vector is the relative probabilities that the top left cell is categorized as each class. For this project, I only care about identifying cars, so I only use index 6. This corresponds to the car class in the dataset that tiny yolo was originally trained on. 104 | 105 | ### YOLO Output: Confidence 106 | Each cell in the grid is responsible for predicting two bounding boxes. Thus there is one number in this section of 98 of each of those bounding boxes in each cell (49*2). Each "confidence score" is the probability that there is an object within that bounding box. This doesn't tell you anything about the shape of the box, or what's in it, just whether there is something there. 
107 | 108 | ### YOLO Output: Box Coordinates 109 | This is where the boxes are predicted. Each box is represented with four numbers (x,y,width,height). These numbers are relative to the cell they're in and have to be scaled to be represented on the entire image. Once again, each cell predicts two bounding boxes so 2 boxes * 4 number per box * 49 cells = 392 numbers. 110 | 111 | ## Weights/Training 112 | I don't have a big deep learning computer so even with the tiny-YOLO I need to either use a cpu so it gets stored in RAM or use pre trained weights. I choose the latter. The weights I used are from the darknet site and are from training the model on the VOC2012 Dataset. This is why there are 20 classes to choose from even though we'd be happy with just a car class. 113 | 114 | I didn't include the weights here because github doesn't like big files, but you can download them yourself [here](https://pjreddie.com/media/files/yolo.weights) 115 | 116 | ## Making up for size difference 117 | The model I'm using is trained to recognize objects that are generally much closer (and thus bigger) than the cars I'm trying to detect, so I had to work around it. Along with running YOLO on the whole image, I also ran it on subsections. The fixed subsection was zoomed in on the highway ahead to try to get farther cars. But I also kept track of where detected cars were and tried to find them in a zoomed in image of that position again in the next few frames. This also made up for some of the performance loss I got from using tiny-yolo vs the full model. 118 | 119 | Here's an example of the standard cropping i do to zoom the image in on the highway ahead. As you can see, padding has been automatically added on the top and bottom to make the image the proper 448x448. 120 | 121 |
cropped
122 | 123 | ## Heatmapping 124 | I used a heatmap of the past 8 frames to eliminate most false detections. False detections are generally flukes and happen for only a frame or two. The heatmap kept track of the total number of detections in any area of the image for the past 8 frames and if there were less than 3, I discarded that detection. 125 | 126 | ## Results 127 | [Result video](https://youtu.be/2EtTBv0DjKc) 128 | 129 | And here are a few example images of the output: 130 | 131 | 132 | 133 | 134 | 135 | ## Reflections 136 | Tiny-YOLO works pretty well but I'd love to see how it could perform with the full YOLO or even SDD. In the future I'd love to have the time and GPU's to train my own YOLO or SDD on open source dashcam datasets. 137 | 138 | Additionally, it would be nice to implement a more robust tracking function that could keep track of the velocity of objects. This would make it easy to estimate their position in upcoming frames. 139 | 140 | My shortcut of running YOLO multiple times per frame is just that, a shortcut. It would be much better to simply train the full YOLO on a highway dataset so that it would know to look for smaller cars. This would also mean i didn't have to discard 19 out of 20 classes the network produced and would make it more accurate as a result. 141 | 142 | I'd also like to keep track of which car is which and use it's mean box for the last few frames to eliminate jitter. 
143 | 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /examples/mode_yolo_plot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/examples/mode_yolo_plot.jpg -------------------------------------------------------------------------------- /examples/net_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/examples/net_output.png -------------------------------------------------------------------------------- /output_images/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/output_images/.DS_Store -------------------------------------------------------------------------------- /output_images/output1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/output_images/output1.png -------------------------------------------------------------------------------- /output_images/output2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/output_images/output2.jpg -------------------------------------------------------------------------------- /output_images/output3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/output_images/output3.jpg 
-------------------------------------------------------------------------------- /output_images/resized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/output_images/resized.png -------------------------------------------------------------------------------- /output_images/save_output_here.txt: -------------------------------------------------------------------------------- 1 | Please save your output images to this folder and include a description in your README of what each image shows. -------------------------------------------------------------------------------- /project_video.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/project_video.mp4 -------------------------------------------------------------------------------- /project_video_output.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/project_video_output.mp4 -------------------------------------------------------------------------------- /test_images/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/test_images/.DS_Store -------------------------------------------------------------------------------- /test_images/test1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/test_images/test1.jpg -------------------------------------------------------------------------------- /test_images/test2.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/test_images/test2.jpg -------------------------------------------------------------------------------- /test_images/test3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/test_images/test3.jpg -------------------------------------------------------------------------------- /test_images/test4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/test_images/test4.jpg -------------------------------------------------------------------------------- /test_images/test5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/test_images/test5.jpg -------------------------------------------------------------------------------- /test_images/test6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/test_images/test6.jpg -------------------------------------------------------------------------------- /test_video.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ablacklama/Vehicle-Detection/c907fd593c03661ca3638a89edab67aad3967d98/test_video.mp4 -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * 
"""Helpers for running a Tiny-YOLO car detector on video frames.

This is the ``utils/utils.py`` module.  It covers the whole pipeline around
the network itself: image pre-processing, loading of the raw weight file,
decoding of the flat network output into boxes, duplicate suppression and
drawing of the final boxes.
"""
import numpy as np
import cv2


class bb:
    """Plain record describing one predicted bounding box.

    Every attribute starts out as ``None``:

    * ``x``, ``y``, ``w``, ``h`` -- box centre and size, set by
      ``process_output``.
    * ``c`` -- the box confidence read from the network output.
    * ``prob`` -- confidence multiplied by the car class probability.
    * ``x1``, ``y1``, ``x2``, ``y2`` -- integer corner coordinates, set by
      ``remove_duplicates``.
    """

    def __init__(self):
        self.x = self.y = self.h = self.w = None
        self.c = None
        self.prob = None
        self.x1 = self.y1 = self.x2 = self.y2 = None


def preprocess(img, shave=False, shavedim=(350, 500, 500, 1000)):
    """Crop (optionally), pad to a square and resize an image to 448x448.

    Args:
        img: image array indexed as (row, column, channel).
        shave: when true, crop ``img`` to ``shavedim`` first.
        shavedim: crop window as ``(ymin, ymax, xmin, xmax)``.  The bounds
            are clamped to the image; the argument itself is never modified.

    Returns:
        A 4-tuple ``(transposed, padhw, shavedim, resized)``:

        * ``transposed`` -- the 448x448 image with axes reordered to
          (channel, row, column).
        * ``padhw`` -- ``[vertical, horizontal]`` padding added on each
          side, expressed in pixels of the 448x448 image.
        * ``shavedim`` -- the clamped crop window (a tuple) when ``shave``
          is true, otherwise the argument unchanged.
        * ``resized`` -- the 448x448 image in its original axis order.
    """
    if shave:
        # Clamp into locals: the default ``shavedim`` is a tuple, so
        # assigning into it would raise TypeError, and a caller-supplied
        # list must not be modified behind the caller's back.
        ymin, ymax, xmin, xmax = shavedim[:4]
        ymin = max(ymin, 0)
        ymax = min(ymax, img.shape[0])
        xmin = max(xmin, 0)
        xmax = min(xmax, img.shape[1])
        shavedim = (ymin, ymax, xmin, xmax)
        img = img[ymin:ymax, xmin:xmax]

    height, width = img.shape[0], img.shape[1]

    # Pad the shorter side with light grey so the image becomes square.
    padhw = [0, 0]
    grey = [200, 200, 200]
    if width > height:
        dif = width - height
        border = cv2.copyMakeBorder(img, int(dif / 2), int(dif / 2), 0, 0,
                                    cv2.BORDER_CONSTANT, value=grey)
        # Express the padding in pixels of the final 448x448 image.
        padhw[0] = int(((dif / 2) / border.shape[0]) * 448)
    elif height > width:
        dif = height - width
        border = cv2.copyMakeBorder(img, 0, 0, int(dif / 2), int(dif / 2),
                                    cv2.BORDER_CONSTANT, value=grey)
        padhw[1] = int(((dif / 2) / border.shape[1]) * 448)
    else:
        border = img

    # The network takes a fixed 448x448 input ...
    resized = cv2.resize(border, (448, 448))
    # ... fed in as (channel, y, x).
    transposed = np.transpose(resized, [2, 0, 1])
    return transposed, padhw, shavedim, resized


def load_weights(model, yolo_weight_file):
    """Read a flat float32 weight file and load it into ``model``.

    The first four float32 values of the file are skipped (presumably the
    darknet file header -- confirm against the weight file format).  For
    every layer that has weights, the bias values are read first and the
    kernel values second, each reshaped to the shape the layer reports.

    Args:
        model: object exposing ``layers``; each layer must provide
            ``get_weights()`` and ``set_weights()``.  Layers with weights
            are expected to hold exactly a ``[kernel, bias]`` pair.
        yolo_weight_file: path (or open file) accepted by ``np.fromfile``.
    """
    weights = np.fromfile(yolo_weight_file, np.float32)[4:]

    offset = 0
    for layer in model.layers:
        shapes = [w.shape for w in layer.get_weights()]
        if not shapes:
            # Layer without parameters (activation, pooling, ...).
            continue
        shape_kernel, shape_bias = shapes

        n_bias = int(np.prod(shape_bias))
        bias = weights[offset:offset + n_bias].reshape(shape_bias)
        offset += n_bias

        n_kernel = int(np.prod(shape_kernel))
        kernel = weights[offset:offset + n_kernel].reshape(shape_kernel)
        offset += n_kernel

        layer.set_weights([kernel, bias])


def process_output(yolo_output, threshold=0.2, padhw=(98, 0), shaved=False,
                   shavedim=(350, 500, 500, 1000), imgsize=(1280, 720)):
    """Decode the flat network output into car boxes on the original image.

    The output vector is read as 7*7*20 class probabilities, followed by
    7*7*2 box confidences, followed by 7*7*2*4 box coordinates.

    Args:
        yolo_output: flat sequence of 1470 values for a single image.
        threshold: minimum ``confidence * car probability`` for a box to be
            kept.
        padhw: ``(vertical, horizontal)`` padding as returned by
            ``preprocess``.
        shaved: whether the network input was a crop of the original image.
        shavedim: the crop window ``(ymin, ymax, xmin, xmax)`` that was
            used; only read when ``shaved`` is true.
        imgsize: ``(width, height)`` of the original image; only read when
            ``shaved`` is false.  Defaults to the 1280x720 frames that were
            previously hard-coded.

    Returns:
        List of ``bb`` objects with ``x``, ``y``, ``w``, ``h``, ``c`` and
        ``prob`` set, sorted by ``prob`` in descending order.
    """
    car_class = 6  # index of the car class in the class probability block
    S = 7          # grid is S x S cells
    B = 2          # boxes predicted per cell
    C = 20         # classes per cell
    SS = S * S
    prob_size = SS * C
    conf_size = SS * B

    # Split the vector into its three sections, one row per grid cell.
    probs = np.reshape(yolo_output[0:prob_size], (SS, C))
    confs = np.reshape(yolo_output[prob_size:prob_size + conf_size], (SS, B))
    yolo_boxes = np.reshape(yolo_output[prob_size + conf_size:], (SS, B, 4))

    cell = 448 / S

    # Size of the un-padded picture inside the 448x448 network input.
    nopadh = 448 - (padhw[0] * 2)
    nopadw = 448 - (padhw[1] * 2)
    if shaved:
        x_scale = (shavedim[3] - shavedim[2]) / nopadw
        y_scale = (shavedim[1] - shavedim[0]) / nopadh
    else:
        x_scale = imgsize[0] / nopadw
        y_scale = imgsize[1] / nopadh

    boxes = []
    # Cells are stored left to right, then top to bottom.
    for gridn in range(SS):
        gridy, gridx = divmod(gridn, S)
        for index1 in range(B):
            conf = confs[gridn, index1]
            car_prob = probs[gridn, car_class] * conf
            if car_prob >= threshold:
                box = bb()
                box.c = conf

                # Pixel values of x, y, w, h in the 448x448 network input.
                box.x = yolo_boxes[gridn, index1, 0] * cell + (gridx * cell)
                box.y = yolo_boxes[gridn, index1, 1] * cell + (gridy * cell)
                # NOTE(review): other YOLO v1 decoders square the raw
                # width/height; here they are halved.  Left unchanged --
                # confirm this is intentional.
                box.w = yolo_boxes[gridn, index1, 2] / 2 * 448
                box.h = yolo_boxes[gridn, index1, 3] / 2 * 448

                # Drop the border padding added by preprocess.
                box.y = box.y - padhw[0]
                box.x = box.x - padhw[1]

                # Scale up to the original (or cropped) image size.
                box.y = box.y * y_scale
                box.w = box.w * x_scale
                box.x = box.x * x_scale
                box.h = box.h * y_scale

                # Shift a cropped detection back into the full image.
                if shaved:
                    box.y += shavedim[0]
                    box.x += shavedim[2]

                box.prob = car_prob
                boxes.append(box)

    # Best boxes first; remove_duplicates relies on this order.
    boxes.sort(key=lambda b: b.prob, reverse=True)
    return boxes


def remove_duplicates(boxes, img, iou_threshold=.05):
    """Compute box corners and drop boxes overlapping a better one.

    ``boxes`` is expected to be sorted by ``prob`` in descending order (as
    returned by ``process_output``): each box suppresses every later box it
    overlaps.  Suppressed boxes get ``prob`` set to 0 as a side effect.

    Args:
        boxes: list of ``bb`` objects with ``x``, ``y``, ``w``, ``h`` and
            ``prob`` set.
        img: the image the boxes refer to; only its height and width are
            used, to keep corners at least 3 pixels inside the picture.
        iou_threshold: overlap at or above which the lower-ranked box is
            removed.

    Returns:
        New list holding the surviving boxes, with ``x1``, ``y1``, ``x2``
        and ``y2`` filled in.
    """
    # Only the first two dimensions are needed, so 2-D images work as well.
    height, width = img.shape[0], img.shape[1]

    for box in boxes:
        box.x1 = int(box.x - (box.w / 2))
        box.x2 = int(box.x + (box.w / 2))
        box.y1 = int(box.y - (box.h / 2))
        box.y2 = int(box.y + (box.h / 2))

        # Keep the corners within the border of the picture.
        box.x2 = min(box.x2, width - 3)
        box.y2 = min(box.y2, height - 3)
        box.x1 = max(box.x1, 3)
        box.y1 = max(box.y1, 3)

    for i, best in enumerate(boxes):
        if best.prob == 0:
            # Already suppressed by a better box.
            continue
        best_rect = [best.x1, best.y1, best.x2, best.y2]
        for other in boxes[i + 1:]:
            other_rect = [other.x1, other.y1, other.x2, other.y2]
            if bb_intersection_over_union(best_rect, other_rect) >= iou_threshold:
                other.prob = 0

    return [box for box in boxes if box.prob > 0.]


def draw_boxes(boxes, img):
    """Draw every box and its probability label onto ``img``.

    The boxes must already carry integer corner coordinates (``x1``,
    ``y1``, ``x2``, ``y2``), i.e. they must have been through
    ``remove_duplicates``.

    Args:
        boxes: iterable of ``bb`` objects.
        img: image to draw on.

    Returns:
        The image with the boxes drawn on it.
    """
    font = cv2.FONT_HERSHEY_PLAIN
    colour = (200, 0, 0)
    for box in boxes:
        # Outline of the detection.
        img = cv2.rectangle(img, (box.x1, box.y1), (box.x2, box.y2), colour, 6)
        # Filled label background sitting on top of the box.
        img = cv2.rectangle(img, (box.x1 - 3, box.y1),
                            (box.x1 + 135, box.y1 - 35), colour, cv2.FILLED)
        img = cv2.putText(img, 'Car %{0:.3}'.format(box.prob * 100),
                          (box.x1, box.y1 - 10), font, 1.6, (255, 255, 255), 2,
                          cv2.LINE_AA)

    return img


def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Boxes are ``[x1, y1, x2, y2]`` with inclusive pixel coordinates, hence
    the ``+ 1`` when computing widths and heights.

    Args:
        boxA: first box.
        boxB: second box.

    Returns:
        The overlap ratio as a float; ``0.0`` when the boxes do not
        intersect.
    """
    # Corners of the intersection rectangle.
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Clamp each side at zero: without this, two boxes that are apart on
    # both axes give negative * negative = a bogus positive overlap.
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)

    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    union = float(boxAArea + boxBArea - interArea)
    if union <= 0:
        # Degenerate (empty or inverted) boxes: report no overlap instead
        # of dividing by zero.
        return 0.0

    return interArea / union