├── .editorconfig ├── .gitignore ├── LICENSE ├── README.md ├── images │   ├── dog.jpg │   ├── image1.jpg │   ├── image2.jpg │   └── image3.jpg ├── model_data │   ├── coco_classes.txt │   ├── yolo_anchors.txt │   ├── yolo_coco_classes.txt │   └── yolov3_anchors.txt ├── out │   ├── ssd_mobilenet_v1_dog.jpg │   ├── ssd_mobilenet_v1_image1.jpg │   ├── ssd_mobilenet_v1_image2.jpg │   ├── ssd_mobilenet_v1_image3.jpg │   ├── ssdlite_mobilenet_v2_dog.jpg │   ├── tiny_yolo_dog.jpg │   └── yolov3_dog.jpg ├── requirements.txt ├── test_ssd_mobilenet_v1.py ├── test_ssdlite_mobilenet_v2.py ├── test_tiny_yolo.py ├── test_yolov3.py ├── utils │   ├── __init__.py │   ├── ssd_mobilenet_utils.py │   └── yolo_utils.py ├── yad2k │   ├── models │   │   ├── keras_darknet19.py │   │   └── keras_yolo.py │   └── utils │       ├── __init__.py │       └── utils.py └── yolov3     ├── __init__.py     └── model.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines with a newline ending every file 7 | [*] 8 | charset = utf-8 9 | indent_style = space 10 | indent_size = 4 11 | end_of_line = lf 12 | insert_final_newline = true 13 | 14 | # Tab indentation (no size specified) 15 | [Makefile] 16 | indent_style = tab 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | *~ 4 | 5 | venv/ 6 | .venv/ 7 | 8 | .vscode/ 9 | 10 | .DS_Store 11 | ._.DS_Store 12 | 13 | 14 | # model 15 | *.pb 16 | *.pbtxt 17 | *.tar.gz 18 | *.h5 19 | *.tflite 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright <2019> 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Object Detection 2 | 3 | * Training: 4 | 5 | - [yolov2-tf2](https://github.com/kaka-lin/yolov2-tf2) 6 | - [yolov3-tf2](https://github.com/kaka-lin/yolov3-tf2) 7 | 8 | * Models (inference): 9 | 10 | - [tiny-YOLOv2](#tiny-yolo) 11 | - [YOLOv3](#yolov3) 12 | - [SSD-MobileNet v1](#ssd-mobilenet-v1) 13 | - [SSDLite-MobileNet v2 (tflite)](#ssdlite-mobilenet-v2) 14 | 15 | ## Usage 16 | 17 |
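Install the dependencies first (a minimal setup sketch; the exact package versions are pinned in `requirements.txt`): ```bash $ pip3 install -r requirements.txt ```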
18 | ### 1. tiny-YOLOv2 19 | 20 | * download the [tiny-yolo](https://drive.google.com/file/d/14-5ZojD1HSgMKnv6_E3WUcBPxaVm52X2/view?usp=sharing) file and put it in the `model_data` directory 21 | 22 | ```bash 23 | $ python3 test_tiny_yolo.py 24 | ``` 25 | 26 | 27 | ### 2. YOLOv3 28 | 29 | * download the [yolov3](https://drive.google.com/open?id=1vdD9TPiTWqvPxtCXdbVSKKksSdu0j_Hn) file and put it in the `model_data` directory 30 | 31 | ```bash 32 | $ python3 test_yolov3.py 33 | ``` 34 | 35 | 36 | ### 3. SSD-MobileNet v1 37 | 38 | ```bash 39 | $ python3 test_ssd_mobilenet_v1.py 40 | ``` 41 | 42 | 43 | ### 4. SSDLite-MobileNet v2 (tflite) 44 | 45 | * download the [ssdlite-mobilenet-v2](https://drive.google.com/file/d/1Ha9yfjkweCatEo6UoZgZyHMeyIBGe5FO/view?usp=sharing) file and put it in the `model_data` directory 46 | 47 | ```bash 48 | $ python3 test_ssdlite_mobilenet_v2.py 49 | ``` 50 | 51 | ## Comparison 52 | 53 | * tiny-YOLOv2 54 | 55 | ![](/out/tiny_yolo_dog.jpg) 56 | 57 | 58 | * YOLOv3 59 | 60 | ![](/out/yolov3_dog.jpg) 61 | 62 | * SSD-MobileNet v1 63 | 64 | ![](/out/ssd_mobilenet_v1_dog.jpg) 65 | 66 | * SSDLite-MobileNet v2 (tflite) 67 | ![](/out/ssdlite_mobilenet_v2_dog.jpg) 68 | 69 | ## Acknowledgments 70 | 71 | * Thanks to [keras-yolo3](https://github.com/qqwweee/keras-yolo3) for the YOLOv3 Keras part. 72 | * Thanks to [mobile-object-detector-with-tensorflow-lite](https://medium.com/datadriveninvestor/mobile-object-detector-with-tensorflow-lite-9e2c278922d0) for the SSDLite-MobileNet v2 part. 73 | -------------------------------------------------------------------------------- /images/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/images/dog.jpg -------------------------------------------------------------------------------- /images/image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/images/image1.jpg -------------------------------------------------------------------------------- /images/image2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/images/image2.jpg -------------------------------------------------------------------------------- /images/image3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/images/image3.jpg -------------------------------------------------------------------------------- /model_data/coco_classes.txt: -------------------------------------------------------------------------------- 1 | None1 2 | person 3 | bicycle 4 | car 5 | motorbike 6 | airplane 7 | bus 8 | train 9 | truck 10 | boat 11 | traffic light 12 | fire hydrant 13 | None12 14 | stop sign 15 | parking meter 16 | bench 17 | bird 18 | cat 19 | dog 20 | horse 21 | sheep 22 | cow 23 | elephant 24 | bear 25 | zebra 26 | giraffe 27 | None26 28 | backpack 29 | umbrella 30 | None29 31 | None30 32 | handbag 33 | tie 34 | suitcase 35 | frisbee 36 | skis 37 | snowboard 38 | sports ball 39 | kite 40 | baseball bat 41 | baseball glove 42 | skateboard 43 | surfboard 44 | tennis racket 45 | bottle 46 | None45 47 | wine glass 48 | cup 49 | fork 50 | knife 51 | spoon 52 | bowl 53 | banana 54 | apple 55 | sandwich 56 
| orange 57 | broccoli 58 | carrot 59 | hot dog 60 | pizza 61 | donut 62 | cake 63 | chair 64 | sofa 65 | pottedplant 66 | bed 67 | None66 68 | diningtable 69 | None68 70 | None69 71 | toilet 72 | None71 73 | tvmonitor 74 | laptop 75 | mouse 76 | remote 77 | keyboard 78 | cell phone 79 | microwave 80 | oven 81 | toaster 82 | sink 83 | refrigerator 84 | None83 85 | book 86 | clock 87 | vase 88 | scissors 89 | teddy bear 90 | hair drier 91 | toothbrush 92 | -------------------------------------------------------------------------------- /model_data/yolo_anchors.txt: -------------------------------------------------------------------------------- 1 | 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 2 | -------------------------------------------------------------------------------- /model_data/yolo_coco_classes.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /model_data/yolov3_anchors.txt: -------------------------------------------------------------------------------- 1 | 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 -------------------------------------------------------------------------------- /out/ssd_mobilenet_v1_dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/out/ssd_mobilenet_v1_dog.jpg -------------------------------------------------------------------------------- /out/ssd_mobilenet_v1_image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/out/ssd_mobilenet_v1_image1.jpg -------------------------------------------------------------------------------- /out/ssd_mobilenet_v1_image2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/out/ssd_mobilenet_v1_image2.jpg -------------------------------------------------------------------------------- /out/ssd_mobilenet_v1_image3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/out/ssd_mobilenet_v1_image3.jpg 
-------------------------------------------------------------------------------- /out/ssdlite_mobilenet_v2_dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/out/ssdlite_mobilenet_v2_dog.jpg -------------------------------------------------------------------------------- /out/tiny_yolo_dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/out/tiny_yolo_dog.jpg -------------------------------------------------------------------------------- /out/yolov3_dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/out/yolov3_dog.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow>=1.14.0 2 | numpy>=1.15.4, <1.17.0 3 | opencv-python==3.4.0.12 4 | Keras>=2.1.4, <=2.2.4 5 | tqdm>=4.32.2 6 | -------------------------------------------------------------------------------- /test_ssd_mobilenet_v1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import tarfile 4 | import glob 5 | import six.moves.urllib as urllib 6 | import cv2 7 | from tqdm import tqdm 8 | import numpy as np 9 | import tensorflow as tf 10 | from utils.ssd_mobilenet_utils import * 11 | 12 | def run_detection(image_data, sess): 13 | # Define input and output Tensors for detection_graph 14 | image_tensor = sess.graph.get_tensor_by_name('image_tensor:0') 15 | 16 | # Each box represents a part of the image where a particular object was detected. 17 | detection_boxes = sess.graph.get_tensor_by_name('detection_boxes:0') 18 | 19 | # Each score represents the level of confidence for each of the objects. 20 | # Score is shown on the result image, together with the class label. 
21 | detection_scores = sess.graph.get_tensor_by_name('detection_scores:0') 22 | detection_classes = sess.graph.get_tensor_by_name('detection_classes:0') 23 | num_detections = sess.graph.get_tensor_by_name('num_detections:0') 24 | 25 | boxes, scores, classes, num = sess.run([detection_boxes, detection_scores, detection_classes, num_detections], 26 | feed_dict={image_tensor: image_data}) 27 | boxes, scores, classes = np.squeeze(boxes), np.squeeze(scores), np.squeeze(classes).astype(np.int32) 28 | out_scores, out_boxes, out_classes = non_max_suppression(scores, boxes, classes) 29 | 30 | # Print predictions info 31 | #print('Found {} boxes.'.format(len(out_boxes))) 32 | 33 | return out_scores, out_boxes, out_classes 34 | 35 | def image_object_detection(image_path, sess, colors): 36 | image = cv2.imread(image_path) 37 | 38 | image_data = preprocess_image(image, model_image_size=(300,300)) 39 | out_scores, out_boxes, out_classes = run_detection(image_data, sess) 40 | 41 | # Draw bounding boxes on the image file 42 | image = draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors) 43 | # Save the predicted bounding box on the image 44 | image_name = os.path.basename(image_path) 45 | cv2.imwrite(os.path.join("out/", "ssd_mobilenet_v1_" + image_name), image, [cv2.IMWRITE_JPEG_QUALITY, 90]) 46 | 47 | def real_time_object_detection(sess, colors): 48 | camera = cv2.VideoCapture(0) 49 | 50 | while camera.isOpened(): 51 | start = time.time() 52 | ret, frame = camera.read() 53 | 54 | if ret: 55 | image_data = preprocess_image(frame, model_image_size=(300,300)) 56 | out_scores, out_boxes, out_classes = run_detection(image_data, sess) 57 | # Draw bounding boxes on the image file 58 | result = draw_boxes(frame, out_scores, out_boxes, out_classes, class_names, colors) 59 | end = time.time() 60 | 61 | # fps 62 | t = end - start 63 | fps = "Fps: {:.2f}".format(1 / t) 64 | # Draw the FPS counter on the frame 65 | cv2.putText(frame, fps, (10, 30), 66 | cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA) 67 | cv2.imshow("Object detection - ssd_mobilenet_v1", frame) 68 | if cv2.waitKey(1) & 0xFF == ord('q'): 69 | break 70 | 71 | camera.release() 72 | cv2.destroyAllWindows() 73 | 74 | def download_from_url(url, file_name): 75 | file_size = int(urllib.request.urlopen(url).info().get('Content-Length', -1)) 76 | pbar = tqdm(total=file_size) 77 | 78 | def _progress(block_num, block_size, total_size): 79 | """Progress callback for urlretrieve. 80 | @block_num: number of data blocks downloaded so far 81 | @block_size: size of each data block 82 | @total_size: total size of the remote file 83 | """ 84 | pbar.update(block_size) 85 | 86 | filepath, _ = urllib.request.urlretrieve(url, file_name, _progress) 87 | pbar.close() 88 | 89 | def untar_file(file_name, dst): 90 | tar_file = tarfile.open(file_name) 91 | for file in tar_file.getmembers(): 92 | filename = os.path.basename(file.name) 93 | if 'frozen_inference_graph.pb' in filename: 94 | tar_file.extract(file, dst) 95 | 96 | if __name__ == '__main__': 97 | # What model to download 98 | model_name = 'ssd_mobilenet_v1_coco_2017_11_17' 99 | model_file = model_name + '.tar.gz' 100 | download_base = 'http://download.tensorflow.org/models/object_detection/' 101 | url = download_base + model_file 102 | 103 | # Download model to model_data dir 104 | model_dir = 'model_data' 105 | if not os.path.isdir(model_dir): 106 | os.mkdir(model_dir) 107 | file_path = os.path.join(model_dir, model_file) 108 | 109 | # Load a (frozen) Tensorflow model into memory. 
110 | path_to_ckpt = model_dir + '/' + model_name + '/frozen_inference_graph.pb' 111 | 112 | if not os.path.exists(path_to_ckpt): 113 | download_from_url(url, file_path) 114 | untar_file(file_path, model_dir) 115 | 116 | detection_graph = tf.Graph() 117 | with detection_graph.as_default(): 118 | od_graph_def = tf.compat.v1.GraphDef() 119 | with tf.io.gfile.GFile(path_to_ckpt, 'rb') as fid: 120 | serialized_graph = fid.read() 121 | od_graph_def.ParseFromString(serialized_graph) 122 | tf.import_graph_def(od_graph_def, name='') 123 | 124 | # label 125 | class_names = read_classes('model_data/coco_classes.txt') 126 | # Generate colors for drawing bounding boxes. 127 | colors = generate_colors(class_names) 128 | 129 | with detection_graph.as_default(): 130 | with tf.Session() as sess: 131 | ''' 132 | # image_object_detection 133 | # Make a list of images 134 | images = glob.glob('./images/*.jpg') 135 | for fname in images: 136 | image_object_detection(fname, sess, colors) 137 | ''' 138 | 139 | # real-time object detection 140 | real_time_object_detection(sess, colors) 141 | -------------------------------------------------------------------------------- /test_ssdlite_mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import cv2 4 | import numpy as np 5 | import tensorflow as tf 6 | from utils.ssd_mobilenet_utils import * 7 | 8 | def run_detection(image, interpreter): 9 | # Run the model on the input image 10 | # Sets the value of the input tensor. 11 | interpreter.set_tensor(input_details[0]['index'], image) 12 | # Invoke the interpreter. 13 | interpreter.invoke() 14 | 15 | # Get results 16 | boxes = interpreter.get_tensor(output_details[0]['index']) 17 | classes = interpreter.get_tensor(output_details[1]['index']) 18 | scores = interpreter.get_tensor(output_details[2]['index']) 19 | num = interpreter.get_tensor(output_details[3]['index']) 20 | 21 | boxes, scores, classes = np.squeeze(boxes), np.squeeze(scores), np.squeeze(classes + 1).astype(np.int32) # tflite classes are 0-based; +1 aligns them with coco_classes.txt 22 | out_scores, out_boxes, out_classes = non_max_suppression(scores, boxes, classes) 23 | 24 | # Print predictions info 25 | #print('Found {} boxes for {}'.format(len(out_boxes), 'images/dog.jpg')) 26 | 27 | return out_scores, out_boxes, out_classes 28 | 29 | def image_object_detection(interpreter, colors): 30 | image = cv2.imread('images/dog.jpg') 31 | image_data = preprocess_image_for_tflite(image, model_image_size=300) 32 | out_scores, out_boxes, out_classes = run_detection(image_data, interpreter) 33 | 34 | # Draw bounding boxes on the image file 35 | result = draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors) 36 | # Save the predicted bounding box on the image 37 | cv2.imwrite(os.path.join("out", "ssdlite_mobilenet_v2_dog.jpg"), result, [cv2.IMWRITE_JPEG_QUALITY, 90]) 38 | 39 | def real_time_object_detection(interpreter, colors): 40 | camera = cv2.VideoCapture(0) 41 | 42 | while camera.isOpened(): 43 | start = time.time() 44 | ret, frame = camera.read() 45 | 46 | if ret: 47 | image_data = preprocess_image_for_tflite(frame, model_image_size=300) 48 | out_scores, out_boxes, out_classes = run_detection(image_data, interpreter) 49 | # Draw bounding boxes on the image file 50 | result = draw_boxes(frame, out_scores, out_boxes, out_classes, class_names, colors) 51 | end = time.time() 52 | 53 | # fps 54 | t = end - start 55 | fps = "Fps: {:.2f}".format(1 / t) 56 | cv2.putText(result, fps, (10, 30), 57 | cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, 
cv2.LINE_AA) 58 | 59 | cv2.imshow("Object detection - ssdlite_mobilenet_v2", frame) 60 | if cv2.waitKey(1) & 0xFF == ord('q'): 61 | break 62 | 63 | camera.release() 64 | cv2.destroyAllWindows() 65 | 66 | if __name__ == '__main__': 67 | # Load TFLite model and allocate tensors. 68 | interpreter = tf.lite.Interpreter(model_path="model_data/ssdlite_mobilenet_v2.tflite") 69 | interpreter.allocate_tensors() 70 | 71 | # Get input and output tensors. 72 | input_details = interpreter.get_input_details() 73 | output_details = interpreter.get_output_details() 74 | 75 | # label 76 | class_names = read_classes('model_data/coco_classes.txt') 77 | # Generate colors for drawing bounding boxes. 78 | colors = generate_colors(class_names) 79 | 80 | #image_object_detection(interpreter, colors) 81 | real_time_object_detection(interpreter, colors) 82 | -------------------------------------------------------------------------------- /test_tiny_yolo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import glob 4 | import cv2 5 | import numpy as np 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from keras.models import load_model 9 | from yad2k.models.keras_yolo import yolo_head, yolo_boxes_to_corners 10 | from utils.yolo_utils import * 11 | 12 | def yolo_eval(yolo_outputs, image_shape=(720., 1280.), max_boxes=10, score_threshold=.6, iou_threshold=.5): 13 | # Retrieve outputs of the YOLO model 14 | box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs 15 | 16 | # Convert boxes to be ready for filtering functions 17 | boxes = yolo_boxes_to_corners(box_xy, box_wh) 18 | 19 | # Filter out boxes whose class score is below score_threshold 20 | scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, score_threshold) 21 | 22 | # Scale boxes back to original image shape. 23 | boxes = scale_boxes(boxes, image_shape) # boxes: [y1, x1, y2, x2] 24 | 25 | # Apply non-max suppression with a threshold of iou_threshold 26 | scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes, iou_threshold) 27 | 28 | 29 | 30 | return scores, boxes, classes 31 | 32 | def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = .6): 33 | # Compute box scores 34 | box_scores = box_confidence * box_class_probs 35 | 36 | # Find the class with the highest score for each box, and keep track of that score 37 | box_classes = K.argmax(box_scores, axis=-1) 38 | box_class_scores = K.max(box_scores, axis=-1, keepdims=False) 39 | 40 | # Create a filtering mask based on "box_class_scores" by using "threshold". The mask should have the 41 | # same dimension as box_class_scores, and be True for the boxes you want to keep (with probability >= threshold) 42 | filtering_mask = box_class_scores >= threshold 43 | 44 | # Apply the mask to scores, boxes and classes 45 | scores = tf.boolean_mask(box_class_scores, filtering_mask) 46 | boxes = tf.boolean_mask(boxes, filtering_mask) 47 | classes = tf.boolean_mask(box_classes, filtering_mask) 48 | 49 | return scores, boxes, classes 50 |
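# Example (illustrative shapes, not from this repo): for tiny-YOLOv2 at a 416x416 input the
# grid is 13x13 with 5 anchors (see model_data/yolo_anchors.txt) and 80 classes, so
# box_confidence is (batch, 13, 13, 5, 1), box_class_probs is (batch, 13, 13, 5, 80), and
# box_scores is (batch, 13, 13, 5, 80); after the boolean mask, scores, boxes and classes
# are flat tensors holding only the kept detections.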
51 | def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5): 52 | max_boxes_tensor = K.variable(max_boxes, dtype='int32') # tensor to be used in tf.image.non_max_suppression() 53 | K.get_session().run(tf.variables_initializer([max_boxes_tensor])) # initialize variable max_boxes_tensor 54 | 55 | # Use tf.image.non_max_suppression() to get the list of indices corresponding to boxes you keep 56 | nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes, iou_threshold) 57 | 58 | # Use K.gather() to select only nms_indices from scores, boxes and classes 59 | scores = K.gather(scores, nms_indices) 60 | boxes = K.gather(boxes, nms_indices) 61 | classes = K.gather(classes, nms_indices) 62 | 63 | return scores, boxes, classes 64 | 65 | def image_detection(sess, image_path, image_file, colors): 66 | # Preprocess your image 67 | image, image_data = preprocess_image(image_path + image_file, model_image_size = (416, 416)) 68 | 69 | # Run the session to compute scores, boxes and classes for the input image, 70 | # with K.learning_phase() set to 0 (inference mode) 71 | out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes], feed_dict={yolo_model.input:image_data, K.learning_phase():0}) 72 | 73 | # Print predictions info 74 | print('Found {} boxes for {}'.format(len(out_boxes), image_file)) 75 | 76 | # Draw bounding boxes on the image file 77 | image = draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors) 78 | 79 | # Save the predicted bounding box on the image 80 | #image.save(os.path.join("out", image_file), quality=90) 81 | cv2.imwrite(os.path.join("out", "tiny_yolo_" + image_file), image, [cv2.IMWRITE_JPEG_QUALITY, 90]) 82 | 83 | return out_scores, out_boxes, out_classes 84 | 85 | def video_detection(sess, image, colors): 86 | resized_image = cv2.resize(image, (416, 416), interpolation=cv2.INTER_AREA) 87 | resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB) 88 | image_data = np.array(resized_image, dtype='float32') 89 | image_data /= 255. 90 | image_data = np.expand_dims(image_data, 0) 91 | 92 | out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes], feed_dict={yolo_model.input:image_data, K.learning_phase():0}) 93 | 94 | image = draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors) 95 | 96 | return image 97 | 98 | if __name__ == "__main__": 99 | sess = K.get_session() 100 | 101 | yolo_model = load_model("model_data/tiny_yolo.h5") 102 | #yolo_model.summary() 103 | 104 | class_names = read_classes("model_data/yolo_coco_classes.txt") 105 | anchors = read_anchors("model_data/yolo_anchors.txt") 106 | # Generate colors for drawing bounding boxes. 
107 | colors = generate_colors(class_names) 108 | 109 | ''' 110 | # image detection 111 | image_file = "dog.jpg" 112 | image_path = "images/" 113 | image_shape = np.float32(cv2.imread(image_path + image_file).shape[:2]) 114 | 115 | yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names)) 116 | scores, boxes, classes = yolo_eval(yolo_outputs, image_shape=image_shape) 117 | 118 | # Run detection on the image 119 | out_scores, out_boxes, out_classes = image_detection(sess, image_path, image_file, colors) 120 | ''' 121 | 122 | # video detection 123 | camera = cv2.VideoCapture(0) 124 | #camera.set(cv2.CAP_PROP_FRAME_WIDTH, 288) # set capture resolution 125 | #camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 288) 126 | #print('WIDTH', camera.get(3), 'HEIGHT', camera.get(4)) 127 | #print('FPS', camera.get(5)) 128 | 129 | image_shape = np.float32(camera.get(4)), np.float32(camera.get(3)) 130 | yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names)) 131 | scores, boxes, classes = yolo_eval(yolo_outputs, image_shape=image_shape, score_threshold=.3) 132 | 133 | while camera.isOpened(): 134 | start = time.time() 135 | ret, frame = camera.read() 136 | 137 | if ret: 138 | image = video_detection(sess, frame, colors) 139 | end = time.time() 140 | 141 | # fps 142 | t = end - start 143 | fps = "Fps: {:.2f}".format(1 / t) 144 | # Draw the FPS counter on the frame 145 | cv2.putText(image, fps, (10, 30), 146 | cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA) 147 | 148 | cv2.imshow('image', image) 149 | 150 | if cv2.waitKey(1) & 0xFF == ord('q'): 151 | break 152 | else: 153 | break 154 | 155 | camera.release() 156 | cv2.destroyAllWindows() 157 | -------------------------------------------------------------------------------- /test_yolov3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import cv2 4 | import numpy as np 5 | import tensorflow as tf 6 | from keras import backend as K 7 | from keras.models import load_model 8 | from yolov3.model import yolo_eval 9 | from utils.yolo_utils import * 10 | 11 | def image_detection(sess, image_path, image_file, colors): 12 | # Preprocess your image 13 | image, image_data = preprocess_image(image_path + image_file, model_image_size = (416, 416)) 14 | 15 | # Run the session to compute scores, boxes and classes for the input image, 16 | # with K.learning_phase() set to 0 (inference mode) 17 | out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes], feed_dict={yolov3.input:image_data, K.learning_phase():0}) 18 | 19 | # Print predictions info 20 | print('Found {} boxes for {}'.format(len(out_boxes), image_file)) 21 | 22 | # Draw bounding boxes on the image file 23 | image = draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors) 24 | 25 | # Save the predicted bounding box on the image 26 | #image.save(os.path.join("out", image_file), quality=90) 27 | cv2.imwrite(os.path.join("out", "yolov3_" + image_file), image, [cv2.IMWRITE_JPEG_QUALITY, 90]) 28 | 29 | return out_scores, out_boxes, out_classes 30 | 31 | def video_detection(sess, image, colors): 32 | resized_image = cv2.resize(image, (416, 416), interpolation=cv2.INTER_AREA) 33 | resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB) 34 | image_data = np.array(resized_image, dtype='float32') 35 | image_data /= 255. 
36 | image_data = np.expand_dims(image_data, 0) 37 | 38 | out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes], feed_dict={yolov3.input:image_data, K.learning_phase():0}) 39 | 40 | image = draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors) 41 | 42 | return image 43 | 44 | if __name__ == "__main__": 45 | sess = K.get_session() 46 | 47 | yolov3 = load_model("model_data/yolov3.h5") 48 | #yolov3.summary() 49 | 50 | class_names = read_classes("model_data/yolo_coco_classes.txt") 51 | anchors = read_anchors("model_data/yolov3_anchors.txt") 52 | # Generate colors for drawing bounding boxes. 53 | colors = generate_colors(class_names) 54 | 55 | ''' 56 | # image detection 57 | image_file = "dog.jpg" 58 | image_path = "images/" 59 | 60 | image_shape = np.float32(cv2.imread(image_path + image_file).shape[:2]) 61 | scores, boxes, classes = yolo_eval(yolov3.output, anchors, len(class_names), image_shape=image_shape) 62 | 63 | # Run detection on the image 64 | out_scores, out_boxes, out_classes = image_detection(sess, image_path, image_file, colors) 65 | ''' 66 | 67 | # video detection 68 | camera = cv2.VideoCapture(0) 69 | 70 | image_shape = np.float32(camera.get(4)), np.float32(camera.get(3)) 71 | scores, boxes, classes = yolo_eval(yolov3.output, anchors, len(class_names), image_shape=image_shape) 72 | 73 | while camera.isOpened(): 74 | start = time.time() 75 | ret, frame = camera.read() 76 | 77 | if ret: 78 | image = video_detection(sess, frame, colors) 79 | end = time.time() 80 | 81 | # fps 82 | t = end - start 83 | fps = "Fps: {:.2f}".format(1 / t) 84 | # Draw the FPS counter on the frame 85 | cv2.putText(image, fps, (10, 30), 86 | cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA) 87 | 88 | cv2.imshow('image', image) 89 | 90 | if cv2.waitKey(1) & 0xFF == ord('q'): 91 | break 92 | else: 93 | break 94 | 95 | camera.release() 96 | cv2.destroyAllWindows() 97 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/utils/__init__.py -------------------------------------------------------------------------------- /utils/ssd_mobilenet_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import colorsys 3 | import random 4 | import cv2 5 | import numpy as np 6 | from keras import backend as K 7 | import tensorflow as tf 8 | 9 | def read_classes(classes_path): 10 | with open(classes_path) as f: 11 | class_names = f.readlines() 12 | class_names = [c.strip() for c in class_names] 13 | return class_names 14 | 15 | def generate_colors(class_names): 16 | hsv_tuples = [(x / len(class_names), 1., 1.) for x in range(len(class_names))] 17 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 18 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors)) 19 | random.seed(10101) # Fixed seed for consistent colors across runs. 20 | random.shuffle(colors) # Shuffle colors to decorrelate adjacent classes. 21 | random.seed(None) # Reset seed to default. 
22 | return colors 23 | 24 | def preprocess_image(image, model_image_size=(300,300)): 25 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 26 | #image = cv2.resize(image, tuple(reversed(model_image_size)), interpolation=cv2.INTER_AREA) 27 | image = np.array(image, dtype='float32') 28 | image = np.expand_dims(image, 0) # Add batch dimension. 29 | 30 | return image 31 | 32 | def preprocess_image_for_tflite(image, model_image_size=300): 33 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 34 | image = cv2.resize(image, (model_image_size, model_image_size)) 35 | image = np.expand_dims(image, axis=0) 36 | image = (2.0 / 255.0) * image - 1.0 37 | image = image.astype('float32') 38 | 39 | return image 40 | # NOTE: despite its name, this helper only applies a score threshold and keeps at most max_boxes detections; it assumes the detections are already NMS-ed and sorted by score, as TF Object Detection API model outputs are. 41 | def non_max_suppression(scores, boxes, classes, max_boxes=10, min_score_thresh=0.5): 42 | out_boxes = [] 43 | out_scores = [] 44 | out_classes = [] 45 | if not max_boxes: 46 | max_boxes = boxes.shape[0] 47 | for i in range(min(max_boxes, boxes.shape[0])): 48 | if scores is None or scores[i] > min_score_thresh: 49 | out_boxes.append(boxes[i]) 50 | out_scores.append(scores[i]) 51 | out_classes.append(classes[i]) 52 | 53 | out_boxes = np.array(out_boxes) 54 | out_scores = np.array(out_scores) 55 | out_classes = np.array(out_classes) 56 | 57 | return out_scores, out_boxes, out_classes 58 | 59 | def draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors): 60 | h, w, _ = image.shape 61 | 62 | for i, c in reversed(list(enumerate(out_classes))): 63 | predicted_class = class_names[c] 64 | box = out_boxes[i] 65 | score = out_scores[i] 66 | 67 | label = '{} {:.2f}'.format(predicted_class, score) 68 | 69 | ############################################### 70 | # yolo 71 | #top, left, bottom, right = box 72 | ############################################### 73 | 74 | ############################################### 75 | # ssd_mobilenet 76 | ymin, xmin, ymax, xmax = box 77 | left, right, top, bottom = (xmin * w, xmax * w, 78 | ymin * h, ymax * h) 79 | ############################################### 80 | 81 | top = max(0, np.floor(top + 0.5).astype('int32')) 82 | left = max(0, np.floor(left + 0.5).astype('int32')) 83 | bottom = min(h, np.floor(bottom + 0.5).astype('int32')) 84 | right = min(w, np.floor(right + 0.5).astype('int32')) 85 | print(label, (left, top), (right, bottom)) 86 | 87 | # colors: RGB, opencv: BGR 88 | cv2.rectangle(image, (left, top), (right, bottom), tuple(reversed(colors[c])), 6) 89 | 90 | font_face = cv2.FONT_HERSHEY_SIMPLEX 91 | font_scale = 1 92 | font_thickness = 2 93 | 94 | label_size = cv2.getTextSize(label, font_face, font_scale, font_thickness)[0] 95 | label_rect_left, label_rect_top = int(left - 3), int(top - 3) 96 | label_rect_right, label_rect_bottom = int(left + 3 + label_size[0]), int(top - 5 - label_size[1]) 97 | cv2.rectangle(image, (label_rect_left, label_rect_top), (label_rect_right, label_rect_bottom), tuple(reversed(colors[c])), -1) 98 | 99 | cv2.putText(image, label, (left, int(top - 4)), font_face, font_scale, (0, 0, 0), font_thickness, cv2.LINE_AA) 100 | 101 | return image 102 | 103 | -------------------------------------------------------------------------------- /utils/yolo_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import colorsys 3 | import random 4 | import cv2 5 | import numpy as np 6 | from keras import backend as K 7 | 8 | def read_classes(classes_path): 9 | with open(classes_path) as f: 10 | class_names = f.readlines() 11 | class_names = [c.strip() for c in class_names] 12 | 
return class_names 13 | 14 | def read_anchors(anchors_path): 15 | with open(anchors_path) as f: 16 | anchors = f.readline() 17 | anchors = [float(x) for x in anchors.split(',')] 18 | anchors = np.array(anchors).reshape(-1, 2) 19 | return anchors 20 | 21 | def generate_colors(class_names): 22 | hsv_tuples = [(x / len(class_names), 1., 1.) for x in range(len(class_names))] 23 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 24 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors)) 25 | random.seed(10101) # Fixed seed for consistent colors across runs. 26 | random.shuffle(colors) # Shuffle colors to decorrelate adjacent classes. 27 | random.seed(None) # Reset seed to default. 28 | return colors 29 | 30 | def scale_boxes(boxes, image_shape): 31 | """ Scales the predicted boxes in order to be drawable on the image""" 32 | height = image_shape[0] 33 | width = image_shape[1] 34 | image_dims = K.stack([height, width, height, width]) 35 | image_dims = K.reshape(image_dims, [1, 4]) 36 | boxes = boxes * image_dims 37 | return boxes 38 | 39 | def preprocess_image(img_path, model_image_size): 40 | image = cv2.imread(img_path) 41 | resized_image = cv2.resize(image, tuple(reversed(model_image_size)), interpolation=cv2.INTER_AREA) 42 | # For images/dog.jpg, INTER_CUBIC interpolation gives better results: 43 | #resized_image = cv2.resize(image, tuple(reversed(model_image_size)), interpolation=cv2.INTER_CUBIC) 44 | resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB) 45 | image_data = np.array(resized_image, dtype='float32') 46 | image_data /= 255. 47 | image_data = np.expand_dims(image_data, 0) # Add batch dimension. 48 | 49 | return image, image_data 50 | 51 | def draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors): 52 | h, w, _ = image.shape 53 | 54 | for i, c in reversed(list(enumerate(out_classes))): 55 | predicted_class = class_names[c] 56 | box = out_boxes[i] 57 | score = out_scores[i] 58 | 59 | label = '{} {:.2f}'.format(predicted_class, score) 60 | 61 | top, left, bottom, right = box 62 | top = max(0, np.floor(top + 0.5).astype('int32')) 63 | left = max(0, np.floor(left + 0.5).astype('int32')) 64 | bottom = min(h, np.floor(bottom + 0.5).astype('int32')) 65 | right = min(w, np.floor(right + 0.5).astype('int32')) 66 | print(label, (left, top), (right, bottom)) 67 | 68 | # colors: RGB, opencv: BGR 69 | cv2.rectangle(image, (left, top), (right, bottom), tuple(reversed(colors[c])), 6) 70 | 71 | font_face = cv2.FONT_HERSHEY_SIMPLEX 72 | font_scale = 1 73 | font_thickness = 2 74 | 75 | label_size = cv2.getTextSize(label, font_face, font_scale, font_thickness)[0] 76 | label_rect_left, label_rect_top = int(left - 3), int(top - 3) 77 | label_rect_right, label_rect_bottom = int(left + 3 + label_size[0]), int(top - 5 - label_size[1]) 78 | cv2.rectangle(image, (label_rect_left, label_rect_top), (label_rect_right, label_rect_bottom), tuple(reversed(colors[c])), -1) 79 | 80 | cv2.putText(image, label, (left, int(top - 4)), font_face, font_scale, (0, 0, 0), font_thickness, cv2.LINE_AA) 81 | 82 | return image 83 | -------------------------------------------------------------------------------- /yad2k/models/keras_darknet19.py: -------------------------------------------------------------------------------- 1 | """Darknet19 Model Defined in Keras.""" 2 | import functools 3 | from functools import partial 4 | 5 | from keras.layers import Conv2D, MaxPooling2D 6 | from keras.layers.advanced_activations import LeakyReLU 7 | from keras.layers.normalization import 
BatchNormalization 8 | from keras.models import Model 9 | from keras.regularizers import l2 10 | 11 | from ..utils import compose 12 | 13 | # Partial wrapper for Convolution2D with static default argument. 14 | _DarknetConv2D = partial(Conv2D, padding='same') 15 | 16 | 17 | @functools.wraps(Conv2D) 18 | def DarknetConv2D(*args, **kwargs): 19 | """Wrapper to set Darknet weight regularizer for Convolution2D.""" 20 | darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} 21 | darknet_conv_kwargs.update(kwargs) 22 | return _DarknetConv2D(*args, **darknet_conv_kwargs) 23 | 24 | 25 | def DarknetConv2D_BN_Leaky(*args, **kwargs): 26 | """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.""" 27 | no_bias_kwargs = {'use_bias': False} 28 | no_bias_kwargs.update(kwargs) 29 | return compose( 30 | DarknetConv2D(*args, **no_bias_kwargs), 31 | BatchNormalization(), 32 | LeakyReLU(alpha=0.1)) 33 | 34 | 35 | def bottleneck_block(outer_filters, bottleneck_filters): 36 | """Bottleneck block of 3x3, 1x1, 3x3 convolutions.""" 37 | return compose( 38 | DarknetConv2D_BN_Leaky(outer_filters, (3, 3)), 39 | DarknetConv2D_BN_Leaky(bottleneck_filters, (1, 1)), 40 | DarknetConv2D_BN_Leaky(outer_filters, (3, 3))) 41 | 42 | 43 | def bottleneck_x2_block(outer_filters, bottleneck_filters): 44 | """Bottleneck block of 3x3, 1x1, 3x3, 1x1, 3x3 convolutions.""" 45 | return compose( 46 | bottleneck_block(outer_filters, bottleneck_filters), 47 | DarknetConv2D_BN_Leaky(bottleneck_filters, (1, 1)), 48 | DarknetConv2D_BN_Leaky(outer_filters, (3, 3))) 49 | 50 | 51 | def darknet_body(): 52 | """Generate first 18 conv layers of Darknet-19.""" 53 | return compose( 54 | DarknetConv2D_BN_Leaky(32, (3, 3)), 55 | MaxPooling2D(), 56 | DarknetConv2D_BN_Leaky(64, (3, 3)), 57 | MaxPooling2D(), 58 | bottleneck_block(128, 64), 59 | MaxPooling2D(), 60 | bottleneck_block(256, 128), 61 | MaxPooling2D(), 62 | bottleneck_x2_block(512, 256), 63 | MaxPooling2D(), 64 | bottleneck_x2_block(1024, 512)) 65 | 66 | 67 | def darknet19(inputs): 68 | """Generate Darknet-19 model for Imagenet classification.""" 69 | body = darknet_body()(inputs) 70 | logits = DarknetConv2D(1000, (1, 1), activation='softmax')(body) 71 | return Model(inputs, logits) 72 | -------------------------------------------------------------------------------- /yad2k/models/keras_yolo.py: -------------------------------------------------------------------------------- 1 | """YOLO_v2 Model Defined in Keras.""" 2 | import sys 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | import keras.backend as K 7 | from keras.layers import Lambda 8 | from keras.layers.merge import concatenate 9 | from keras.models import Model 10 | 11 | from ..utils import compose 12 | from .keras_darknet19 import (DarknetConv2D, DarknetConv2D_BN_Leaky, darknet_body) 13 | 14 | sys.path.append('..') 15 | 16 | voc_anchors = np.array( 17 | [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]]) 18 | 19 | voc_classes = [ 20 | "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", 21 | "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", 22 | "pottedplant", "sheep", "sofa", "train", "tvmonitor" 23 | ] 24 | 25 | 26 | def space_to_depth_x2(x): 27 | """Thin wrapper for Tensorflow space_to_depth with block_size=2.""" 28 | # Import currently required to make Lambda work. 
29 | # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273 30 | import tensorflow as tf 31 | return tf.space_to_depth(x, block_size=2) 32 | 33 | 34 | def space_to_depth_x2_output_shape(input_shape): 35 | """Determine space_to_depth output shape for block_size=2. 36 | 37 | Note: For Lambda with TensorFlow backend, output shape may not be needed. 38 | """ 39 | return (input_shape[0], input_shape[1] // 2, input_shape[2] // 2, 4 * 40 | input_shape[3]) if input_shape[1] else (input_shape[0], None, None, 41 | 4 * input_shape[3]) 42 | 43 | 44 | def yolo_body(inputs, num_anchors, num_classes): 45 | """Create YOLO_V2 model CNN body in Keras.""" 46 | darknet = Model(inputs, darknet_body()(inputs)) 47 | conv20 = compose( 48 | DarknetConv2D_BN_Leaky(1024, (3, 3)), 49 | DarknetConv2D_BN_Leaky(1024, (3, 3)))(darknet.output) 50 | 51 | conv13 = darknet.layers[43].output 52 | conv21 = DarknetConv2D_BN_Leaky(64, (1, 1))(conv13) 53 | # TODO: Allow Keras Lambda to use func arguments for output_shape? 54 | conv21_reshaped = Lambda( 55 | space_to_depth_x2, 56 | output_shape=space_to_depth_x2_output_shape, 57 | name='space_to_depth')(conv21) 58 | 59 | x = concatenate([conv21_reshaped, conv20]) 60 | x = DarknetConv2D_BN_Leaky(1024, (3, 3))(x) 61 | x = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x) 62 | return Model(inputs, x) 63 | 64 | 65 | def yolo_head(feats, anchors, num_classes): 66 | """Convert final layer features to bounding box parameters. 67 | 68 | Parameters 69 | ---------- 70 | feats : tensor 71 | Final convolutional layer features. 72 | anchors : array-like 73 | Anchor box widths and heights. 74 | num_classes : int 75 | Number of target classes. 76 | 77 | Returns 78 | ------- 79 | box_xy : tensor 80 | x, y box predictions adjusted by spatial location in conv layer. 81 | box_wh : tensor 82 | w, h box predictions adjusted by anchors and conv spatial resolution. 83 | box_conf : tensor 84 | Probability estimate for whether each box contains any object. 85 | box_class_pred : tensor 86 | Probability distribution estimate for each box over class labels. 87 | """ 88 | num_anchors = len(anchors) 89 | # Reshape to batch, height, width, num_anchors, box_params. 90 | anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2]) 91 | # Static implementation for fixed models. 92 | # TODO: Remove or add option for static implementation. 93 | # _, conv_height, conv_width, _ = K.int_shape(feats) 94 | # conv_dims = K.variable([conv_width, conv_height]) 95 | 96 | # Dynamic implementation of conv dims for fully convolutional model. 97 | conv_dims = K.shape(feats)[1:3] # assuming channels last 98 | # In YOLO the height index is the inner most iteration. 99 | conv_height_index = K.arange(0, stop=conv_dims[0]) 100 | conv_width_index = K.arange(0, stop=conv_dims[1]) 101 | conv_height_index = K.tile(conv_height_index, [conv_dims[1]]) 102 | 103 | # TODO: Repeat_elements and tf.split doesn't support dynamic splits. 
104 | # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0) 105 | conv_width_index = K.tile(K.expand_dims(conv_width_index, 0), [conv_dims[0], 1]) 106 | conv_width_index = K.flatten(K.transpose(conv_width_index)) 107 | conv_index = K.transpose(K.stack([conv_height_index, conv_width_index])) 108 | conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2]) 109 | conv_index = K.cast(conv_index, K.dtype(feats)) 110 | 111 | feats = K.reshape(feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5]) 112 | conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats)) 113 | 114 | # Static generation of conv_index: 115 | # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)]) 116 | # conv_index = conv_index[:, [1, 0]] # swap columns for YOLO ordering. 117 | # conv_index = K.variable( 118 | # conv_index.reshape(1, conv_height, conv_width, 1, 2)) 119 | # feats = Reshape( 120 | # (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats) 121 | 122 | box_confidence = K.sigmoid(feats[..., 4:5]) 123 | box_xy = K.sigmoid(feats[..., :2]) 124 | box_wh = K.exp(feats[..., 2:4]) 125 | box_class_probs = K.softmax(feats[..., 5:]) 126 | 127 | # Adjust predictions to each spatial grid point and anchor size. 128 | # Note: YOLO iterates over height index before width index. 129 | box_xy = (box_xy + conv_index) / conv_dims 130 | box_wh = box_wh * anchors_tensor / conv_dims 131 | 132 | return [box_confidence, box_xy, box_wh, box_class_probs] 133 | 134 | 135 | def yolo_boxes_to_corners(box_xy, box_wh): 136 | """Convert YOLO box predictions to bounding box corners.""" 137 | box_mins = box_xy - (box_wh / 2.) 138 | box_maxes = box_xy + (box_wh / 2.) 139 | 140 | return K.concatenate([ 141 | box_mins[..., 1:2], # y_min 142 | box_mins[..., 0:1], # x_min 143 | box_maxes[..., 1:2], # y_max 144 | box_maxes[..., 0:1] # x_max 145 | ]) 146 | 147 | 148 | def yolo_loss(args, 149 | anchors, 150 | num_classes, 151 | rescore_confidence=False, 152 | print_loss=False): 153 | """YOLO localization loss function. 154 | 155 | Parameters 156 | ---------- 157 | yolo_output : tensor 158 | Final convolutional layer features. 159 | 160 | true_boxes : tensor 161 | Ground truth boxes tensor with shape [batch, num_true_boxes, 5] 162 | containing box x_center, y_center, width, height, and class. 163 | 164 | detectors_mask : array 165 | 0/1 mask for detector positions where there is a matching ground truth. 166 | 167 | matching_true_boxes : array 168 | Corresponding ground truth boxes for positive detector positions. 169 | Already adjusted for conv height and width. 170 | 171 | anchors : tensor 172 | Anchor boxes for model. 173 | 174 | num_classes : int 175 | Number of object classes. 176 | 177 | rescore_confidence : bool, default=False 178 | If true then set confidence target to IOU of best predicted box with 179 | the closest matching ground truth box. 180 | 181 | print_loss : bool, default=False 182 | If True then use a tf.Print() to print the loss components. 183 | 184 | Returns 185 | ------- 186 | mean_loss : float 187 | mean localization loss across minibatch 188 | """ 189 | (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args 190 | num_anchors = len(anchors) 191 | object_scale = 5 192 | no_object_scale = 1 193 | class_scale = 1 194 | coordinates_scale = 1 195 | pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head( 196 | yolo_output, anchors, num_classes) 197 | 198 | # Unadjusted box predictions for loss. 
199 | # TODO: Remove extra computation shared with yolo_head. 200 | yolo_output_shape = K.shape(yolo_output) 201 | feats = K.reshape(yolo_output, [ 202 | -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors, 203 | num_classes + 5 204 | ]) 205 | pred_boxes = K.concatenate( 206 | (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1) 207 | 208 | # TODO: Adjust predictions by image width/height for non-square images? 209 | # IOUs may be off due to different aspect ratio. 210 | 211 | # Expand pred x,y,w,h to allow comparison with ground truth. 212 | # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params 213 | pred_xy = K.expand_dims(pred_xy, 4) 214 | pred_wh = K.expand_dims(pred_wh, 4) 215 | 216 | pred_wh_half = pred_wh / 2. 217 | pred_mins = pred_xy - pred_wh_half 218 | pred_maxes = pred_xy + pred_wh_half 219 | 220 | true_boxes_shape = K.shape(true_boxes) 221 | 222 | # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params 223 | true_boxes = K.reshape(true_boxes, [ 224 | true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2] 225 | ]) 226 | true_xy = true_boxes[..., 0:2] 227 | true_wh = true_boxes[..., 2:4] 228 | 229 | # Find IOU of each predicted box with each ground truth box. 230 | true_wh_half = true_wh / 2. 231 | true_mins = true_xy - true_wh_half 232 | true_maxes = true_xy + true_wh_half 233 | 234 | intersect_mins = K.maximum(pred_mins, true_mins) 235 | intersect_maxes = K.minimum(pred_maxes, true_maxes) 236 | intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) 237 | intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] 238 | 239 | pred_areas = pred_wh[..., 0] * pred_wh[..., 1] 240 | true_areas = true_wh[..., 0] * true_wh[..., 1] 241 | 242 | union_areas = pred_areas + true_areas - intersect_areas 243 | iou_scores = intersect_areas / union_areas 244 | 245 | # Best IOUs for each location. 246 | best_ious = K.max(iou_scores, axis=4) # Best IOU scores. 247 | best_ious = K.expand_dims(best_ious) 248 | 249 | # A detector has found an object if IOU > thresh for some true box. 250 | object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious)) 251 | 252 | # TODO: Darknet region training includes extra coordinate loss for early 253 | # training steps to encourage predictions to match anchor priors. 254 | 255 | # Determine confidence weights from object and no_object weights. 256 | # NOTE: YOLO does not use binary cross-entropy here. 257 | no_object_weights = (no_object_scale * (1 - object_detections) * 258 | (1 - detectors_mask)) 259 | no_objects_loss = no_object_weights * K.square(-pred_confidence) 260 | 261 | if rescore_confidence: 262 | objects_loss = (object_scale * detectors_mask * 263 | K.square(best_ious - pred_confidence)) 264 | else: 265 | objects_loss = (object_scale * detectors_mask * 266 | K.square(1 - pred_confidence)) 267 | confidence_loss = objects_loss + no_objects_loss 268 | 269 | # Classification loss for matching detections. 270 | # NOTE: YOLO does not use categorical cross-entropy loss here. 271 | matching_classes = K.cast(matching_true_boxes[..., 4], 'int32') 272 | matching_classes = K.one_hot(matching_classes, num_classes) 273 | classification_loss = (class_scale * detectors_mask * 274 | K.square(matching_classes - pred_class_prob)) 275 | 276 | # Coordinate loss for matching detection boxes. 
277 | matching_boxes = matching_true_boxes[..., 0:4] 278 | coordinates_loss = (coordinates_scale * detectors_mask * 279 | K.square(matching_boxes - pred_boxes)) 280 | 281 | confidence_loss_sum = K.sum(confidence_loss) 282 | classification_loss_sum = K.sum(classification_loss) 283 | coordinates_loss_sum = K.sum(coordinates_loss) 284 | total_loss = 0.5 * ( 285 | confidence_loss_sum + classification_loss_sum + coordinates_loss_sum) 286 | if print_loss: 287 | total_loss = tf.Print( 288 | total_loss, [ 289 | total_loss, confidence_loss_sum, classification_loss_sum, 290 | coordinates_loss_sum 291 | ], 292 | message='yolo_loss, conf_loss, class_loss, box_coord_loss:') 293 | 294 | return total_loss 295 | 296 | 297 | def yolo(inputs, anchors, num_classes): 298 | """Generate a complete YOLO_v2 localization model.""" 299 | num_anchors = len(anchors) 300 | body = yolo_body(inputs, num_anchors, num_classes) 301 | outputs = yolo_head(body.output, anchors, num_classes) 302 | return outputs 303 | 304 | 305 | def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=.6): 306 | """Filter YOLO boxes based on object and class confidence.""" 307 | 308 | box_scores = box_confidence * box_class_probs 309 | box_classes = K.argmax(box_scores, axis=-1) 310 | box_class_scores = K.max(box_scores, axis=-1) 311 | prediction_mask = box_class_scores >= threshold 312 | 313 | # TODO: Expose tf.boolean_mask to Keras backend? 314 | boxes = tf.boolean_mask(boxes, prediction_mask) 315 | scores = tf.boolean_mask(box_class_scores, prediction_mask) 316 | classes = tf.boolean_mask(box_classes, prediction_mask) 317 | 318 | return boxes, scores, classes 319 | 320 | 321 | def yolo_eval(yolo_outputs, 322 | image_shape, 323 | max_boxes=10, 324 | score_threshold=.6, 325 | iou_threshold=.5): 326 | """Evaluate YOLO model on given input batch and return filtered boxes.""" 327 | box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs 328 | boxes = yolo_boxes_to_corners(box_xy, box_wh) 329 | boxes, scores, classes = yolo_filter_boxes( 330 | box_confidence, boxes, box_class_probs, threshold=score_threshold) 331 | 332 | # Scale boxes back to original image shape. 333 | height = image_shape[0] 334 | width = image_shape[1] 335 | image_dims = K.stack([height, width, height, width]) 336 | image_dims = K.reshape(image_dims, [1, 4]) 337 | boxes = boxes * image_dims 338 | 339 | # TODO: Something must be done about this ugly hack! 340 | max_boxes_tensor = K.variable(max_boxes, dtype='int32') 341 | K.get_session().run(tf.variables_initializer([max_boxes_tensor])) 342 | nms_index = tf.image.non_max_suppression( 343 | boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold) 344 | boxes = K.gather(boxes, nms_index) 345 | scores = K.gather(scores, nms_index) 346 | classes = K.gather(classes, nms_index) 347 | 348 | return boxes, scores, classes 349 | 350 | 351 | def preprocess_true_boxes(true_boxes, anchors, image_size): 352 | """Find detector in YOLO where ground truth box should appear. 353 | 354 | Parameters 355 | ---------- 356 | true_boxes : array 357 | List of ground truth boxes in form of relative x, y, w, h, class. 358 | Relative coordinates are in the range [0, 1] indicating a percentage 359 | of the original image dimensions. 360 | anchors : array 361 | List of anchors in form of w, h. 362 | Anchors are assumed to be in the range [0, conv_size] where conv_size 363 | is the spatial dimension of the final convolutional features. 364 | image_size : array-like 365 | List of image dimensions in form of h, w in pixels. 
366 | 367 | Returns 368 | ------- 369 | detectors_mask : array 370 | 0/1 mask for detectors in [conv_height, conv_width, num_anchors, 1] 371 | that should be compared with a matching ground truth box. 372 | matching_true_boxes: array 373 | Same shape as detectors_mask with the corresponding ground truth box 374 | adjusted for comparison with predicted parameters at training time. 375 | """ 376 | height, width = image_size 377 | num_anchors = len(anchors) 378 | # Downsampling factor of 5x 2-stride max_pools == 32. 379 | # TODO: Remove hardcoding of downscaling calculations. 380 | assert height % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.' 381 | assert width % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.' 382 | conv_height = height // 32 383 | conv_width = width // 32 384 | num_box_params = true_boxes.shape[1] 385 | detectors_mask = np.zeros( 386 | (conv_height, conv_width, num_anchors, 1), dtype=np.float32) 387 | matching_true_boxes = np.zeros( 388 | (conv_height, conv_width, num_anchors, num_box_params), 389 | dtype=np.float32) 390 | 391 | for box in true_boxes: 392 | # scale box to convolutional feature spatial dimensions 393 | box_class = box[4:5] 394 | box = box[0:4] * np.array( 395 | [conv_width, conv_height, conv_width, conv_height]) 396 | i = np.floor(box[1]).astype('int') 397 | j = np.floor(box[0]).astype('int') # grid column; clamping this with min(..., 1), as the original listing did, would wrongly restrict boxes to the two leftmost columns 398 | best_iou = 0 399 | best_anchor = 0 400 | 401 | for k, anchor in enumerate(anchors): 402 | # Find IOU between box shifted to origin and anchor box. 403 | box_maxes = box[2:4] / 2. 404 | box_mins = -box_maxes 405 | anchor_maxes = (anchor / 2.) 406 | anchor_mins = -anchor_maxes 407 | 408 | intersect_mins = np.maximum(box_mins, anchor_mins) 409 | intersect_maxes = np.minimum(box_maxes, anchor_maxes) 410 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) 411 | intersect_area = intersect_wh[0] * intersect_wh[1] 412 | box_area = box[2] * box[3] 413 | anchor_area = anchor[0] * anchor[1] 414 | iou = intersect_area / (box_area + anchor_area - intersect_area) 415 | if iou > best_iou: 416 | best_iou = iou 417 | best_anchor = k 418 | 419 | if best_iou > 0: 420 | detectors_mask[i, j, best_anchor] = 1 421 | adjusted_box = np.array( 422 | [ 423 | box[0] - j, box[1] - i, 424 | np.log(box[2] / anchors[best_anchor][0]), 425 | np.log(box[3] / anchors[best_anchor][1]), box_class 426 | ], 427 | dtype=np.float32) 428 | matching_true_boxes[i, j, best_anchor] = adjusted_box 429 | return detectors_mask, matching_true_boxes 430 | -------------------------------------------------------------------------------- /yad2k/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | -------------------------------------------------------------------------------- /yad2k/utils/utils.py: -------------------------------------------------------------------------------- 1 | """Miscellaneous utility functions.""" 2 | 3 | from functools import reduce 4 | 5 | 6 | def compose(*funcs): 7 | """Compose arbitrarily many functions, evaluated left to right. 8 | 9 | Reference: https://mathieularose.com/function-composition-in-python/ 10 | """ 11 | # return lambda x: reduce(lambda v, f: f(v), funcs, x) 12 | if funcs: 13 | return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs) 14 | else: 15 | raise ValueError('Composition of empty sequence not supported.') 16 |
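# Illustrative usage (added note, not in the original file): compose applies the functions
# left to right, so compose(f, g)(x) == g(f(x)). For example:
#   add1 = lambda x: x + 1
#   double = lambda x: 2 * x
#   compose(add1, double)(3)  # == double(add1(3)) == 8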

    Reference: https://mathieularose.com/function-composition-in-python/
    """
    # return lambda x: reduce(lambda v, f: f(v), funcs, x)
    if funcs:
        return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
    else:
        raise ValueError('Composition of empty sequence not supported.')
--------------------------------------------------------------------------------
/yolov3/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaka-lin/object-detection/1017f7c3c1792af55dc5c53a7665db0890766302/yolov3/__init__.py
--------------------------------------------------------------------------------
/yolov3/model.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
from keras import backend as K

from utils.yolo_utils import scale_boxes


def yolo_head(feats, anchors, num_classes, n):
    """Convert final layer features to bounding box parameters.

    `n` is the output scale index (0, 1 or 2), mapping to strides
    32, 16 and 8 respectively.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the innermost iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over the height index before the width index.
    # TODO: It works with +1; the reason is still unknown.
    box_xy = (box_xy + conv_index + 1) / conv_dims
    # Anchors are in input-image pixels; dividing by conv_dims times the
    # per-scale stride (32/16/8) normalizes widths and heights to [0, 1].
    box_wh = box_wh * anchors_tensor / conv_dims / {0: 32, 1: 16, 2: 8}[n]

    return [box_xy, box_wh, box_confidence, box_class_probs]


def yolo_boxes_to_corners(box_xy, box_wh):
    """Convert YOLO box predictions to bounding box corners."""
    box_mins = box_xy - (box_wh / 2.)
    box_maxes = box_xy + (box_wh / 2.)
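
    # Corners are emitted in (y_min, x_min, y_max, x_max) order, matching the
    # [height, width, height, width] scaling applied later in yolo_eval.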
    return K.concatenate([
        box_mins[..., 1:2],   # y_min
        box_mins[..., 0:1],   # x_min
        box_maxes[..., 1:2],  # y_max
        box_maxes[..., 0:1]   # x_max
    ])


def yolo_boxes_and_scores(feats, anchors, num_classes, n):
    """Process the conv layer output of one YOLO scale."""
    box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, anchors, num_classes, n)
    # Convert boxes to corner format, ready for the filtering functions.
    boxes = yolo_boxes_to_corners(box_xy, box_wh)
    boxes = K.reshape(boxes, [-1, 3, 4])  # 3 anchors per scale
    # Compute box scores: confidence times per-class probability.
    box_scores = box_confidence * box_class_probs
    box_scores = K.reshape(box_scores, [-1, 3, num_classes])
    return boxes, box_scores


def yolo_filter_boxes(boxes, box_scores, threshold=.6):
    """Discard boxes whose best class score falls below `threshold`."""
    # Find the class with the max score for each box; keep that score.
    box_classes = K.argmax(box_scores, axis=-1)
    box_class_scores = K.max(box_scores, axis=-1, keepdims=False)

    # Build a mask with the same shape as box_class_scores (e.g. (3549, 3)
    # for a 416x416 input) that is True for every box to keep.
    filtering_mask = box_class_scores >= threshold

    # Apply the mask to scores, boxes and classes.
    scores = tf.boolean_mask(box_class_scores, filtering_mask)
    boxes = tf.boolean_mask(boxes, filtering_mask)
    classes = tf.boolean_mask(box_classes, filtering_mask)

    return scores, boxes, classes


def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    # Tensor used by tf.image.non_max_suppression(); must be initialized.
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))

    # Get the indices of the boxes that survive non-max suppression.
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor, iou_threshold)

    # Keep only the selected entries of scores, boxes and classes.
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes


def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape=(720., 1280.),
              max_boxes=10,
              score_threshold=.6,
              iou_threshold=.5):
    # Gather the three scale outputs of the YOLO model. Scale i uses
    # anchors[6 - 3*i : 9 - 3*i]: the coarsest grid gets the largest anchors.
    for i in range(0, 3):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[i], anchors[6 - 3 * i:9 - 3 * i], num_classes, i)
        if i == 0:
            boxes, box_scores = _boxes, _box_scores
        else:
            boxes = K.concatenate([boxes, _boxes], axis=0)
            box_scores = K.concatenate([box_scores, _box_scores], axis=0)

    # Score filtering: drop boxes whose best class score is below score_threshold.
    scores, boxes, classes = yolo_filter_boxes(boxes, box_scores, score_threshold)

    # Scale boxes back to original image shape.
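    # scale_boxes (from utils.yolo_utils) multiplies the normalized corner
    # coordinates by [height, width, height, width], mirroring the inline
    # scaling in yad2k/models/keras_yolo.py's yolo_eval.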
    boxes = scale_boxes(boxes, image_shape)

    # Non-max suppression: prune overlapping boxes above iou_threshold.
    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes, iou_threshold)

    return scores, boxes, classes
--------------------------------------------------------------------------------
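
For reference, a minimal sketch of how `yolo_eval` from `yolov3/model.py` might be wired to the downloaded weights, roughly what `test_yolov3.py` is expected to do. The `.h5` file name, the class-file choice, and the dummy input are illustrative assumptions, not taken from the repository:

```python
import numpy as np
from keras import backend as K
from keras.models import load_model

from yolov3.model import yolo_eval

# Assumed paths: weights downloaded per the README; anchors/classes in model_data/.
with open('model_data/yolov3_anchors.txt') as f:
    anchors = np.array([float(x) for x in f.readline().split(',')]).reshape(-1, 2)
with open('model_data/yolo_coco_classes.txt') as f:
    class_names = [c.strip() for c in f.readlines()]

model = load_model('model_data/yolov3.h5')  # assumed name; three outputs, one per scale

# Build the evaluation graph; image_shape is the original image's (h, w) in pixels.
scores, boxes, classes = yolo_eval(model.output, anchors, len(class_names),
                                   image_shape=(576., 768.))

# Placeholder input: a real pipeline would resize an image to 416x416 and scale to [0, 1].
image_data = np.random.rand(1, 416, 416, 3).astype(np.float32)
out_scores, out_boxes, out_classes = K.get_session().run(
    [scores, boxes, classes],
    feed_dict={model.input: image_data, K.learning_phase(): 0})
```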