├── python
│   ├── requirements.txt
│   ├── yolov7_out.jpg
│   ├── image.py
│   ├── webcam.py
│   └── yolov7.py
├── data
│   └── horses.jpg
├── yolov7_out.jpg
├── cpp
│   ├── CMakeLists.txt
│   ├── main.cpp
│   └── main_preprocessing.cpp
└── README.md

/python/requirements.txt:
--------------------------------------------------------------------------------
openvino==2023.2.0
argparse
numpy
opencv-python
--------------------------------------------------------------------------------
/data/horses.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvino-dev-samples/YOLOv7_OpenVINO_cpp-python/HEAD/data/horses.jpg
--------------------------------------------------------------------------------
/yolov7_out.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvino-dev-samples/YOLOv7_OpenVINO_cpp-python/HEAD/yolov7_out.jpg
--------------------------------------------------------------------------------
/python/yolov7_out.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvino-dev-samples/YOLOv7_OpenVINO_cpp-python/HEAD/python/yolov7_out.jpg
--------------------------------------------------------------------------------
/cpp/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.10)
set(CMAKE_CXX_STANDARD 11)
set(TARGET_NAME yolov7)
find_package(OpenCV REQUIRED)
find_package(OpenVINO REQUIRED)

# Builds the Preprocessing API variant by default; switch to main.cpp for manual preprocessing
add_executable(${TARGET_NAME} main_preprocessing.cpp)
# add_executable(${TARGET_NAME} main.cpp)

target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime ${OpenCV_LIBS})
--------------------------------------------------------------------------------
/python/image.py:
--------------------------------------------------------------------------------
import yolov7
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-h', '--help', action='help', help='Show this help message and exit.')
    parser.add_argument('-i', '--input', required=True, type=str,
                        help='Required. Path to an image file.')
    parser.add_argument('-m', '--model', required=True, type=str,
                        help='Required. Path to an .xml or .onnx file with a trained model.')
    parser.add_argument('-d', '--device', required=False, default='CPU', type=str,
                        help='Device name.')
    parser.add_argument('-p', '--pre_api', required=False, action='store_true',
                        help='Use the OpenVINO Preprocessing API.')
    args = parser.parse_args()
    yolov7_detector = yolov7.YOLOV7_OPENVINO(args.model, args.device, args.pre_api, 1, 1)
    yolov7_detector.infer_image(args.input)
--------------------------------------------------------------------------------
/python/webcam.py:
--------------------------------------------------------------------------------
import yolov7
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-h', '--help', action='help', help='Show this help message and exit.')
    parser.add_argument('-i', '--input', required=True, type=int,
                        help='Required. Webcam ID.')
    parser.add_argument('-m', '--model', required=True, type=str,
                        help='Required. Path to an .xml or .onnx file with a trained model.')
    parser.add_argument('-d', '--device', required=False, default='CPU', type=str,
                        help='Device name.')
    parser.add_argument('-p', '--pre_api', required=False, action='store_true',
                        help='Use the OpenVINO Preprocessing API.')
    parser.add_argument('-bs', '--batchsize', required=False, default=1, type=int,
                        help='Batch size.')
    parser.add_argument('-n', '--nireq', required=False, default=2, type=int,
                        help='Number of infer requests.')

    args = parser.parse_args()
    yolov7_detector = yolov7.YOLOV7_OPENVINO(args.model, args.device, args.pre_api, args.batchsize, args.nireq)
    yolov7_detector.infer_cam(args.input)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# YOLOv7_OpenVINO
This repository demonstrates how to deploy an official YOLOv7 pre-trained model with the OpenVINO runtime API.

## 1. Install requirements
### 1.1 Python
```shell
$ pip install -r python/requirements.txt
```

### 1.2 C++ (Ubuntu)
Please follow the guides to install [OpenVINO](https://docs.openvino.ai/2023.2/openvino_docs_install_guides_installing_openvino_from_archive_linux.html) and [OpenCV](https://docs.opencv.org/4.x/d7/d9f/tutorial_linux_install.html).

## 2. Prepare the model
Download the YOLOv7 pre-trained weights from [YOLOv7](https://github.com/WongKinYiu/yolov7).

## 3. Export the ONNX model and convert it to OpenVINO IR
```shell
$ git clone git@github.com:WongKinYiu/yolov7.git
$ cd yolov7
$ pip install -r requirements.txt
$ python export.py --weights yolov7.pt
$ ovc yolov7.onnx
```
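After conversion, you can sanity-check the IR with the OpenVINO Python API before wiring it into the sample; a minimal sketch, assuming `yolov7.xml`/`yolov7.bin` sit in the current directory:
```python
from openvino.runtime import Core

# Read the converted IR and print its input/output names and shapes
core = Core()
model = core.read_model("yolov7.xml")
print("inputs: ", [(port.any_name, port.partial_shape) for port in model.inputs])
print("outputs:", [(port.any_name, port.partial_shape) for port in model.outputs])
```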
## 4. Run inference
The input image can be found in [YOLOv7's repository](https://github.com/WongKinYiu/yolov7/blob/main/inference/images/horses.jpg).
### 4.1 Python
```shell
$ python python/image.py -m path_to/yolov7.xml -i data/horses.jpg -d "CPU"
```
You can also try running the code with the [Preprocessing API](https://docs.openvino.ai/latest/openvino_docs_OV_UG_Preprocessing_Overview.html) for performance optimization.
```shell
$ python python/image.py -m path_to/yolov7.xml -i data/horses.jpg -d "CPU" -p
```

- -i = Path to an image or video source;
- -m = Path to an IR .xml or .onnx file;
- -d = Device name, e.g. "CPU";
- -p = Enable the preprocessing API;
- -bs = Batch size (webcam.py only);
- -n = Number of infer requests (webcam.py only).
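The CLI scripts are thin wrappers; a minimal sketch of driving the detector from Python directly, using the `YOLOV7_OPENVINO` class from `python/yolov7.py` (paths are placeholders):
```python
import yolov7

# Arguments: model path, device, Preprocessing API flag, batch size, number of infer requests
detector = yolov7.YOLOV7_OPENVINO("path_to/yolov7.xml", "CPU", False, 1, 1)
# Runs letterbox preprocessing, async inference, and NMS, then writes yolov7_out.jpg
detector.infer_image("data/horses.jpg")
```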
### 4.2 C++ (Ubuntu)
Compile the source code:
```shell
$ cd cpp
$ mkdir build && cd build
$ source ~/intel/openvino_2023.2/setupvars.sh
$ cmake ..
$ make
```
By default, ```CMakeLists.txt``` builds ```main_preprocessing.cpp```, which uses the [Preprocessing API](https://docs.openvino.ai/latest/openvino_docs_OV_UG_Preprocessing_Overview.html) for performance optimization. To build the plain version instead, switch the commented ```add_executable``` line to ```main.cpp```.

Run inference:
```shell
$ ./yolov7 path_to/yolov7.xml ../../data/horses.jpg 'CPU'
```

## 5. Results

![horse_res](https://user-images.githubusercontent.com/91237924/179361905-44fcd4ac-7a9e-41f0-bd07-b6cf07245c04.jpg)


## 6. Run with a webcam
You can also run the sample with a webcam for real-time detection:
```shell
$ python python/webcam.py -m path_to/yolov7.xml -i 0
```

Tip: you can switch the device name to **"GPU"** to improve the performance.

## 7. Further optimization
Try this notebook ([yolov7-optimization](https://github.com/openvinotoolkit/openvino_notebooks/tree/develop/notebooks/226-yolov7-optimization)) and quantize your YOLOv7 model to INT8.
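The notebook's INT8 flow is post-training quantization with NNCF; a rough sketch of the idea, assuming a toy one-image calibration set built from the sample image (real calibration needs a few hundred representative images, as in the notebook):
```python
import cv2
import numpy as np
import nncf
from openvino.runtime import Core, serialize

core = Core()
model = core.read_model("yolov7.xml")

# Toy one-item calibration set; the plain resize here is only for illustration
img = cv2.cvtColor(cv2.imread("data/horses.jpg"), cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (640, 640)).astype(np.float32) / 255.0
blob = np.expand_dims(img.transpose(2, 0, 1), 0)  # NCHW

calibration_dataset = nncf.Dataset([blob], lambda item: item)
quantized_model = nncf.quantize(model, calibration_dataset)
serialize(quantized_model, "yolov7_int8.xml")
```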
--------------------------------------------------------------------------------
/python/yolov7.py:
--------------------------------------------------------------------------------
from openvino.runtime import Core
import cv2
import numpy as np
import random
import time
from openvino.preprocess import PrePostProcessor, ColorFormat
from openvino.runtime import Layout, AsyncInferQueue, PartialShape


class YOLOV7_OPENVINO(object):
    def __init__(self, model_path, device, pre_api, batchsize, nireq):
        # set the hyperparameters
        self.classes = [
            "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
            "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
            "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
            "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
            "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
            "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
            "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
            "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
            "hair drier", "toothbrush"
        ]
        self.batchsize = batchsize
        self.img_size = (640, 640)
        self.conf_thres = 0.1
        self.iou_thres = 0.6
        self.class_num = 80
        self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.classes]
        self.stride = [8, 16, 32]
        self.anchor_list = [[12, 16, 19, 36, 40, 28], [36, 75, 76, 55, 72, 146], [142, 110, 192, 243, 459, 401]]
        self.anchor = np.array(self.anchor_list).astype(float).reshape(3, -1, 2)
        area = self.img_size[0] * self.img_size[1]
        self.size = [int(area / self.stride[0] ** 2), int(area / self.stride[1] ** 2), int(area / self.stride[2] ** 2)]
        self.feature = [[int(j / self.stride[i]) for j in self.img_size] for i in range(3)]

        ie = Core()
        self.model = ie.read_model(model_path)
        self.num_output = self.model.get_output_size()
        self.input_layer = self.model.input(0)
        new_shape = PartialShape([self.batchsize, 3, self.img_size[0], self.img_size[1]])
        self.model.reshape({self.input_layer.any_name: new_shape})
        self.pre_api = pre_api
        if self.pre_api:
            # Preprocessing API
            ppp = PrePostProcessor(self.model)
            # Declare section of desired application's input format
            ppp.input().tensor() \
                .set_layout(Layout("NHWC")) \
                .set_color_format(ColorFormat.BGR)
            # Here, it is assumed that the model has "NCHW" layout for input.
            ppp.input().model().set_layout(Layout("NCHW"))
            # Convert current color format (BGR) to RGB
            ppp.input().preprocess() \
                .convert_color(ColorFormat.RGB) \
                .scale([255.0, 255.0, 255.0])
            self.model = ppp.build()
            print(f'Dump preprocessor: {ppp}')

        self.compiled_model = ie.compile_model(model=self.model, device_name=device)
        self.infer_queue = AsyncInferQueue(self.compiled_model, nireq)

    def letterbox(self, img, new_shape=(640, 640), color=(114, 114, 114)):
        # Resize and pad image while meeting stride-multiple constraints
        shape = img.shape[:2]  # current shape [height, width]
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

        # divide padding into 2 sides
        dw /= 2
        dh /= 2

        # resize
        if shape[::-1] != new_unpad:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))

        # add border
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

        return img

    def xywh2xyxy(self, x):
        # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
        y = np.copy(x)
        y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
        y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
        y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
        y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y

        return y

    def nms(self, prediction, conf_thres, iou_thres):
        predictions = np.squeeze(prediction[0])

        # Filter out object confidence scores below threshold
        obj_conf = predictions[:, 4]
        predictions = predictions[obj_conf > conf_thres]
        obj_conf = obj_conf[obj_conf > conf_thres]

        # Multiply class confidence with bounding box confidence
        predictions[:, 5:] *= obj_conf[:, np.newaxis]

        # Get the scores
        scores = np.max(predictions[:, 5:], axis=1)

        # Filter out the objects with a low score
        valid_scores = scores > conf_thres
        predictions = predictions[valid_scores]
        scores = scores[valid_scores]

        # Get the class with the highest confidence
        class_ids = np.argmax(predictions[:, 5:], axis=1)

        # Get bounding boxes for each object
        boxes = self.xywh2xyxy(predictions[:, :4])

        # Apply non-maximum suppression to suppress weak, overlapping bounding boxes
        indices = cv2.dnn.NMSBoxes(boxes.tolist(), scores.tolist(), conf_thres, iou_thres)
        indices = np.array(indices).flatten()

        return boxes[indices], scores[indices], class_ids[indices]

    def clip_coords(self, boxes, img_shape):
        # Clip xyxy bounding boxes to image shape (height, width)
        boxes[:, 0] = boxes[:, 0].clip(0, img_shape[1])  # x1
        boxes[:, 1] = boxes[:, 1].clip(0, img_shape[0])  # y1
        boxes[:, 2] = boxes[:, 2].clip(0, img_shape[1])  # x2
        boxes[:, 3] = boxes[:, 3].clip(0, img_shape[0])  # y2

    def scale_coords(self, img1_shape, img0_shape, coords, ratio_pad=None):
        # Rescale coords (xyxy) from img1_shape to img0_shape
        # gain = old / new
        if ratio_pad is None:
            gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
            padding = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2
        else:
            gain = ratio_pad[0][0]
            padding = ratio_pad[1]
        coords[:, [0, 2]] -= padding[0]  # x padding
        coords[:, [1, 3]] -= padding[1]  # y padding
        coords[:, :4] /= gain
        self.clip_coords(coords, img0_shape)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    def plot_one_box(self, x, img, color=None, label=None, line_thickness=None):
        # Plots one bounding box on image img
        tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
        color = color or [random.randint(0, 255) for _ in range(3)]
        c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
        cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
        if label:
            tf = max(tl - 1, 1)  # font thickness
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
            cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
            cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3,
                        [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

    def draw(self, img, boxinfo):
        for xyxy, conf, cls in boxinfo:
            self.plot_one_box(xyxy, img, label=self.classes[int(cls)], color=self.colors[int(cls)], line_thickness=2)
        # cv2.imshow('Press ESC to Exit', img)
        # cv2.waitKey(1)

    def postprocess(self, infer_request, info):
        src_img_list, src_size = info
        for batch_id in range(self.batchsize):
            if self.num_output == 1:
                results = np.expand_dims(infer_request.get_output_tensor(0).data[batch_id], axis=0)
            else:
                output = []
                # Get each feature map's output data
                output.append(self.sigmoid(infer_request.get_output_tensor(0).data[batch_id].reshape(-1, self.size[0] * 3, 5 + self.class_num)))
                output.append(self.sigmoid(infer_request.get_output_tensor(1).data[batch_id].reshape(-1, self.size[1] * 3, 5 + self.class_num)))
                output.append(self.sigmoid(infer_request.get_output_tensor(2).data[batch_id].reshape(-1, self.size[2] * 3, 5 + self.class_num)))

                # Postprocessing: decode grid/anchor offsets into pixel boxes
                grid = []
                for _, f in enumerate(self.feature):
                    grid.append([[i, j] for j in range(f[0]) for i in range(f[1])])

                result = []
                for i in range(3):
                    src = output[i]
                    xy = src[..., 0:2] * 2. - 0.5
                    wh = (src[..., 2:4] * 2) ** 2
                    dst_xy = []
                    dst_wh = []
                    for j in range(3):
                        dst_xy.append((xy[:, j * self.size[i]:(j + 1) * self.size[i], :] + grid[i]) * self.stride[i])
                        dst_wh.append(wh[:, j * self.size[i]:(j + 1) * self.size[i], :] * self.anchor[i][j])
                    src[..., 0:2] = np.concatenate((dst_xy[0], dst_xy[1], dst_xy[2]), axis=1)
                    src[..., 2:4] = np.concatenate((dst_wh[0], dst_wh[1], dst_wh[2]), axis=1)
                    result.append(src)
                results = np.concatenate(result, 1)

            boxes, scores, class_ids = self.nms(results, self.conf_thres, self.iou_thres)
            img_shape = self.img_size
            self.scale_coords(img_shape, src_size, boxes)

            # Draw the results
            self.draw(src_img_list[batch_id], zip(boxes, scores, class_ids))

    def infer_image(self, img_path):
        # Read image
        src_img = cv2.imread(img_path)
        if src_img is None:
            raise ValueError('Failed to read image.')
        src_img_list = []
        src_img_list.append(src_img)
        img = self.letterbox(src_img, self.img_size)
        src_size = src_img.shape[:2]
        img = img.astype(dtype=np.float32)
        if not self.pre_api:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # BGR to RGB
            img /= 255.0
            img = img.transpose(2, 0, 1)  # HWC to CHW
        input_image = np.expand_dims(img, 0)

        # Set callback function for postprocess
        self.infer_queue.set_callback(self.postprocess)
        # Do inference
        self.infer_queue.start_async({self.input_layer.any_name: input_image}, (src_img_list, src_size))
        self.infer_queue.wait_all()
        cv2.imwrite("yolov7_out.jpg", src_img_list[0])

    def infer_cam(self, source):
        # Set callback function for postprocess
        self.infer_queue.set_callback(self.postprocess)
        # Capture camera source
        cap = cv2.VideoCapture(source)
        src_img_list = []
        img_list = []
        count = 0
        start_time = time.time()
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            img = self.letterbox(frame, self.img_size)
            src_size = frame.shape[:2]
            img = img.astype(dtype=np.float32)
            # Preprocessing (same path as infer_image when the Preprocessing API is off)
            if not self.pre_api:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # BGR to RGB
                img /= 255.0
                img = img.transpose(2, 0, 1)  # HWC to CHW
            input_image = np.expand_dims(img, 0)
            # Batching
            img_list.append(input_image)
            src_img_list.append(frame)
            if len(img_list) < self.batchsize:
                continue
            img_batch = np.concatenate(img_list)

            # Do inference
            self.infer_queue.start_async({self.input_layer.any_name: img_batch}, (src_img_list, src_size))
            src_img_list = []
            img_list = []
            count = count + self.batchsize
            c = cv2.waitKey(1)
            if c == 27:
                self.infer_queue.wait_all()
                break
        cap.release()
        cv2.destroyAllWindows()
        end_time = time.time()
        # Calculate the average FPS
        fps = count / (end_time - start_time)
        print("throughput: {:.2f} fps".format(fps))
--------------------------------------------------------------------------------
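The decode in `postprocess` above maps raw feature-map activations to pixel boxes using YOLOv7's parameterization. A tiny standalone check of the same formula for one grid cell (all numbers illustrative, not taken from a real model):
```python
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# One cell on the stride-8 feature map at grid position (10, 20), first anchor (12, 16)
t = sigmoid(np.array([0.2, -0.1, 0.3, 0.4]))     # raw tx, ty, tw, th after sigmoid
xy = (t[:2] * 2 - 0.5 + np.array([10, 20])) * 8  # box center in input-image pixels
wh = (t[2:] * 2) ** 2 * np.array([12, 16])       # box size scaled by the anchor
print(xy, wh)
```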
/cpp/main.cpp:
--------------------------------------------------------------------------------
#include <algorithm>
#include <chrono>
#include <cmath>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <sys/time.h>

#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <openvino/openvino.hpp>

using namespace std;

double get_wall_time()
{
    struct timeval time;
    if (gettimeofday(&time, NULL))
    {
        return 0;
    }
    return (double)time.tv_sec + (double)time.tv_usec * .000001;
}

struct Object
{
    cv::Rect_<float> rect;
    int label;
    float prob;
};

const std::vector<std::string> class_names = {
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush"};

inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (1.f + exp(-x)));
}

cv::Mat letterbox(cv::Mat &src, int h, int w, std::vector<float> &padding)
{
    // Resize and pad image while meeting stride-multiple constraints
    int in_w = src.cols;
    int in_h = src.rows;
    int tar_w = w;
    int tar_h = h;
    float r = min(float(tar_h) / in_h, float(tar_w) / in_w);
    int inside_w = round(in_w * r);
    int inside_h = round(in_h * r);
    int padd_w = tar_w - inside_w;
    int padd_h = tar_h - inside_h;
    cv::Mat resize_img;

    // resize
    resize(src, resize_img, cv::Size(inside_w, inside_h));

    // divide padding into 2 sides
    padd_w = padd_w / 2;
    padd_h = padd_h / 2;
    padding.push_back(padd_w);
    padding.push_back(padd_h);

    // store the ratio
    padding.push_back(r);
    int top = int(round(padd_h - 0.1));
    int bottom = int(round(padd_h + 0.1));
    int left = int(round(padd_w - 0.1));
    int right = int(round(padd_w + 0.1));

    // add border
    copyMakeBorder(resize_img, resize_img, top, bottom, left, right, 0, cv::Scalar(114, 114, 114));
    return resize_img;
}

cv::Rect scale_box(cv::Rect box, std::vector<float> &padding)
{
    // remove the padding area
    cv::Rect scaled_box;
    scaled_box.x = box.x - padding[0];
    scaled_box.y = box.y - padding[1];
    scaled_box.width = box.width;
    scaled_box.height = box.height;
    return scaled_box;
}

void drawPred(int classId, float conf, cv::Rect box, float ratio, float raw_h, float raw_w, cv::Mat &frame, const std::vector<std::string> &classes)
{
    float x0 = box.x;
    float y0 = box.y;
    float x1 = box.x + box.width;
    float y1 = box.y + box.height;

    // scale the bounding boxes to the size of the original image
    x0 = x0 / ratio;
    y0 = y0 / ratio;
    x1 = x1 / ratio;
    y1 = y1 / ratio;

    // Clip bounding boxes to image shape
    x0 = std::max(std::min(x0, (float)(raw_w - 1)), 0.f);
    y0 = std::max(std::min(y0, (float)(raw_h - 1)), 0.f);
    x1 = std::max(std::min(x1, (float)(raw_w - 1)), 0.f);
    y1 = std::max(std::min(y1, (float)(raw_h - 1)), 0.f);

    // Draw the bounding boxes and put the label text on the original image
    cv::rectangle(frame, cv::Point(x0, y0), cv::Point(x1, y1), cv::Scalar(0, 255, 0), 1);
    std::string label = cv::format("%.2f", conf);
    if (!classes.empty())
    {
        CV_Assert(classId < (int)classes.size());
        label = classes[classId] + ": " + label;
    }
    int baseLine;
    cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.25, 1, &baseLine);
    y0 = max(int(y0), labelSize.height);
    cv::rectangle(frame, cv::Point(x0, y0 - round(1.5 * labelSize.height)), cv::Point(x0 + round(2 * labelSize.width), y0 + baseLine), cv::Scalar(0, 255, 0), cv::FILLED);
    cv::putText(frame, label, cv::Point(x0, y0), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(), 1.5);
}
static void generate_proposals(int stride, const float *feat, float prob_threshold, std::vector<Object> &objects)
{
    // get the results from proposals
    float anchors[18] = {12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401};
    int anchor_num = 3;
    int feat_w = 640 / stride;
    int feat_h = 640 / stride;
    int cls_num = 80;
    int anchor_group = 0;
    if (stride == 8)
        anchor_group = 0;
    if (stride == 16)
        anchor_group = 1;
    if (stride == 32)
        anchor_group = 2;

    // 3 x h x w x (80 + 5)
    for (int anchor = 0; anchor <= anchor_num - 1; anchor++)
    {
        for (int i = 0; i <= feat_h - 1; i++)
        {
            for (int j = 0; j <= feat_w - 1; j++)
            {
                float box_prob = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 4];
                box_prob = sigmoid(box_prob);

                // filter the bounding box with low confidence
                if (box_prob < prob_threshold)
                    continue;
                float x = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 0];
                float y = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 1];
                float w = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 2];
                float h = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 3];

                double max_prob = 0;
                int idx = 0;

                // get the class id with maximum confidence
                for (int t = 5; t < 85; ++t)
                {
                    double tp = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + t];
                    tp = sigmoid(tp);
                    if (tp > max_prob)
                    {
                        max_prob = tp;
                        idx = t;
                    }
                }

                // filter the class with low confidence
                float cof = box_prob * max_prob;
                if (cof < prob_threshold)
                    continue;

                // convert results to xywh
                x = (sigmoid(x) * 2 - 0.5 + j) * stride;
                y = (sigmoid(y) * 2 - 0.5 + i) * stride;
                w = pow(sigmoid(w) * 2, 2) * anchors[anchor_group * 6 + anchor * 2];
                h = pow(sigmoid(h) * 2, 2) * anchors[anchor_group * 6 + anchor * 2 + 1];

                float r_x = x - w / 2;
                float r_y = y - h / 2;

                // store the results
                Object obj;
                obj.rect.x = r_x;
                obj.rect.y = r_y;
                obj.rect.width = w;
                obj.rect.height = h;
                obj.label = idx - 5;
                obj.prob = cof;
                objects.push_back(obj);
            }
        }
    }
}
int main(int argc, char *argv[])
{
    // set the hyperparameters
    int img_h = 640;
    int img_w = 640;
    int img_c = 3;
    int img_size = img_h * img_w * img_c;

    const float prob_threshold = 0.30f;
    const float nms_threshold = 0.60f;

    const std::string model_path{argv[1]};
    const char *image_path{argv[2]};
    const std::string device_name{argv[3]};

    cv::Mat src_img = cv::imread(image_path);
    cv::Mat img;

    std::vector<float> padding;
    cv::Mat boxed = letterbox(src_img, img_h, img_w, padding);

    cv::cvtColor(boxed, img, cv::COLOR_BGR2RGB);

    // -------- Step 1. Initialize OpenVINO Runtime Core --------
    ov::Core core;

    // -------- Step 2. Read a model --------
    std::shared_ptr<ov::Model> model = core.read_model(model_path);
    auto output_num = model->get_output_size();

    // -------- Step 3. Loading a model to the device --------
    ov::CompiledModel compiled_model = core.compile_model(model, device_name);

    // Get input port for model with one input
    auto input_port = compiled_model.input();

    // -------- Step 4. Create an infer request --------
    ov::InferRequest infer_request = compiled_model.create_infer_request();

    // -------- Step 5. Prepare input --------
    // Repack the letterboxed image from HWC (OpenCV) to CHW and normalize to [0, 1]
    std::vector<float> data1(img_h * img_w * 3);
    for (int h = 0; h < img_h; h++)
    {
        for (int w = 0; w < img_w; w++)
        {
            for (int c = 0; c < 3; c++)
            {
                int out_index = c * img_h * img_w + h * img_w + w;
                data1[out_index] = float(img.at<cv::Vec3b>(h, w)[c]) / 255.0f;
            }
        }
    }
    // Create tensor from external memory
    ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), data1.data());
    infer_request.set_input_tensor(input_tensor);

    // -------- Step 6. Start inference --------
    auto t1 = std::chrono::high_resolution_clock::now();
    infer_request.infer();
    auto t2 = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::milli> fp_ms = t2 - t1;

    std::cout << "inference took " << fp_ms.count() << " ms" << std::endl;

    // -------- Step 7. Process output --------
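    // With a single fused output, the tensor is [1, 25200, 85]: 25200 = 3 anchors x
    // (80*80 + 40*40 + 20*20) grid cells at strides 8/16/32 for a 640x640 input, and
    // each row holds [cx, cy, w, h, objectness, 80 class scores].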
    std::vector<Object> proposals;
    if (output_num == 1)
    {
        int total_num = 25200;
        auto output_tensor = infer_request.get_output_tensor(0);
        const float *result = output_tensor.data<float>();
        std::vector<Object> objects;
        for (int i = 0; i <= total_num - 1; i++)
        {
            double max_prob = 0;
            int idx = 0;
            float box_prob = result[i * 85 + 4];
            if (box_prob < prob_threshold)
                continue;

            // get the class id with maximum confidence
            for (int t = 5; t < 85; ++t)
            {
                double tp = result[i * 85 + t];
                if (tp > max_prob)
                {
                    max_prob = tp;
                    idx = t;
                }
            }

            // filter the class with low confidence
            float cof = box_prob * max_prob;
            if (cof < prob_threshold)
                continue;

            // store the results
            Object obj;
            obj.rect.x = result[i * 85 + 0] - result[i * 85 + 2] / 2;
            obj.rect.y = result[i * 85 + 1] - result[i * 85 + 3] / 2;
            obj.rect.width = result[i * 85 + 2];
            obj.rect.height = result[i * 85 + 3];
            obj.label = idx - 5;
            obj.prob = cof;
            objects.push_back(obj);
        }
        proposals.insert(proposals.end(), objects.begin(), objects.end());
    }
    else
    {
        auto output_tensor_p8 = infer_request.get_output_tensor(0);
        const float *result_p8 = output_tensor_p8.data<float>();
        auto output_tensor_p16 = infer_request.get_output_tensor(1);
        const float *result_p16 = output_tensor_p16.data<float>();
        auto output_tensor_p32 = infer_request.get_output_tensor(2);
        const float *result_p32 = output_tensor_p32.data<float>();

        std::vector<Object> objects8;
        std::vector<Object> objects16;
        std::vector<Object> objects32;

        generate_proposals(8, result_p8, prob_threshold, objects8);
        proposals.insert(proposals.end(), objects8.begin(), objects8.end());
        generate_proposals(16, result_p16, prob_threshold, objects16);
        proposals.insert(proposals.end(), objects16.begin(), objects16.end());
        generate_proposals(32, result_p32, prob_threshold, objects32);
        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    }

    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (size_t i = 0; i < proposals.size(); i++)
    {
        classIds.push_back(proposals[i].label);
        confidences.push_back(proposals[i].prob);
        boxes.push_back(proposals[i].rect);
    }

    std::vector<int> picked;

    // do non-maximum suppression for each bounding box
    cv::dnn::NMSBoxes(boxes, confidences, prob_threshold, nms_threshold, picked);

    float raw_h = src_img.rows;
    float raw_w = src_img.cols;

    for (size_t i = 0; i < picked.size(); i++)
    {
        int idx = picked[i];
        cv::Rect box = boxes[idx];
        cv::Rect scaled_box = scale_box(box, padding);
        drawPred(classIds[idx], confidences[idx], scaled_box, padding[2], raw_h, raw_w, src_img, class_names);
    }
    cv::imwrite("yolov7_out.jpg", src_img);
}
--------------------------------------------------------------------------------
24 | "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", 25 | "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", 26 | "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", 27 | "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", 28 | "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", 29 | "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", 30 | "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", 31 | "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", 32 | "hair drier", "toothbrush"}; 33 | 34 | inline float sigmoid(float x) 35 | { 36 | return static_cast(1.f / (1.f + exp(-x))); 37 | } 38 | 39 | cv::Mat letterbox(cv::Mat &src, int h, int w, std::vector &padding) 40 | { 41 | // Resize and pad image while meeting stride-multiple constraints 42 | int in_w = src.cols; 43 | int in_h = src.rows; 44 | int tar_w = w; 45 | int tar_h = h; 46 | float r = min(float(tar_h) / in_h, float(tar_w) / in_w); 47 | int inside_w = round(in_w * r); 48 | int inside_h = round(in_h * r); 49 | int padd_w = tar_w - inside_w; 50 | int padd_h = tar_h - inside_h; 51 | cv::Mat resize_img; 52 | 53 | // resize 54 | resize(src, resize_img, cv::Size(inside_w, inside_h)); 55 | 56 | // divide padding into 2 sides 57 | padd_w = padd_w / 2; 58 | padd_h = padd_h / 2; 59 | padding.push_back(padd_w); 60 | padding.push_back(padd_h); 61 | 62 | // store the ratio 63 | padding.push_back(r); 64 | int top = int(round(padd_h - 0.1)); 65 | int bottom = int(round(padd_h + 0.1)); 66 | int left = int(round(padd_w - 0.1)); 67 | int right = int(round(padd_w + 0.1)); 68 | 69 | // add border 70 | copyMakeBorder(resize_img, resize_img, top, bottom, left, right, 0, cv::Scalar(114, 114, 114)); 71 | return resize_img; 72 | } 73 | 74 | cv::Rect scale_box(cv::Rect box, std::vector &padding) 75 | { 76 | // remove the padding area 77 | cv::Rect scaled_box; 78 | scaled_box.x = box.x - padding[0]; 79 | scaled_box.y = box.y - padding[1]; 80 | scaled_box.width = box.width; 81 | scaled_box.height = box.height; 82 | return scaled_box; 83 | } 84 | 85 | void drawPred(int classId, float conf, cv::Rect box, float ratio, float raw_h, float raw_w, cv::Mat &frame, const std::vector &classes) 86 | { 87 | float x0 = box.x; 88 | float y0 = box.y; 89 | float x1 = box.x + box.width; 90 | float y1 = box.y + box.height; 91 | 92 | // scale the bounding boxes to size of origin image 93 | x0 = x0 / ratio; 94 | y0 = y0 / ratio; 95 | x1 = x1 / ratio; 96 | y1 = y1 / ratio; 97 | 98 | // Clip bounding boxes to image shape 99 | x0 = std::max(std::min(x0, (float)(raw_w - 1)), 0.f); 100 | y0 = std::max(std::min(y0, (float)(raw_h - 1)), 0.f); 101 | x1 = std::max(std::min(x1, (float)(raw_w - 1)), 0.f); 102 | y1 = std::max(std::min(y1, (float)(raw_h - 1)), 0.f); 103 | 104 | // Draw the bouding boxes and put the label text on the origin image 105 | cv::rectangle(frame, cv::Point(x0, y0), cv::Point(x1, y1), cv::Scalar(0, 255, 0), 1); 106 | std::string label = cv::format("%.2f", conf); 107 | if (!classes.empty()) 108 | { 109 | CV_Assert(classId < (int)classes.size()); 110 | label = classes[classId] + ": " + label; 111 | } 112 | int baseLine; 113 | cv::Size labelSize = 
    cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.25, 1, &baseLine);
    y0 = max(int(y0), labelSize.height);
    cv::rectangle(frame, cv::Point(x0, y0 - round(1.5 * labelSize.height)), cv::Point(x0 + round(2 * labelSize.width), y0 + baseLine), cv::Scalar(0, 255, 0), cv::FILLED);
    cv::putText(frame, label, cv::Point(x0, y0), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(), 1.5);
}

static void generate_proposals(int stride, const float *feat, float prob_threshold, std::vector<Object> &objects)
{
    // get the results from proposals
    float anchors[18] = {12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401};
    int anchor_num = 3;
    int feat_w = 640 / stride;
    int feat_h = 640 / stride;
    int cls_num = 80;
    int anchor_group = 0;
    if (stride == 8)
        anchor_group = 0;
    if (stride == 16)
        anchor_group = 1;
    if (stride == 32)
        anchor_group = 2;

    // 3 x h x w x (80 + 5)
    for (int anchor = 0; anchor <= anchor_num - 1; anchor++)
    {
        for (int i = 0; i <= feat_h - 1; i++)
        {
            for (int j = 0; j <= feat_w - 1; j++)
            {
                float box_prob = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 4];
                box_prob = sigmoid(box_prob);

                // filter the bounding box with low confidence
                if (box_prob < prob_threshold)
                    continue;
                float x = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 0];
                float y = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 1];
                float w = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 2];
                float h = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 3];

                double max_prob = 0;
                int idx = 0;

                // get the class id with maximum confidence
                for (int t = 5; t < 85; ++t)
                {
                    double tp = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + t];
                    tp = sigmoid(tp);
                    if (tp > max_prob)
                    {
                        max_prob = tp;
                        idx = t;
                    }
                }

                // filter the class with low confidence
                float cof = box_prob * max_prob;
                if (cof < prob_threshold)
                    continue;

                // convert results to xywh
                x = (sigmoid(x) * 2 - 0.5 + j) * stride;
                y = (sigmoid(y) * 2 - 0.5 + i) * stride;
                w = pow(sigmoid(w) * 2, 2) * anchors[anchor_group * 6 + anchor * 2];
                h = pow(sigmoid(h) * 2, 2) * anchors[anchor_group * 6 + anchor * 2 + 1];

                float r_x = x - w / 2;
                float r_y = y - h / 2;

                // store the results
                Object obj;
                obj.rect.x = r_x;
                obj.rect.y = r_y;
                obj.rect.width = w;
                obj.rect.height = h;
                obj.label = idx - 5;
                obj.prob = cof;
                objects.push_back(obj);
            }
        }
    }
}

int main(int argc, char *argv[])
{
    // set the hyperparameters
    int img_h = 640;
    int img_w = 640;
    int img_c = 3;
    int img_size = img_h * img_w * img_c;

    const float prob_threshold = 0.30f;
    const float nms_threshold = 0.60f;

    const std::string model_path{argv[1]};
    const char *image_path{argv[2]};
    const std::string device_name{argv[3]};

    cv::Mat src_img = cv::imread(image_path);

    std::vector<float> padding;
    cv::Mat boxed = letterbox(src_img, img_h, img_w, padding);

    // -------- Step 1. Initialize OpenVINO Runtime Core --------
    ov::Core core;

    // -------- Step 2. Read a model --------
    std::shared_ptr<ov::Model> model = core.read_model(model_path);
    auto output_num = model->get_output_size();

    // -------- Step 3. Preprocessing API --------
    ov::preprocess::PrePostProcessor prep(model);
    // Declare section of desired application's input format
    prep.input().tensor().set_layout("NHWC").set_color_format(ov::preprocess::ColorFormat::BGR);
    // Specify actual model layout
    prep.input().model().set_layout("NCHW");
    // Convert current color format (BGR) to RGB and scale to [0, 1]
    prep.input().preprocess().convert_color(ov::preprocess::ColorFormat::RGB).scale({255.0, 255.0, 255.0});
    // Dump preprocessor
    std::cout << "Preprocessor: " << prep << std::endl;
    model = prep.build();

    // -------- Step 4. Loading a model to the device --------
    ov::CompiledModel compiled_model = core.compile_model(model, device_name);

    // Get input port for model with one input
    auto input_port = compiled_model.input();

    // -------- Step 5. Create an infer request --------
    ov::InferRequest infer_request = compiled_model.create_infer_request();

    // -------- Step 6. Set input --------
    double start, end, res;
    start = cv::getTickCount();
    boxed.convertTo(boxed, CV_32FC3);

    // Wrap the letterboxed NHWC image directly; layout, color, and scale conversion happen in the graph
    ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), (float *)boxed.data);
    infer_request.set_input_tensor(input_tensor);

    // -------- Step 7. Start inference --------
    auto t1 = std::chrono::high_resolution_clock::now();
    infer_request.infer();
    auto t2 = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::milli> fp_ms = t2 - t1;

    std::cout << "inference took " << fp_ms.count() << " ms" << std::endl;

    // -------- Step 8. Process output --------
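    // Same output layout as main.cpp: [1, 25200, 85] rows of [cx, cy, w, h, objectness, 80 class scores].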
    std::vector<Object> proposals;
    if (output_num == 1)
    {
        int total_num = 25200;
        auto output_tensor = infer_request.get_output_tensor(0);
        const float *result = output_tensor.data<float>();
        std::vector<Object> objects;
        for (int i = 0; i <= total_num - 1; i++)
        {
            double max_prob = 0;
            int idx = 0;
            float box_prob = result[i * 85 + 4];
            if (box_prob < prob_threshold)
                continue;

            // get the class id with maximum confidence
            for (int t = 5; t < 85; ++t)
            {
                double tp = result[i * 85 + t];
                if (tp > max_prob)
                {
                    max_prob = tp;
                    idx = t;
                }
            }

            // filter the class with low confidence
            float cof = box_prob * max_prob;
            if (cof < prob_threshold)
                continue;

            // store the results
            Object obj;
            obj.rect.x = result[i * 85 + 0] - result[i * 85 + 2] / 2;
            obj.rect.y = result[i * 85 + 1] - result[i * 85 + 3] / 2;
            obj.rect.width = result[i * 85 + 2];
            obj.rect.height = result[i * 85 + 3];
            obj.label = idx - 5;
            obj.prob = cof;
            objects.push_back(obj);
        }
        proposals.insert(proposals.end(), objects.begin(), objects.end());
    }
    else
    {
        auto output_tensor_p8 = infer_request.get_output_tensor(0);
        const float *result_p8 = output_tensor_p8.data<float>();
        auto output_tensor_p16 = infer_request.get_output_tensor(1);
        const float *result_p16 = output_tensor_p16.data<float>();
        auto output_tensor_p32 = infer_request.get_output_tensor(2);
        const float *result_p32 = output_tensor_p32.data<float>();

        std::vector<Object> objects8;
        std::vector<Object> objects16;
        std::vector<Object> objects32;

        generate_proposals(8, result_p8, prob_threshold, objects8);
        proposals.insert(proposals.end(), objects8.begin(), objects8.end());
        generate_proposals(16, result_p16, prob_threshold, objects16);
        proposals.insert(proposals.end(), objects16.begin(), objects16.end());
        generate_proposals(32, result_p32, prob_threshold, objects32);
        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    }

    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (size_t i = 0; i < proposals.size(); i++)
    {
        classIds.push_back(proposals[i].label);
        confidences.push_back(proposals[i].prob);
        boxes.push_back(proposals[i].rect);
    }

    std::vector<int> picked;

    // do non-maximum suppression for each bounding box
    cv::dnn::NMSBoxes(boxes, confidences, prob_threshold, nms_threshold, picked);

    float raw_h = src_img.rows;
    float raw_w = src_img.cols;
    end = cv::getTickCount();

    for (size_t i = 0; i < picked.size(); i++)
    {
        int idx = picked[i];
        cv::Rect box = boxes[idx];
        cv::Rect scaled_box = scale_box(box, padding);
        drawPred(classIds[idx], confidences[idx], scaled_box, padding[2], raw_h, raw_w, src_img, class_names);
    }
    res = (end - start) / cv::getTickFrequency();
    cout << "time of output --> " << res << " s" << endl;
    cv::imwrite("yolov7_out.jpg", src_img);
}
--------------------------------------------------------------------------------