├── python
│   ├── requirements.txt
│   ├── yolov7_out.jpg
│   ├── image.py
│   ├── webcam.py
│   └── yolov7.py
├── data
│   └── horses.jpg
├── yolov7_out.jpg
├── cpp
│   ├── CMakeLists.txt
│   ├── main.cpp
│   └── main_preprocessing.cpp
└── README.md

/python/requirements.txt:
--------------------------------------------------------------------------------
openvino==2023.2.0
argparse
numpy
opencv-python
--------------------------------------------------------------------------------
/data/horses.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvino-dev-samples/YOLOv7_OpenVINO_cpp-python/HEAD/data/horses.jpg
--------------------------------------------------------------------------------
/yolov7_out.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvino-dev-samples/YOLOv7_OpenVINO_cpp-python/HEAD/yolov7_out.jpg
--------------------------------------------------------------------------------
/python/yolov7_out.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvino-dev-samples/YOLOv7_OpenVINO_cpp-python/HEAD/python/yolov7_out.jpg
--------------------------------------------------------------------------------
/cpp/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.10)
set(CMAKE_CXX_STANDARD 11)
set(TARGET_NAME yolov7)
find_package(OpenCV REQUIRED)
find_package(OpenVINO REQUIRED)

# Builds the Preprocessing API variant by default; switch to main.cpp for manual preprocessing
add_executable(${TARGET_NAME} main_preprocessing.cpp)
# add_executable(${TARGET_NAME} main.cpp)

target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime ${OpenCV_LIBS})
--------------------------------------------------------------------------------
/python/image.py:
--------------------------------------------------------------------------------
import yolov7
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-h', '--help', action='help', help='Show this help message and exit.')
    parser.add_argument('-i', '--input', required=True, type=str,
                        help='Required. Path to an image file.')
    parser.add_argument('-m', '--model', required=True, type=str,
                        help='Required. Path to an .xml or .onnx file with a trained model.')
    parser.add_argument('-d', '--device', required=False, default='CPU', type=str,
                        help='Device name.')
    parser.add_argument('-p', '--pre_api', required=False, action='store_true',
                        help='Use the OpenVINO Preprocessing API.')
    args = parser.parse_args()
    yolov7_detector = yolov7.YOLOV7_OPENVINO(args.model, args.device, args.pre_api, 1, 1)
    yolov7_detector.infer_image(args.input)
--------------------------------------------------------------------------------
/python/webcam.py:
--------------------------------------------------------------------------------
import yolov7
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-h', '--help', action='help', help='Show this help message and exit.')
    parser.add_argument('-i', '--input', required=True, type=int,
                        help='Required. Webcam ID.')
    parser.add_argument('-m', '--model', required=True, type=str,
                        help='Required. Path to an .xml or .onnx file with a trained model.')
    parser.add_argument('-d', '--device', required=False, default='CPU', type=str,
                        help='Device name.')
    parser.add_argument('-p', '--pre_api', required=False, action='store_true',
                        help='Use the OpenVINO Preprocessing API.')
    parser.add_argument('-bs', '--batchsize', required=False, default=1, type=int,
                        help='Batch size.')
    parser.add_argument('-n', '--nireq', required=False, default=2, type=int,
                        help='Number of infer requests.')

    args = parser.parse_args()
    yolov7_detector = yolov7.YOLOV7_OPENVINO(args.model, args.device, args.pre_api, args.batchsize, args.nireq)
    yolov7_detector.infer_cam(args.input)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# YOLOv7_OpenVINO
This repository demonstrates how to deploy an official YOLOv7 pre-trained model with the OpenVINO runtime API.

## 1. Install requirements
### 1.1 Python
```shell
$ pip install -r python/requirements.txt
```

### 1.2 C++ (Ubuntu)
Please follow the guides to install [OpenVINO](https://docs.openvino.ai/2023.2/openvino_docs_install_guides_installing_openvino_from_archive_linux.html) and [OpenCV](https://docs.opencv.org/4.x/d7/d9f/tutorial_linux_install.html).

## 2. Prepare the model
Download the YOLOv7 pre-trained weights from [YOLOv7](https://github.com/WongKinYiu/yolov7).

## 3. Export the ONNX model and convert it to OpenVINO IR
```shell
$ git clone git@github.com:WongKinYiu/yolov7.git
$ cd yolov7
$ pip install -r requirements.txt
$ python export.py --weights yolov7.pt
$ ovc yolov7.onnx
```
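After conversion, you can sanity-check the IR with the OpenVINO Python API before wiring it into the sample; a minimal sketch, assuming `yolov7.xml`/`yolov7.bin` sit in the current directory:
```python
from openvino.runtime import Core

# Read the converted IR and print its input/output names and shapes
core = Core()
model = core.read_model("yolov7.xml")
print("inputs: ", [(port.any_name, port.partial_shape) for port in model.inputs])
print("outputs:", [(port.any_name, port.partial_shape) for port in model.outputs])
```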
## 4. Run inference
The input image can be found in [YOLOv7's repository](https://github.com/WongKinYiu/yolov7/blob/main/inference/images/horses.jpg).
### 4.1 Python
```shell
$ python python/image.py -m path_to/yolov7.xml -i data/horses.jpg -d "CPU"
```
You can also try running the code with the [Preprocessing API](https://docs.openvino.ai/latest/openvino_docs_OV_UG_Preprocessing_Overview.html) for performance optimization.
```shell
$ python python/image.py -m path_to/yolov7.xml -i data/horses.jpg -d "CPU" -p
```

- -i = Path to an image or video source;
- -m = Path to an IR .xml or .onnx file;
- -d = Device name, e.g. "CPU";
- -p = Enable the preprocessing API;
- -bs = Batch size (webcam.py only);
- -n = Number of infer requests (webcam.py only).
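The CLI scripts are thin wrappers; a minimal sketch of driving the detector from Python directly, using the `YOLOV7_OPENVINO` class from `python/yolov7.py` (paths are placeholders):
```python
import yolov7

# Arguments: model path, device, Preprocessing API flag, batch size, number of infer requests
detector = yolov7.YOLOV7_OPENVINO("path_to/yolov7.xml", "CPU", False, 1, 1)
# Runs letterbox preprocessing, async inference, and NMS, then writes yolov7_out.jpg
detector.infer_image("data/horses.jpg")
```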
### 4.2 C++ (Ubuntu)
Compile the source code:
```shell
$ cd cpp
$ mkdir build && cd build
$ source ~/intel/openvino_2023.2/setupvars.sh
$ cmake ..
$ make
```
By default, ```CMakeLists.txt``` builds ```main_preprocessing.cpp```, which uses the [Preprocessing API](https://docs.openvino.ai/latest/openvino_docs_OV_UG_Preprocessing_Overview.html) for performance optimization. To build the plain version instead, switch the commented ```add_executable``` line to ```main.cpp```.

Run inference:
```shell
$ ./yolov7 path_to/yolov7.xml ../../data/horses.jpg 'CPU'
```

## 5. Results

![horse_res](https://user-images.githubusercontent.com/91237924/179361905-44fcd4ac-7a9e-41f0-bd07-b6cf07245c04.jpg)


## 6. Run with a webcam
You can also run the sample with a webcam for real-time detection:
```shell
$ python python/webcam.py -m path_to/yolov7.xml -i 0
```

Tip: you can switch the device name to **"GPU"** to improve the performance.

## 7. Further optimization
Try this notebook ([yolov7-optimization](https://github.com/openvinotoolkit/openvino_notebooks/tree/develop/notebooks/226-yolov7-optimization)) and quantize your YOLOv7 model to INT8.
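The notebook's INT8 flow is post-training quantization with NNCF; a rough sketch of the idea, assuming a toy one-image calibration set built from the sample image (real calibration needs a few hundred representative images, as in the notebook):
```python
import cv2
import numpy as np
import nncf
from openvino.runtime import Core, serialize

core = Core()
model = core.read_model("yolov7.xml")

# Toy one-item calibration set; the plain resize here is only for illustration
img = cv2.cvtColor(cv2.imread("data/horses.jpg"), cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (640, 640)).astype(np.float32) / 255.0
blob = np.expand_dims(img.transpose(2, 0, 1), 0)  # NCHW

calibration_dataset = nncf.Dataset([blob], lambda item: item)
quantized_model = nncf.quantize(model, calibration_dataset)
serialize(quantized_model, "yolov7_int8.xml")
```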
--------------------------------------------------------------------------------
/python/yolov7.py:
--------------------------------------------------------------------------------
from openvino.runtime import Core
import cv2
import numpy as np
import random
import time
from openvino.preprocess import PrePostProcessor, ColorFormat
from openvino.runtime import Layout, AsyncInferQueue, PartialShape


class YOLOV7_OPENVINO(object):
    def __init__(self, model_path, device, pre_api, batchsize, nireq):
        # set the hyperparameters
        self.classes = [
            "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
            "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
            "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
            "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
            "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
            "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
            "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
            "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
            "hair drier", "toothbrush"
        ]
        self.batchsize = batchsize
        self.img_size = (640, 640)
        self.conf_thres = 0.1
        self.iou_thres = 0.6
        self.class_num = 80
        self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.classes]
        self.stride = [8, 16, 32]
        self.anchor_list = [[12, 16, 19, 36, 40, 28], [36, 75, 76, 55, 72, 146], [142, 110, 192, 243, 459, 401]]
        self.anchor = np.array(self.anchor_list).astype(float).reshape(3, -1, 2)
        area = self.img_size[0] * self.img_size[1]
        self.size = [int(area / self.stride[0] ** 2), int(area / self.stride[1] ** 2), int(area / self.stride[2] ** 2)]
        self.feature = [[int(j / self.stride[i]) for j in self.img_size] for i in range(3)]

        ie = Core()
        self.model = ie.read_model(model_path)
        self.num_output = self.model.get_output_size()
        self.input_layer = self.model.input(0)
        new_shape = PartialShape([self.batchsize, 3, self.img_size[0], self.img_size[1]])
        self.model.reshape({self.input_layer.any_name: new_shape})
        self.pre_api = pre_api
        if self.pre_api:
            # Preprocessing API
            ppp = PrePostProcessor(self.model)
            # Declare section of desired application's input format
            ppp.input().tensor() \
                .set_layout(Layout("NHWC")) \
                .set_color_format(ColorFormat.BGR)
            # Here, it is assumed that the model has "NCHW" layout for input.
            ppp.input().model().set_layout(Layout("NCHW"))
            # Convert current color format (BGR) to RGB
            ppp.input().preprocess() \
                .convert_color(ColorFormat.RGB) \
                .scale([255.0, 255.0, 255.0])
            self.model = ppp.build()
            print(f'Dump preprocessor: {ppp}')

        self.compiled_model = ie.compile_model(model=self.model, device_name=device)
        self.infer_queue = AsyncInferQueue(self.compiled_model, nireq)

    def letterbox(self, img, new_shape=(640, 640), color=(114, 114, 114)):
        # Resize and pad image while meeting stride-multiple constraints
        shape = img.shape[:2]  # current shape [height, width]
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

        # divide padding into 2 sides
        dw /= 2
        dh /= 2

        # resize
        if shape[::-1] != new_unpad:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))

        # add border
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

        return img

    def xywh2xyxy(self, x):
        # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
        y = np.copy(x)
        y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
        y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
        y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
        y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y

        return y

    def nms(self, prediction, conf_thres, iou_thres):
        predictions = np.squeeze(prediction[0])

        # Filter out object confidence scores below threshold
        obj_conf = predictions[:, 4]
        predictions = predictions[obj_conf > conf_thres]
        obj_conf = obj_conf[obj_conf > conf_thres]

        # Multiply class confidence with bounding box confidence
        predictions[:, 5:] *= obj_conf[:, np.newaxis]

        # Get the scores
        scores = np.max(predictions[:, 5:], axis=1)

        # Filter out the objects with a low score
        valid_scores = scores > conf_thres
        predictions = predictions[valid_scores]
        scores = scores[valid_scores]

        # Get the class with the highest confidence
        class_ids = np.argmax(predictions[:, 5:], axis=1)

        # Get bounding boxes for each object
        boxes = self.xywh2xyxy(predictions[:, :4])

        # Apply non-maximum suppression to suppress weak, overlapping bounding boxes
        indices = cv2.dnn.NMSBoxes(boxes.tolist(), scores.tolist(), conf_thres, iou_thres)
        indices = np.array(indices).flatten()

        return boxes[indices], scores[indices], class_ids[indices]

    def clip_coords(self, boxes, img_shape):
        # Clip xyxy bounding boxes to image shape (height, width)
        boxes[:, 0] = boxes[:, 0].clip(0, img_shape[1])  # x1
        boxes[:, 1] = boxes[:, 1].clip(0, img_shape[0])  # y1
        boxes[:, 2] = boxes[:, 2].clip(0, img_shape[1])  # x2
        boxes[:, 3] = boxes[:, 3].clip(0, img_shape[0])  # y2

    def scale_coords(self, img1_shape, img0_shape, coords, ratio_pad=None):
        # Rescale coords (xyxy) from img1_shape to img0_shape
        # gain = old / new
        if ratio_pad is None:
            gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
            padding = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2
        else:
            gain = ratio_pad[0][0]
            padding = ratio_pad[1]
        coords[:, [0, 2]] -= padding[0]  # x padding
        coords[:, [1, 3]] -= padding[1]  # y padding
        coords[:, :4] /= gain
        self.clip_coords(coords, img0_shape)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    def plot_one_box(self, x, img, color=None, label=None, line_thickness=None):
        # Plots one bounding box on image img
        tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
        color = color or [random.randint(0, 255) for _ in range(3)]
        c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
        cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
        if label:
            tf = max(tl - 1, 1)  # font thickness
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
            cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
            cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3,
                        [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

    def draw(self, img, boxinfo):
        for xyxy, conf, cls in boxinfo:
            self.plot_one_box(xyxy, img, label=self.classes[int(cls)], color=self.colors[int(cls)], line_thickness=2)
        # cv2.imshow('Press ESC to Exit', img)
        # cv2.waitKey(1)

    def postprocess(self, infer_request, info):
        src_img_list, src_size = info
        for batch_id in range(self.batchsize):
            if self.num_output == 1:
                results = np.expand_dims(infer_request.get_output_tensor(0).data[batch_id], axis=0)
            else:
                output = []
                # Get each feature map's output data
                output.append(self.sigmoid(infer_request.get_output_tensor(0).data[batch_id].reshape(-1, self.size[0] * 3, 5 + self.class_num)))
                output.append(self.sigmoid(infer_request.get_output_tensor(1).data[batch_id].reshape(-1, self.size[1] * 3, 5 + self.class_num)))
                output.append(self.sigmoid(infer_request.get_output_tensor(2).data[batch_id].reshape(-1, self.size[2] * 3, 5 + self.class_num)))

                # Postprocessing: decode grid/anchor offsets into pixel boxes
                grid = []
                for _, f in enumerate(self.feature):
                    grid.append([[i, j] for j in range(f[0]) for i in range(f[1])])

                result = []
                for i in range(3):
                    src = output[i]
                    xy = src[..., 0:2] * 2. - 0.5
                    wh = (src[..., 2:4] * 2) ** 2
                    dst_xy = []
                    dst_wh = []
                    for j in range(3):
                        dst_xy.append((xy[:, j * self.size[i]:(j + 1) * self.size[i], :] + grid[i]) * self.stride[i])
                        dst_wh.append(wh[:, j * self.size[i]:(j + 1) * self.size[i], :] * self.anchor[i][j])
                    src[..., 0:2] = np.concatenate((dst_xy[0], dst_xy[1], dst_xy[2]), axis=1)
                    src[..., 2:4] = np.concatenate((dst_wh[0], dst_wh[1], dst_wh[2]), axis=1)
                    result.append(src)
                results = np.concatenate(result, 1)

            boxes, scores, class_ids = self.nms(results, self.conf_thres, self.iou_thres)
            img_shape = self.img_size
            self.scale_coords(img_shape, src_size, boxes)

            # Draw the results
            self.draw(src_img_list[batch_id], zip(boxes, scores, class_ids))

    def infer_image(self, img_path):
        # Read image
        src_img = cv2.imread(img_path)
        if src_img is None:
            raise ValueError('Failed to read image.')
        src_img_list = []
        src_img_list.append(src_img)
        img = self.letterbox(src_img, self.img_size)
        src_size = src_img.shape[:2]
        img = img.astype(dtype=np.float32)
        if not self.pre_api:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # BGR to RGB
            img /= 255.0
            img = img.transpose(2, 0, 1)  # HWC to CHW
        input_image = np.expand_dims(img, 0)

        # Set callback function for postprocess
        self.infer_queue.set_callback(self.postprocess)
        # Do inference
        self.infer_queue.start_async({self.input_layer.any_name: input_image}, (src_img_list, src_size))
        self.infer_queue.wait_all()
        cv2.imwrite("yolov7_out.jpg", src_img_list[0])

    def infer_cam(self, source):
        # Set callback function for postprocess
        self.infer_queue.set_callback(self.postprocess)
        # Capture camera source
        cap = cv2.VideoCapture(source)
        src_img_list = []
        img_list = []
        count = 0
        start_time = time.time()
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            img = self.letterbox(frame, self.img_size)
            src_size = frame.shape[:2]
            img = img.astype(dtype=np.float32)
            # Preprocessing (same path as infer_image when the Preprocessing API is off)
            if not self.pre_api:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # BGR to RGB
                img /= 255.0
                img = img.transpose(2, 0, 1)  # HWC to CHW
            input_image = np.expand_dims(img, 0)
            # Batching
            img_list.append(input_image)
            src_img_list.append(frame)
            if len(img_list) < self.batchsize:
                continue
            img_batch = np.concatenate(img_list)

            # Do inference
            self.infer_queue.start_async({self.input_layer.any_name: img_batch}, (src_img_list, src_size))
            src_img_list = []
            img_list = []
            count = count + self.batchsize
            c = cv2.waitKey(1)
            if c == 27:
                self.infer_queue.wait_all()
                break
        cap.release()
        cv2.destroyAllWindows()
        end_time = time.time()
        # Calculate the average FPS
        fps = count / (end_time - start_time)
        print("throughput: {:.2f} fps".format(fps))
--------------------------------------------------------------------------------
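The decode in `postprocess` above maps raw feature-map activations to pixel boxes using YOLOv7's parameterization. A tiny standalone check of the same formula for one grid cell (all numbers illustrative, not taken from a real model):
```python
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# One cell on the stride-8 feature map at grid position (10, 20), first anchor (12, 16)
t = sigmoid(np.array([0.2, -0.1, 0.3, 0.4]))     # raw tx, ty, tw, th after sigmoid
xy = (t[:2] * 2 - 0.5 + np.array([10, 20])) * 8  # box center in input-image pixels
wh = (t[2:] * 2) ** 2 * np.array([12, 16])       # box size scaled by the anchor
print(xy, wh)
```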
/cpp/main.cpp:
--------------------------------------------------------------------------------
#include <algorithm>
#include <chrono>
#include <cmath>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <sys/time.h>

#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <openvino/openvino.hpp>

using namespace std;

double get_wall_time()
{
    struct timeval time;
    if (gettimeofday(&time, NULL))
    {
        return 0;
    }
    return (double)time.tv_sec + (double)time.tv_usec * .000001;
}

struct Object
{
    cv::Rect_<float> rect;
    int label;
    float prob;
};

const std::vector<std::string> class_names = {
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush"};

inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (1.f + exp(-x)));
}

cv::Mat letterbox(cv::Mat &src, int h, int w, std::vector<float> &padding)
{
    // Resize and pad image while meeting stride-multiple constraints
    int in_w = src.cols;
    int in_h = src.rows;
    int tar_w = w;
    int tar_h = h;
    float r = min(float(tar_h) / in_h, float(tar_w) / in_w);
    int inside_w = round(in_w * r);
    int inside_h = round(in_h * r);
    int padd_w = tar_w - inside_w;
    int padd_h = tar_h - inside_h;
    cv::Mat resize_img;

    // resize
    resize(src, resize_img, cv::Size(inside_w, inside_h));

    // divide padding into 2 sides
    padd_w = padd_w / 2;
    padd_h = padd_h / 2;
    padding.push_back(padd_w);
    padding.push_back(padd_h);

    // store the ratio
    padding.push_back(r);
    int top = int(round(padd_h - 0.1));
    int bottom = int(round(padd_h + 0.1));
    int left = int(round(padd_w - 0.1));
    int right = int(round(padd_w + 0.1));

    // add border
    copyMakeBorder(resize_img, resize_img, top, bottom, left, right, 0, cv::Scalar(114, 114, 114));
    return resize_img;
}

cv::Rect scale_box(cv::Rect box, std::vector<float> &padding)
{
    // remove the padding area
    cv::Rect scaled_box;
    scaled_box.x = box.x - padding[0];
    scaled_box.y = box.y - padding[1];
    scaled_box.width = box.width;
    scaled_box.height = box.height;
    return scaled_box;
}

void drawPred(int classId, float conf, cv::Rect box, float ratio, float raw_h, float raw_w, cv::Mat &frame, const std::vector<std::string> &classes)
{
    float x0 = box.x;
    float y0 = box.y;
    float x1 = box.x + box.width;
    float y1 = box.y + box.height;

    // scale the bounding boxes to the size of the original image
    x0 = x0 / ratio;
    y0 = y0 / ratio;
    x1 = x1 / ratio;
    y1 = y1 / ratio;

    // Clip bounding boxes to image shape
    x0 = std::max(std::min(x0, (float)(raw_w - 1)), 0.f);
    y0 = std::max(std::min(y0, (float)(raw_h - 1)), 0.f);
    x1 = std::max(std::min(x1, (float)(raw_w - 1)), 0.f);
    y1 = std::max(std::min(y1, (float)(raw_h - 1)), 0.f);

    // Draw the bounding boxes and put the label text on the original image
    cv::rectangle(frame, cv::Point(x0, y0), cv::Point(x1, y1), cv::Scalar(0, 255, 0), 1);
    std::string label = cv::format("%.2f", conf);
    if (!classes.empty())
    {
        CV_Assert(classId < (int)classes.size());
        label = classes[classId] + ": " + label;
    }
    int baseLine;
    cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.25, 1, &baseLine);
    y0 = max(int(y0), labelSize.height);
    cv::rectangle(frame, cv::Point(x0, y0 - round(1.5 * labelSize.height)), cv::Point(x0 + round(2 * labelSize.width), y0 + baseLine), cv::Scalar(0, 255, 0), cv::FILLED);
    cv::putText(frame, label, cv::Point(x0, y0), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(), 1.5);
}
static void generate_proposals(int stride, const float *feat, float prob_threshold, std::vector<Object> &objects)
{
    // get the results from proposals
    float anchors[18] = {12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401};
    int anchor_num = 3;
    int feat_w = 640 / stride;
    int feat_h = 640 / stride;
    int cls_num = 80;
    int anchor_group = 0;
    if (stride == 8)
        anchor_group = 0;
    if (stride == 16)
        anchor_group = 1;
    if (stride == 32)
        anchor_group = 2;

    // 3 x h x w x (80 + 5)
    for (int anchor = 0; anchor <= anchor_num - 1; anchor++)
    {
        for (int i = 0; i <= feat_h - 1; i++)
        {
            for (int j = 0; j <= feat_w - 1; j++)
            {
                float box_prob = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 4];
                box_prob = sigmoid(box_prob);

                // filter the bounding box with low confidence
                if (box_prob < prob_threshold)
                    continue;
                float x = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 0];
                float y = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 1];
                float w = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 2];
                float h = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 3];

                double max_prob = 0;
                int idx = 0;

                // get the class id with maximum confidence
                for (int t = 5; t < 85; ++t)
                {
                    double tp = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + t];
                    tp = sigmoid(tp);
                    if (tp > max_prob)
                    {
                        max_prob = tp;
                        idx = t;
                    }
                }

                // filter the class with low confidence
                float cof = box_prob * max_prob;
                if (cof < prob_threshold)
                    continue;

                // convert results to xywh
                x = (sigmoid(x) * 2 - 0.5 + j) * stride;
                y = (sigmoid(y) * 2 - 0.5 + i) * stride;
                w = pow(sigmoid(w) * 2, 2) * anchors[anchor_group * 6 + anchor * 2];
                h = pow(sigmoid(h) * 2, 2) * anchors[anchor_group * 6 + anchor * 2 + 1];

                float r_x = x - w / 2;
                float r_y = y - h / 2;

                // store the results
                Object obj;
                obj.rect.x = r_x;
                obj.rect.y = r_y;
                obj.rect.width = w;
                obj.rect.height = h;
                obj.label = idx - 5;
                obj.prob = cof;
                objects.push_back(obj);
            }
        }
    }
}
int main(int argc, char *argv[])
{
    // set the hyperparameters
    int img_h = 640;
    int img_w = 640;
    int img_c = 3;
    int img_size = img_h * img_w * img_c;

    const float prob_threshold = 0.30f;
    const float nms_threshold = 0.60f;

    const std::string model_path{argv[1]};
    const char *image_path{argv[2]};
    const std::string device_name{argv[3]};

    cv::Mat src_img = cv::imread(image_path);
    cv::Mat img;

    std::vector<float> padding;
    cv::Mat boxed = letterbox(src_img, img_h, img_w, padding);

    cv::cvtColor(boxed, img, cv::COLOR_BGR2RGB);

    // -------- Step 1. Initialize OpenVINO Runtime Core --------
    ov::Core core;

    // -------- Step 2. Read a model --------
    std::shared_ptr<ov::Model> model = core.read_model(model_path);
    auto output_num = model->get_output_size();

    // -------- Step 3. Loading a model to the device --------
    ov::CompiledModel compiled_model = core.compile_model(model, device_name);

    // Get input port for model with one input
    auto input_port = compiled_model.input();

    // -------- Step 4. Create an infer request --------
    ov::InferRequest infer_request = compiled_model.create_infer_request();

    // -------- Step 5. Prepare input --------
    // Repack the letterboxed image from HWC (OpenCV) to CHW and normalize to [0, 1]
    std::vector<float> data1(img_h * img_w * 3);
    for (int h = 0; h < img_h; h++)
    {
        for (int w = 0; w < img_w; w++)
        {
            for (int c = 0; c < 3; c++)
            {
                int out_index = c * img_h * img_w + h * img_w + w;
                data1[out_index] = float(img.at<cv::Vec3b>(h, w)[c]) / 255.0f;
            }
        }
    }
    // Create tensor from external memory
    ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), data1.data());
    infer_request.set_input_tensor(input_tensor);

    // -------- Step 6. Start inference --------
    auto t1 = std::chrono::high_resolution_clock::now();
    infer_request.infer();
    auto t2 = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::milli> fp_ms = t2 - t1;

    std::cout << "inference took " << fp_ms.count() << " ms" << std::endl;

    // -------- Step 7. Process output --------
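    // With a single fused output, the tensor is [1, 25200, 85]: 25200 = 3 anchors x
    // (80*80 + 40*40 + 20*20) grid cells at strides 8/16/32 for a 640x640 input, and
    // each row holds [cx, cy, w, h, objectness, 80 class scores].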
    std::vector<Object> proposals;
    if (output_num == 1)
    {
        int total_num = 25200;
        auto output_tensor = infer_request.get_output_tensor(0);
        const float *result = output_tensor.data<float>();
        std::vector<Object> objects;
        for (int i = 0; i <= total_num - 1; i++)
        {
            double max_prob = 0;
            int idx = 0;
            float box_prob = result[i * 85 + 4];
            if (box_prob < prob_threshold)
                continue;

            // get the class id with maximum confidence
            for (int t = 5; t < 85; ++t)
            {
                double tp = result[i * 85 + t];
                if (tp > max_prob)
                {
                    max_prob = tp;
                    idx = t;
                }
            }

            // filter the class with low confidence
            float cof = box_prob * max_prob;
            if (cof < prob_threshold)
                continue;

            // store the results
            Object obj;
            obj.rect.x = result[i * 85 + 0] - result[i * 85 + 2] / 2;
            obj.rect.y = result[i * 85 + 1] - result[i * 85 + 3] / 2;
            obj.rect.width = result[i * 85 + 2];
            obj.rect.height = result[i * 85 + 3];
            obj.label = idx - 5;
            obj.prob = cof;
            objects.push_back(obj);
        }
        proposals.insert(proposals.end(), objects.begin(), objects.end());
    }
    else
    {
        auto output_tensor_p8 = infer_request.get_output_tensor(0);
        const float *result_p8 = output_tensor_p8.data<float>();
        auto output_tensor_p16 = infer_request.get_output_tensor(1);
        const float *result_p16 = output_tensor_p16.data<float>();
        auto output_tensor_p32 = infer_request.get_output_tensor(2);
        const float *result_p32 = output_tensor_p32.data<float>();

        std::vector<Object> objects8;
        std::vector<Object> objects16;
        std::vector<Object> objects32;

        generate_proposals(8, result_p8, prob_threshold, objects8);
        proposals.insert(proposals.end(), objects8.begin(), objects8.end());
        generate_proposals(16, result_p16, prob_threshold, objects16);
        proposals.insert(proposals.end(), objects16.begin(), objects16.end());
        generate_proposals(32, result_p32, prob_threshold, objects32);
        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    }

    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (size_t i = 0; i < proposals.size(); i++)
    {
        classIds.push_back(proposals[i].label);
        confidences.push_back(proposals[i].prob);
        boxes.push_back(proposals[i].rect);
    }

    std::vector<int> picked;

    // do non-maximum suppression for each bounding box
    cv::dnn::NMSBoxes(boxes, confidences, prob_threshold, nms_threshold, picked);

    float raw_h = src_img.rows;
    float raw_w = src_img.cols;

    for (size_t i = 0; i < picked.size(); i++)
    {
        int idx = picked[i];
        cv::Rect box = boxes[idx];
        cv::Rect scaled_box = scale_box(box, padding);
        drawPred(classIds[idx], confidences[idx], scaled_box, padding[2], raw_h, raw_w, src_img, class_names);
    }
    cv::imwrite("yolov7_out.jpg", src_img);
}
--------------------------------------------------------------------------------
24 | "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", 25 | "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", 26 | "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", 27 | "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", 28 | "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", 29 | "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", 30 | "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", 31 | "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", 32 | "hair drier", "toothbrush"}; 33 | 34 | inline float sigmoid(float x) 35 | { 36 | return static_cast(1.f / (1.f + exp(-x))); 37 | } 38 | 39 | cv::Mat letterbox(cv::Mat &src, int h, int w, std::vector &padding) 40 | { 41 | // Resize and pad image while meeting stride-multiple constraints 42 | int in_w = src.cols; 43 | int in_h = src.rows; 44 | int tar_w = w; 45 | int tar_h = h; 46 | float r = min(float(tar_h) / in_h, float(tar_w) / in_w); 47 | int inside_w = round(in_w * r); 48 | int inside_h = round(in_h * r); 49 | int padd_w = tar_w - inside_w; 50 | int padd_h = tar_h - inside_h; 51 | cv::Mat resize_img; 52 | 53 | // resize 54 | resize(src, resize_img, cv::Size(inside_w, inside_h)); 55 | 56 | // divide padding into 2 sides 57 | padd_w = padd_w / 2; 58 | padd_h = padd_h / 2; 59 | padding.push_back(padd_w); 60 | padding.push_back(padd_h); 61 | 62 | // store the ratio 63 | padding.push_back(r); 64 | int top = int(round(padd_h - 0.1)); 65 | int bottom = int(round(padd_h + 0.1)); 66 | int left = int(round(padd_w - 0.1)); 67 | int right = int(round(padd_w + 0.1)); 68 | 69 | // add border 70 | copyMakeBorder(resize_img, resize_img, top, bottom, left, right, 0, cv::Scalar(114, 114, 114)); 71 | return resize_img; 72 | } 73 | 74 | cv::Rect scale_box(cv::Rect box, std::vector &padding) 75 | { 76 | // remove the padding area 77 | cv::Rect scaled_box; 78 | scaled_box.x = box.x - padding[0]; 79 | scaled_box.y = box.y - padding[1]; 80 | scaled_box.width = box.width; 81 | scaled_box.height = box.height; 82 | return scaled_box; 83 | } 84 | 85 | void drawPred(int classId, float conf, cv::Rect box, float ratio, float raw_h, float raw_w, cv::Mat &frame, const std::vector &classes) 86 | { 87 | float x0 = box.x; 88 | float y0 = box.y; 89 | float x1 = box.x + box.width; 90 | float y1 = box.y + box.height; 91 | 92 | // scale the bounding boxes to size of origin image 93 | x0 = x0 / ratio; 94 | y0 = y0 / ratio; 95 | x1 = x1 / ratio; 96 | y1 = y1 / ratio; 97 | 98 | // Clip bounding boxes to image shape 99 | x0 = std::max(std::min(x0, (float)(raw_w - 1)), 0.f); 100 | y0 = std::max(std::min(y0, (float)(raw_h - 1)), 0.f); 101 | x1 = std::max(std::min(x1, (float)(raw_w - 1)), 0.f); 102 | y1 = std::max(std::min(y1, (float)(raw_h - 1)), 0.f); 103 | 104 | // Draw the bouding boxes and put the label text on the origin image 105 | cv::rectangle(frame, cv::Point(x0, y0), cv::Point(x1, y1), cv::Scalar(0, 255, 0), 1); 106 | std::string label = cv::format("%.2f", conf); 107 | if (!classes.empty()) 108 | { 109 | CV_Assert(classId < (int)classes.size()); 110 | label = classes[classId] + ": " + label; 111 | } 112 | int baseLine; 113 | cv::Size labelSize = 
    cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.25, 1, &baseLine);
    y0 = max(int(y0), labelSize.height);
    cv::rectangle(frame, cv::Point(x0, y0 - round(1.5 * labelSize.height)), cv::Point(x0 + round(2 * labelSize.width), y0 + baseLine), cv::Scalar(0, 255, 0), cv::FILLED);
    cv::putText(frame, label, cv::Point(x0, y0), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(), 1.5);
}

static void generate_proposals(int stride, const float *feat, float prob_threshold, std::vector<Object> &objects)
{
    // get the results from proposals
    float anchors[18] = {12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401};
    int anchor_num = 3;
    int feat_w = 640 / stride;
    int feat_h = 640 / stride;
    int cls_num = 80;
    int anchor_group = 0;
    if (stride == 8)
        anchor_group = 0;
    if (stride == 16)
        anchor_group = 1;
    if (stride == 32)
        anchor_group = 2;

    // 3 x h x w x (80 + 5)
    for (int anchor = 0; anchor <= anchor_num - 1; anchor++)
    {
        for (int i = 0; i <= feat_h - 1; i++)
        {
            for (int j = 0; j <= feat_w - 1; j++)
            {
                float box_prob = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 4];
                box_prob = sigmoid(box_prob);

                // filter the bounding box with low confidence
                if (box_prob < prob_threshold)
                    continue;
                float x = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 0];
                float y = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 1];
                float w = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 2];
                float h = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + 3];

                double max_prob = 0;
                int idx = 0;

                // get the class id with maximum confidence
                for (int t = 5; t < 85; ++t)
                {
                    double tp = feat[anchor * feat_h * feat_w * (cls_num + 5) + i * feat_w * (cls_num + 5) + j * (cls_num + 5) + t];
                    tp = sigmoid(tp);
                    if (tp > max_prob)
                    {
                        max_prob = tp;
                        idx = t;
                    }
                }

                // filter the class with low confidence
                float cof = box_prob * max_prob;
                if (cof < prob_threshold)
                    continue;

                // convert results to xywh
                x = (sigmoid(x) * 2 - 0.5 + j) * stride;
                y = (sigmoid(y) * 2 - 0.5 + i) * stride;
                w = pow(sigmoid(w) * 2, 2) * anchors[anchor_group * 6 + anchor * 2];
                h = pow(sigmoid(h) * 2, 2) * anchors[anchor_group * 6 + anchor * 2 + 1];

                float r_x = x - w / 2;
                float r_y = y - h / 2;

                // store the results
                Object obj;
                obj.rect.x = r_x;
                obj.rect.y = r_y;
                obj.rect.width = w;
                obj.rect.height = h;
                obj.label = idx - 5;
                obj.prob = cof;
                objects.push_back(obj);
            }
        }
    }
}

int main(int argc, char *argv[])
{
    // set the hyperparameters
    int img_h = 640;
    int img_w = 640;
    int img_c = 3;
    int img_size = img_h * img_w * img_c;

    const float prob_threshold = 0.30f;
    const float nms_threshold = 0.60f;

    const std::string model_path{argv[1]};
    const char *image_path{argv[2]};
    const std::string device_name{argv[3]};

    cv::Mat src_img = cv::imread(image_path);

    std::vector<float> padding;
    cv::Mat boxed = letterbox(src_img, img_h, img_w, padding);

    // -------- Step 1. Initialize OpenVINO Runtime Core --------
    ov::Core core;

    // -------- Step 2. Read a model --------
    std::shared_ptr<ov::Model> model = core.read_model(model_path);
    auto output_num = model->get_output_size();

    // -------- Step 3. Preprocessing API --------
    ov::preprocess::PrePostProcessor prep(model);
    // Declare section of desired application's input format
    prep.input().tensor().set_layout("NHWC").set_color_format(ov::preprocess::ColorFormat::BGR);
    // Specify actual model layout
    prep.input().model().set_layout("NCHW");
    // Convert current color format (BGR) to RGB and scale to [0, 1]
    prep.input().preprocess().convert_color(ov::preprocess::ColorFormat::RGB).scale({255.0, 255.0, 255.0});
    // Dump preprocessor
    std::cout << "Preprocessor: " << prep << std::endl;
    model = prep.build();

    // -------- Step 4. Loading a model to the device --------
    ov::CompiledModel compiled_model = core.compile_model(model, device_name);

    // Get input port for model with one input
    auto input_port = compiled_model.input();

    // -------- Step 5. Create an infer request --------
    ov::InferRequest infer_request = compiled_model.create_infer_request();

    // -------- Step 6. Set input --------
    double start, end, res;
    start = cv::getTickCount();
    boxed.convertTo(boxed, CV_32FC3);

    // Wrap the letterboxed NHWC image directly; layout, color, and scale conversion happen in the graph
    ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), (float *)boxed.data);
    infer_request.set_input_tensor(input_tensor);

    // -------- Step 7. Start inference --------
    auto t1 = std::chrono::high_resolution_clock::now();
    infer_request.infer();
    auto t2 = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::milli> fp_ms = t2 - t1;

    std::cout << "inference took " << fp_ms.count() << " ms" << std::endl;

    // -------- Step 8. Process output --------
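    // Same output layout as main.cpp: [1, 25200, 85] rows of [cx, cy, w, h, objectness, 80 class scores].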
    std::vector<Object> proposals;
    if (output_num == 1)
    {
        int total_num = 25200;
        auto output_tensor = infer_request.get_output_tensor(0);
        const float *result = output_tensor.data<float>();
        std::vector<Object> objects;
        for (int i = 0; i <= total_num - 1; i++)
        {
            double max_prob = 0;
            int idx = 0;
            float box_prob = result[i * 85 + 4];
            if (box_prob < prob_threshold)
                continue;

            // get the class id with maximum confidence
            for (int t = 5; t < 85; ++t)
            {
                double tp = result[i * 85 + t];
                if (tp > max_prob)
                {
                    max_prob = tp;
                    idx = t;
                }
            }

            // filter the class with low confidence
            float cof = box_prob * max_prob;
            if (cof < prob_threshold)
                continue;

            // store the results
            Object obj;
            obj.rect.x = result[i * 85 + 0] - result[i * 85 + 2] / 2;
            obj.rect.y = result[i * 85 + 1] - result[i * 85 + 3] / 2;
            obj.rect.width = result[i * 85 + 2];
            obj.rect.height = result[i * 85 + 3];
            obj.label = idx - 5;
            obj.prob = cof;
            objects.push_back(obj);
        }
        proposals.insert(proposals.end(), objects.begin(), objects.end());
    }
    else
    {
        auto output_tensor_p8 = infer_request.get_output_tensor(0);
        const float *result_p8 = output_tensor_p8.data<float>();
        auto output_tensor_p16 = infer_request.get_output_tensor(1);
        const float *result_p16 = output_tensor_p16.data<float>();
        auto output_tensor_p32 = infer_request.get_output_tensor(2);
        const float *result_p32 = output_tensor_p32.data<float>();

        std::vector<Object> objects8;
        std::vector<Object> objects16;
        std::vector<Object> objects32;

        generate_proposals(8, result_p8, prob_threshold, objects8);
        proposals.insert(proposals.end(), objects8.begin(), objects8.end());
        generate_proposals(16, result_p16, prob_threshold, objects16);
        proposals.insert(proposals.end(), objects16.begin(), objects16.end());
        generate_proposals(32, result_p32, prob_threshold, objects32);
        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    }

    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (size_t i = 0; i < proposals.size(); i++)
    {
        classIds.push_back(proposals[i].label);
        confidences.push_back(proposals[i].prob);
        boxes.push_back(proposals[i].rect);
    }

    std::vector<int> picked;

    // do non-maximum suppression for each bounding box
    cv::dnn::NMSBoxes(boxes, confidences, prob_threshold, nms_threshold, picked);

    float raw_h = src_img.rows;
    float raw_w = src_img.cols;
    end = cv::getTickCount();

    for (size_t i = 0; i < picked.size(); i++)
    {
        int idx = picked[i];
        cv::Rect box = boxes[idx];
        cv::Rect scaled_box = scale_box(box, padding);
        drawPred(classIds[idx], confidences[idx], scaled_box, padding[2], raw_h, raw_w, src_img, class_names);
    }
    res = (end - start) / cv::getTickFrequency();
    cout << "time of output --> " << res << " s" << endl;
    cv::imwrite("yolov7_out.jpg", src_img);
}
--------------------------------------------------------------------------------