├── LICENSE
├── README.md
├── boundingbox.py
├── client_image.py
├── client_video.py
├── labels.py
├── processing.py
└── render.py

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2021 JulyLi2019

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# tensorrt-yolov5

Object detection pre-processing and post-processing for a TensorRT YOLOv5 model served through Triton Inference Server.
--------------------------------------------------------------------------------
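A minimal end-to-end sketch of how the modules fit together for one image, assuming a Triton server is already serving the yolov5s TensorRT engine; the server URL and the image paths below are placeholders, not values from this repo:

import cv2
import numpy as np
import tritonclient.grpc as grpcclient

from processing import preprocess, postprocess
from render import render_box

# Placeholder address; point this at your own Triton endpoint
client = grpcclient.InferenceServerClient(url="localhost:8001")

image = cv2.imread("example.jpg")                 # any BGR test image
blob = np.expand_dims(preprocess(image), axis=0)  # (1, 3, 640, 640) FP32

inputs = [grpcclient.InferInput("data", [1, 3, 640, 640], "FP32")]
inputs[0].set_data_from_numpy(blob)
outputs = [grpcclient.InferRequestedOutput("prob")]

result = client.infer(model_name="yolov5s", inputs=inputs, outputs=outputs).as_numpy("prob")
for box in postprocess(result, image.shape[1], image.shape[0], conf_threshold=0.5):
    image = render_box(image, box.box())
cv2.imwrite("result.jpg", image)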
/boundingbox.py:
--------------------------------------------------------------------------------
class BoundingBox:
    """A detection box that tracks both absolute (pixel) and normalized coordinates."""

    def __init__(self, classID, confidence, x1, x2, y1, y2, image_width, image_height):
        self.classID = classID
        self.confidence = confidence
        # Absolute pixel coordinates
        self.x1 = x1
        self.x2 = x2
        self.y1 = y1
        self.y2 = y2
        # Coordinates normalized to [0, 1] by the image size
        self.u1 = x1 / image_width
        self.u2 = x2 / image_width
        self.v1 = y1 / image_height
        self.v2 = y2 / image_height

    def box(self):
        return (self.x1, self.y1, self.x2, self.y2)

    def width(self):
        return self.x2 - self.x1

    def height(self):
        return self.y2 - self.y1

    def center_absolute(self):
        return (0.5 * (self.x1 + self.x2), 0.5 * (self.y1 + self.y2))

    def center_normalized(self):
        return (0.5 * (self.u1 + self.u2), 0.5 * (self.v1 + self.v2))

    def size_absolute(self):
        return (self.x2 - self.x1, self.y2 - self.y1)

    def size_normalized(self):
        return (self.u2 - self.u1, self.v2 - self.v1)
--------------------------------------------------------------------------------
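For illustration, how the two coordinate systems behave (values made up):

from boundingbox import BoundingBox

# A 100x200 px box with corners (50, 100) and (150, 300) in a 640x480 image,
# classID 0 (person in COCOLabels) at confidence 0.9
b = BoundingBox(0, 0.9, 50, 150, 100, 300, 640, 480)
print(b.box())                # (50, 100, 150, 300) pixel corners
print(b.size_absolute())      # (100, 200) pixel width and height
print(b.center_normalized())  # (0.15625, ~0.4167) as fractions of the image size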
/client_image.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
# @Time    : 2021/8/19 9:28
# @Author  : JulyLi
# @File    : client_image.py
# @Software: PyCharm

import os
import sys
import time

import cv2
import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

from labels import COCOLabels
from processing import preprocess, postprocess
from render import render_box, render_filled_box, get_text_size, render_text, RAND_COLORS

# Connection and inference settings
url = '192.168.2.197:8001'
model = 'yolov5s'
confidence = 0.01
nms = 0.5
model_info = False
verbose = False
client_timeout = None
ssl = False
root_certificates = None
private_key = None
certificate_chain = None

# Create server context
try:
    triton_client = grpcclient.InferenceServerClient(
        url=url,
        verbose=verbose,
        ssl=ssl,
        root_certificates=root_certificates,
        private_key=private_key,
        certificate_chain=certificate_chain)
except Exception as e:
    print("context creation failed: " + str(e))
    sys.exit(1)

# Health check
if not triton_client.is_server_live():
    print("FAILED : is_server_live")
    sys.exit(1)

if not triton_client.is_server_ready():
    print("FAILED : is_server_ready")
    sys.exit(1)

if not triton_client.is_model_ready(model):
    print("FAILED : is_model_ready")
    sys.exit(1)

# Model metadata
try:
    metadata = triton_client.get_model_metadata(model)
except InferenceServerException as ex:
    print("FAILED : get_model_metadata")
    print("Got: {}".format(ex.message()))
    sys.exit(1)

# Model configuration
try:
    config = triton_client.get_model_config(model)
    if config.config.name != model:
        print("FAILED : get_model_config")
        sys.exit(1)
except InferenceServerException as ex:
    print("FAILED : get_model_config")
    print("Got: {}".format(ex.message()))
    sys.exit(1)


def infer(input_img):
    """Run detection on one image file and save the rendered result under ./output/."""
    if not input_img:
        print("FAILED: no input image")
        sys.exit(1)
    out = os.path.join("output", os.path.basename(input_img))

    inputs = [grpcclient.InferInput('data', [1, 3, 640, 640], "FP32")]
    outputs = [grpcclient.InferRequestedOutput('prob')]

    # Build the input buffer from the image file
    input_image = cv2.imread(input_img)
    if input_image is None:
        print("FAILED: could not load input image " + str(input_img))
        sys.exit(1)
    input_image_buffer = preprocess(input_image)
    input_image_buffer = np.expand_dims(input_image_buffer, axis=0)
    inputs[0].set_data_from_numpy(input_image_buffer)

    results = triton_client.infer(model_name=model,
                                  inputs=inputs,
                                  outputs=outputs,
                                  client_timeout=client_timeout)
    if model_info:
        statistics = triton_client.get_inference_statistics(model_name=model)
        if len(statistics.model_stats) != 1:
            print("FAILED: get_inference_statistics")
            sys.exit(1)
        print(statistics)

    result = results.as_numpy('prob')
    detected_objects = postprocess(result, input_image.shape[1], input_image.shape[0], confidence, nms)

    for box in detected_objects:
        input_image = render_box(input_image, box.box(),
                                 color=tuple(RAND_COLORS[box.classID % 64].tolist()))
        size = get_text_size(input_image, f"{COCOLabels(box.classID).name}: {box.confidence:.2f}",
                             normalised_scaling=0.6)
        input_image = render_filled_box(input_image, (box.x1 - 3, box.y1 - 3, box.x1 + size[0], box.y1 + size[1]),
                                        color=(220, 220, 220))
        input_image = render_text(input_image, f"{COCOLabels(box.classID).name}: {box.confidence:.2f}",
                                  (box.x1, box.y1), color=(30, 30, 30), normalised_scaling=0.5)

    if out:
        cv2.imwrite(out, input_image)
    else:
        cv2.imshow('image', input_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()


# Infer every image in ./input/, reporting per-image latency
os.makedirs("output", exist_ok=True)
for name in os.listdir("input"):
    time_begin = time.time()
    infer(os.path.join("input", name))
    print("time:", time.time() - time_begin)
--------------------------------------------------------------------------------
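client_image.py issues requests strictly one at a time. Throughput over a folder can usually be improved by overlapping requests with the gRPC client's callback-based async_infer; the sketch below is an assumption-laden illustration (callback argument handling, queue depth, and completion tracking should be checked against your tritonclient version):

import os
from functools import partial

import cv2
import numpy as np
import tritonclient.grpc as grpcclient

from processing import preprocess

triton_client = grpcclient.InferenceServerClient(url='192.168.2.197:8001')


def on_result(image_path, result, error):
    # Runs on a tritonclient worker thread once the server answers
    if error is not None:
        print("inference failed for", image_path, ":", error)
        return
    prob = result.as_numpy('prob')
    # ... postprocess / render / save as in infer() above ...


for name in os.listdir("input"):
    path = os.path.join("input", name)
    image = cv2.imread(path)
    if image is None:
        continue
    blob = np.expand_dims(preprocess(image), axis=0)
    req_inputs = [grpcclient.InferInput('data', [1, 3, 640, 640], "FP32")]
    req_inputs[0].set_data_from_numpy(blob)
    triton_client.async_infer(model_name='yolov5s',
                              inputs=req_inputs,
                              callback=partial(on_result, path),
                              outputs=[grpcclient.InferRequestedOutput('prob')])
# A real script must wait for all callbacks (e.g. with a counter or queue) before exiting.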
/client_video.py:
--------------------------------------------------------------------------------
import argparse
import sys
import time

import cv2
import numpy as np
import tritonclient.grpc as grpcclient

from labels import COCOLabels
from processing import preprocess, postprocess
from render import render_box, render_filled_box, get_text_size, render_text, RAND_COLORS


def client(frame, ip, port=':8001'):
    # NOTE: the parser, the connection, and the health checks are rebuilt for
    # every frame, which dominates per-frame time; see the restructured sketch
    # after this file.
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--model', type=str, required=False, default='yolov5s',
                        help='Inference model name, default yolov5s')
    parser.add_argument('-c', '--confidence', type=float, required=False, default=0.5,
                        help='Confidence threshold for detected objects, default 0.5')
    parser.add_argument('-n', '--nms', type=float, required=False, default=0.5,
                        help='Non-maximum suppression threshold for filtering raw boxes, default 0.5')
    parser.add_argument('-y', '--mask_y', type=int, default=150,
                        help='y coordinate of mask')
    FLAGS = parser.parse_args()

    # Create server context
    url = ip + port
    try:
        triton_client = grpcclient.InferenceServerClient(
            url=url,
            verbose=False,
            ssl=False,
            root_certificates=None,
            private_key=None,
            certificate_chain=None)
    except Exception as e:
        print("context creation failed: " + str(e))
        sys.exit(1)

    # Health check
    if not triton_client.is_server_live():
        print("FAILED : is_server_live")
        sys.exit(1)

    if not triton_client.is_server_ready():
        print("FAILED : is_server_ready")
        sys.exit(1)

    if not triton_client.is_model_ready(FLAGS.model):
        print("FAILED : is_model_ready")
        sys.exit(1)

    t0 = time.time()

    inputs = [grpcclient.InferInput('data', [1, 3, 640, 640], "FP32")]
    outputs = [grpcclient.InferRequestedOutput('prob')]

    input_image_buffer = preprocess(frame, FLAGS.mask_y)
    input_image_buffer = np.expand_dims(input_image_buffer, axis=0)
    inputs[0].set_data_from_numpy(input_image_buffer)

    results = triton_client.infer(model_name=FLAGS.model,
                                  inputs=inputs,
                                  outputs=outputs,
                                  client_timeout=None)

    result = results.as_numpy('prob')
    detected_objects = postprocess(result, frame.shape[1], frame.shape[0], FLAGS.confidence, FLAGS.nms)

    for box in detected_objects:
        # Draw the bounding box
        frame = render_box(frame, box.box(), color=tuple(RAND_COLORS[box.classID % 64].tolist()))
        # Draw the label background and the class name
        size = get_text_size(frame, f"{COCOLabels(box.classID).name}", normalised_scaling=0.6)
        frame = render_filled_box(frame, (box.x1 - 3, box.y1 - 3, box.x1 + size[0], box.y1 + size[1]),
                                  color=tuple(RAND_COLORS[box.classID % 64].tolist()))
        frame = render_text(frame, f"{COCOLabels(box.classID).name}", (box.x1, box.y1), color=(255, 255, 255),
                            normalised_scaling=0.5)

    # Timing
    t1 = time.time()
    print("time:%.2fs/frame" % (t1 - t0))

    return frame


if __name__ == '__main__':
    cap = cv2.VideoCapture('./daolu1.avi')
    while True:
        ret, frame = cap.read()
        print("######################")
        if ret:
            frame = client(frame, ip='192.168.2.100')
            cv2.imshow('frame', frame)
            cv2.waitKey(1)
        else:
            break
    cap.release()
    # cv2.destroyAllWindows()
--------------------------------------------------------------------------------
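Because client() rebuilds the parser, the connection, and the health checks for every frame, most of the reported per-frame time is setup rather than inference. Here is a sketch of a restructured loop that connects once; run_video and its defaults are illustrative names, not part of this repo:

import cv2
import numpy as np
import tritonclient.grpc as grpcclient

from processing import preprocess, postprocess


def run_video(path, url, model='yolov5s', conf=0.5, nms_thr=0.5):
    # One connection and one input/output description for the whole video
    triton_client = grpcclient.InferenceServerClient(url=url)
    inputs = [grpcclient.InferInput('data', [1, 3, 640, 640], "FP32")]
    outputs = [grpcclient.InferRequestedOutput('prob')]

    cap = cv2.VideoCapture(path)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        inputs[0].set_data_from_numpy(np.expand_dims(preprocess(frame), axis=0))
        result = triton_client.infer(model_name=model, inputs=inputs,
                                     outputs=outputs).as_numpy('prob')
        boxes = postprocess(result, frame.shape[1], frame.shape[0], conf, nms_thr)
        # ... render the boxes and cv2.imshow('frame', frame) as in client() ...
    cap.release()


# run_video('./daolu1.avi', '192.168.2.100:8001')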
/labels.py:
--------------------------------------------------------------------------------
from enum import Enum


class COCOLabels(Enum):
    person = 0
    bicycle = 1
    car = 2
    motorbike = 3
    aeroplane = 4
    bus = 5
    train = 6
    truck = 7
    boat = 8
    traffic_light = 9
    fire_hydrant = 10
    stop_sign = 11
    parking_meter = 12
    bench = 13
    bird = 14
    cat = 15
    dog = 16
    horse = 17
    sheep = 18
    cow = 19
    elephant = 20
    bear = 21
    zebra = 22
    giraffe = 23
    backpack = 24
    umbrella = 25
    handbag = 26
    tie = 27
    suitcase = 28
    frisbee = 29
    skis = 30
    snowboard = 31
    sports_ball = 32
    kite = 33
    baseball_bat = 34
    baseball_glove = 35
    skateboard = 36
    surfboard = 37
    tennis_racket = 38
    bottle = 39
    wine_glass = 40
    cup = 41
    fork = 42
    knife = 43
    spoon = 44
    bowl = 45
    banana = 46
    apple = 47
    sandwich = 48
    orange = 49
    broccoli = 50
    carrot = 51
    hot_dog = 52
    pizza = 53
    donut = 54
    cake = 55
    chair = 56
    sofa = 57
    pottedplant = 58
    bed = 59
    diningtable = 60
    toilet = 61
    tvmonitor = 62
    laptop = 63
    mouse = 64
    remote = 65
    keyboard = 66
    cell_phone = 67
    microwave = 68
    oven = 69
    toaster = 70
    sink = 71
    refrigerator = 72
    book = 73
    clock = 74
    vase = 75
    scissors = 76
    teddy_bear = 77
    hair_drier = 78
    toothbrush = 79
--------------------------------------------------------------------------------
/processing.py:
--------------------------------------------------------------------------------
from boundingbox import BoundingBox

import cv2
import numpy as np

# Print full arrays when debugging raw network output
np.set_printoptions(threshold=np.inf)

# Model input resolution
INPUT_HEIGHT = 640
INPUT_WIDTH = 640


def preprocess(image, mask_y=150):
    """Resize to the model resolution, convert BGR to RGB, transpose to CHW, scale to [0, 1].

    mask_y is currently unused; the masking of the top of the frame is disabled below.
    """
    img = image.copy()
    # image[mask_y - 5:mask_y, :, :] = 0
    # img[:mask_y, :, :] = 0
    img = cv2.resize(img, (INPUT_WIDTH, INPUT_HEIGHT))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(np.array(img, dtype=np.float32, order='C'), (2, 0, 1))
    img /= 255.0
    return img


def nms(boxes, box_confidences, nms_threshold=0.5):
    """Greedy non-maximum suppression over (x, y, w, h) boxes; returns the kept indices."""
    x_coord = boxes[:, 0]
    y_coord = boxes[:, 1]
    width = boxes[:, 2]
    height = boxes[:, 3]

    areas = width * height
    ordered = box_confidences.argsort()[::-1]  # highest confidence first

    keep = list()
    while ordered.size > 0:
        # Keep the most confident remaining box, then drop boxes that overlap it too much
        i = ordered[0]
        keep.append(i)
        xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]])
        yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]])
        xx2 = np.minimum(x_coord[i] + width[i], x_coord[ordered[1:]] + width[ordered[1:]])
        yy2 = np.minimum(y_coord[i] + height[i], y_coord[ordered[1:]] + height[ordered[1:]])

        width1 = np.maximum(0.0, xx2 - xx1 + 1)
        height1 = np.maximum(0.0, yy2 - yy1 + 1)
        intersection = width1 * height1
        union = (areas[i] + areas[ordered[1:]] - intersection)

        iou = intersection / union

        indexes = np.where(iou <= nms_threshold)[0]
        ordered = ordered[indexes + 1]
    keep = np.array(keep).astype(int)
    return keep


def postprocess(buffer, image_width, image_height, conf_threshold=0.8, nms_threshold=0.5):
    """Decode the 'prob' tensor into BoundingBox objects in original-image coordinates.

    buffer[0, 0, 0, 0] holds the detection count; each detection is 6 floats:
    (x_center, y_center, width, height, confidence, classID) in 640x640 input space.
    """
    detected_objects = []
    img_scale = [image_width / INPUT_WIDTH, image_height / INPUT_HEIGHT,
                 image_width / INPUT_WIDTH, image_height / INPUT_HEIGHT]
    num_bboxes = int(buffer[0, 0, 0, 0])

    if num_bboxes:
        bboxes = buffer[0, 1:(num_bboxes * 6 + 1), 0, 0].reshape(-1, 6)
        labels = set(bboxes[:, 5].astype(int))

        for label in labels:
            # Per-class confidence filtering followed by per-class NMS
            selected_bboxes = bboxes[np.where((bboxes[:, 5] == label) & (bboxes[:, 4] >= conf_threshold))]
            selected_bboxes_keep = selected_bboxes[nms(selected_bboxes[:, :4], selected_bboxes[:, 4], nms_threshold)]
            for idx in range(selected_bboxes_keep.shape[0]):
                box_xy = selected_bboxes_keep[idx, :2]
                box_wh = selected_bboxes_keep[idx, 2:4]
                score = selected_bboxes_keep[idx, 4]

                # Convert center/size to corners, clip to the input canvas, scale to the original image
                box_x1y1 = box_xy - (box_wh / 2)
                box_x2y2 = np.minimum(box_xy + (box_wh / 2), [INPUT_WIDTH, INPUT_HEIGHT])
                box = np.concatenate([box_x1y1, box_x2y2])
                box *= img_scale

                # Skip degenerate (zero-width or zero-height) boxes
                if box[0] == box[2]:
                    continue
                if box[1] == box[3]:
                    continue
                # BoundingBox expects (classID, confidence, x1, x2, y1, y2, image_width, image_height)
                detected_objects.append(BoundingBox(label, score, box[0], box[2], box[1], box[3],
                                                    image_width, image_height))
    return detected_objects
--------------------------------------------------------------------------------
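To make the 'prob' layout concrete, here is a synthetic buffer with a single detection; the 6001-element dimension mirrors the common tensorrtx YOLOv5 output (one count slot plus room for 1000 boxes of 6 floats) and is an assumption about the deployed engine:

import numpy as np

from processing import postprocess

buf = np.zeros((1, 6001, 1, 1), dtype=np.float32)
buf[0, 0, 0, 0] = 1                              # one detection
buf[0, 1:7, 0, 0] = [320, 320, 100, 80, 0.9, 2]  # (cx, cy, w, h, conf, classID=car) in 640x640 space

objs = postprocess(buf, image_width=1280, image_height=720, conf_threshold=0.5)
box = objs[0]
print(box.classID, box.confidence)  # 2 0.9
print(box.box())                    # (540.0, 315.0, 740.0, 405.0) after scaling to 1280x720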
/render.py:
--------------------------------------------------------------------------------
import numpy as np

import cv2

# Box outline thickness is derived from image area relative to this scale
_LINE_THICKNESS_SCALING = 500.0

np.random.seed(0)
# 64 reproducible colors indexed by classID % 64 (seeded so colors are stable
# across runs); the first four entries keep the original fixed palette.
RAND_COLORS = np.random.randint(50, 255, (64, 3))
RAND_COLORS[0] = [255, 0, 0]
RAND_COLORS[1] = [0, 255, 0]
RAND_COLORS[2] = [0, 0, 255]
RAND_COLORS[3] = [255, 255, 0]


def render_box(img, box, color=(200, 200, 200)):
    """
    Render a box. Calculates scaling and thickness automatically.
    :param img: image to render into
    :param box: (x1, y1, x2, y2) - box coordinates
    :param color: (b, g, r) - box color
    :return: updated image
    """
    x1, y1, x2, y2 = box
    thickness = int(
        round(
            (img.shape[0] * img.shape[1])
            / (_LINE_THICKNESS_SCALING * _LINE_THICKNESS_SCALING)
        )
    )
    thickness = max(1, thickness)
    img = cv2.rectangle(
        img,
        (int(x1), int(y1)),
        (int(x2), int(y2)),
        color,
        thickness=thickness
    )
    return img


def render_filled_box(img, box, color=(200, 200, 200)):
    """
    Render a filled box.
    :param img: image to render into
    :param box: (x1, y1, x2, y2) - box coordinates
    :param color: (b, g, r) - fill color
    :return: updated image
    """
    x1, y1, x2, y2 = box
    img = cv2.rectangle(
        img,
        (int(x1), int(y1)),
        (int(x2), int(y2)),
        color,
        thickness=cv2.FILLED
    )
    return img


# Text thickness and glyph size are derived from image size relative to these scales
_TEXT_THICKNESS_SCALING = 700.0
_TEXT_SCALING = 520.0


def get_text_size(img, text, normalised_scaling=1.0):
    """
    Get calculated text size (as box width and height)
    :param img: image reference, used to determine appropriate text scaling
    :param text: text to display
    :param normalised_scaling: additional normalised scaling. Default 1.0.
    :return: (width, height) - width and height of text box
    """
    thickness = int(
        round(
            (img.shape[0] * img.shape[1])
            / (_TEXT_THICKNESS_SCALING * _TEXT_THICKNESS_SCALING)
        )
        * normalised_scaling
    )
    thickness = max(1, thickness)
    scaling = img.shape[0] / _TEXT_SCALING * normalised_scaling
    return cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, scaling, thickness)[0]


def render_text(img, text, pos, color=(200, 200, 200), normalised_scaling=1.0):
    """
    Render a text into the image. Calculates scaling and thickness automatically.
    :param img: image to render into
    :param text: text to display
    :param pos: (x, y) - upper left coordinates of render position
    :param color: (b, g, r) - text color
    :param normalised_scaling: additional normalised scaling. Default 1.0.
    :return: updated image
    """
    x, y = pos
    thickness = int(
        round(
            (img.shape[0] * img.shape[1])
            / (_TEXT_THICKNESS_SCALING * _TEXT_THICKNESS_SCALING)
        )
        * normalised_scaling
    )
    thickness = max(1, thickness)
    scaling = img.shape[0] / _TEXT_SCALING * normalised_scaling
    size = get_text_size(img, text, normalised_scaling)
    cv2.putText(
        img,
        text,
        (int(x), int(y + size[1])),
        cv2.FONT_HERSHEY_SIMPLEX,
        scaling,
        color,
        thickness=thickness,
    )
    return img
--------------------------------------------------------------------------------
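Finally, a small self-contained smoke test of the render helpers on a blank canvas; the values and the output path are arbitrary:

import cv2
import numpy as np

from render import RAND_COLORS, get_text_size, render_box, render_filled_box, render_text

canvas = np.full((480, 640, 3), 255, dtype=np.uint8)  # white 640x480 canvas
color = tuple(RAND_COLORS[2 % 64].tolist())           # stable color for classID 2

canvas = render_box(canvas, (100, 100, 300, 250), color=color)
w, h = get_text_size(canvas, "car: 0.90", normalised_scaling=0.6)
canvas = render_filled_box(canvas, (97, 97, 100 + w, 100 + h), color=(220, 220, 220))
canvas = render_text(canvas, "car: 0.90", (100, 100), color=(30, 30, 30), normalised_scaling=0.5)
cv2.imwrite("render_demo.jpg", canvas)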