├── 0001-Enable-onnx-export-with-batchNMS-plugin.patch
├── 0001-Enable-onnx-export-with-decode-plugin.patch
├── LICENSE
├── README.md
├── deepstream-sample
│   ├── README.md
│   ├── config
│   │   ├── config_infer_primary_yoloV5.txt
│   │   ├── config_infer_primary_yoloV5_bs8.txt
│   │   ├── deepstream_app_config.txt
│   │   ├── deepstream_app_config_8s.txt
│   │   ├── deepstream_app_config_save_video.txt
│   │   └── labels.txt
│   ├── nvdsparsebbox_Yolo.cpp
│   ├── yoloForward_nc.cu
│   ├── yoloPlugins.cpp
│   └── yoloPlugins.h
├── requirement_export.txt
└── tensorrt-sample
    ├── requirement_infer.txt
    ├── trt_inference
    │   ├── __init__.py
    │   └── engine.py
    └── yolov5_trt_inference.py

--------------------------------------------------------------------------------
/0001-Enable-onnx-export-with-batchNMS-plugin.patch:
--------------------------------------------------------------------------------
From aa2d2584b6f1c61709436184735bb70b81fde7e9 Mon Sep 17 00:00:00 2001
From: Tyler Zhu
Date: Thu, 2 Jun 2022 10:34:21 +0800
Subject: [PATCH] Enable onnx export with batchNMS plugin

---
 export.py      | 170 ++++++++++++++++++++++++++++++++-----------------
 models/yolo.py |  24 +++++--
 2 files changed, 132 insertions(+), 62 deletions(-)

diff --git a/export.py b/export.py
index 72e170a..55e0a5f 100644
--- a/export.py
+++ b/export.py
@@ -111,62 +111,115 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:'
 
 def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorstr('ONNX:')):
     # YOLOv5 ONNX export
-    try:
-        check_requirements(('onnx',))
-        import onnx
-
-        LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
-        f = file.with_suffix('.onnx')
-
-        torch.onnx.export(
-            model,
-            im,
-            f,
-            verbose=False,
-            opset_version=opset,
-            training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL,
-            do_constant_folding=not train,
-            input_names=['images'],
-            output_names=['output'],
-            dynamic_axes={
-                'images': {
-                    0: 'batch',
-                    2: 'height',
-                    3: 'width'},  # shape(1,3,640,640)
-                'output': {
-                    0: 'batch',
-                    1: 'anchors'}  # shape(1,25200,85)
-            } if dynamic else None)
-
-        # Checks
-        model_onnx = onnx.load(f)  # load onnx model
-        onnx.checker.check_model(model_onnx)  # check onnx model
-
-        # Metadata
-        d = {'stride': int(max(model.stride)), 'names': model.names}
-        for k, v in d.items():
-            meta = model_onnx.metadata_props.add()
-            meta.key, meta.value = k, str(v)
-        onnx.save(model_onnx, f)
+    # try:
+    check_requirements(('onnx',))
+    import onnx
+
+    LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
+    f = file.with_suffix('.onnx')
+    print(train)
+    torch.onnx.export(
+        model,
+        im,
+        f,
+        verbose=False,
+        opset_version=opset,
+        training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL,
+        do_constant_folding=not train,
+        input_names=['images'],
+        output_names=['bbox', 'cls_score'],
+        dynamic_axes={
+            'images': {
+                0: 'batch',
+                2: 'height',
+                3: 'width'},  # shape(1,3,640,640)
+            'bbox': {
+                0: 'batch',
+                1: 'anchors'},  # shape(1,25200,4)
+            'cls_score': {
+                0: 'batch',
+                1: 'anchors'}
+        } if dynamic else None)
 
-        # Simplify
-        if simplify:
-            try:
-                check_requirements(('onnx-simplifier',))
-                import onnxsim
-
-                LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
-                model_onnx, check = onnxsim.simplify(model_onnx,
-                                                     dynamic_input_shape=dynamic,
-                                                     input_shapes={'images': list(im.shape)} if dynamic else None)
-                assert check, 'assert check failed'
-                onnx.save(model_onnx, f)
-            except Exception as e:
-                LOGGER.info(f'{prefix} simplifier failure: {e}')
-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
-    except Exception as e:
-        LOGGER.info(f'{prefix} export failure: {e}')
+    # Checks
+    model_onnx = onnx.load(f)  # load onnx model
+    onnx.checker.check_model(model_onnx)  # check onnx model
+
+    # Simplify
+    if simplify:
+        # try:
+        check_requirements(('onnx-simplifier',))
+        import onnxsim
+
+        LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
+        model_onnx, check = onnxsim.simplify(model_onnx,
+                                             dynamic_input_shape=dynamic,
+                                             input_shapes={'images': list(im.shape)} if dynamic else None)
+        assert check, 'assert check failed'
+        onnx.save(model_onnx, f)
+        # except Exception as e:
+        #     LOGGER.info(f'{prefix} simplifier failure: {e}')
+
+    # add batch NMS:
+    import onnx_graphsurgeon as onnx_gs
+    import numpy as np
+    yolo_graph = onnx_gs.import_onnx(model_onnx)
+    box_data = yolo_graph.outputs[0]
+    cls_data = yolo_graph.outputs[1]
+
+    nms_out_0 = onnx_gs.Variable(
+        "BatchedNMS",
+        dtype=np.int32
+    )
+    nms_out_1 = onnx_gs.Variable(
+        "BatchedNMS_1",
+        dtype=np.float32
+    )
+    nms_out_2 = onnx_gs.Variable(
+        "BatchedNMS_2",
+        dtype=np.float32
+    )
+    nms_out_3 = onnx_gs.Variable(
+        "BatchedNMS_3",
+        dtype=np.float32
+    )
+
+    nms_attrs = dict()
+
+    nms_attrs["shareLocation"] = 1
+    nms_attrs["backgroundLabelId"] = -1
+    nms_attrs["scoreThreshold"] = 0.001
+    nms_attrs["iouThreshold"] = 0.65
+    nms_attrs["topK"] = 2*300
+    nms_attrs["keepTopK"] = 300
+    nms_attrs["numClasses"] = 80
+    nms_attrs["clipBoxes"] = 0
+    nms_attrs["isNormalized"] = 0
+    nms_attrs["scoreBits"] = 16
+
+    nms_plugin = onnx_gs.Node(
+        op="BatchedNMSDynamic_TRT",
+        name="BatchedNMS_N",
+        inputs=[box_data, cls_data],
+        outputs=[nms_out_0, nms_out_1, nms_out_2, nms_out_3],
+        attrs=nms_attrs
+    )
+
+    yolo_graph.nodes.append(nms_plugin)
+    yolo_graph.outputs = nms_plugin.outputs
+    yolo_graph.cleanup().toposort()
+    model_onnx = onnx_gs.export_onnx(yolo_graph)
+    # Metadata
+    d = {'stride': int(max(model.stride)), 'names': model.names}
+    for k, v in d.items():
+        meta = model_onnx.metadata_props.add()
+        meta.key, meta.value = k, str(v)
+
+    onnx.save(model_onnx, f)
+    LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
+    return f
+    # except Exception as e:
+    #     LOGGER.info(f'{prefix} export failure: {e}')
 
 
 def export_openvino(model, file, half, prefix=colorstr('OpenVINO:')):
@@ -488,7 +541,7 @@ def run(
         assert not dynamic, '--half not compatible with --dynamic, i.e. use either --half or --dynamic but not both'
     model = attempt_load(weights, device=device, inplace=True, fuse=True)  # load FP32 model
     nc, names = model.nc, model.names  # number of classes, class names
-
+    
     # Checks
     imgsz *= 2 if len(imgsz) == 1 else 1  # expand
     assert nc == len(names), f'Model class count {nc} != len(names) {len(names)}'
@@ -499,6 +552,7 @@ def run(
     im = torch.zeros(batch_size, 3, *imgsz).to(device)  # image size(1,3,320,192) BCHW iDetection
 
     # Update model
+    import torch.nn as nn
    if half and not coreml and not xml:
         im, model = im.half(), model.half()  # to FP16
     model.train() if train else model.eval()  # training mode = no Detect() layer grid construction
@@ -507,7 +561,9 @@
             m.inplace = inplace
             m.onnx_dynamic = dynamic
             m.export = True
-
+        elif isinstance(m, nn.Upsample):
+            print(m)
+    
     for _ in range(2):
         y = model(im)  # dry runs
     shape = tuple(y[0].shape)  # model output shape
diff --git a/models/yolo.py b/models/yolo.py
index 02660e6..e98fb33 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -70,14 +70,28 @@ class Detect(nn.Module):
                     xy, wh, conf = y.split((2, 2, self.nc + 1), 4)  # y.tensor_split((2, 4, 5), 4)  # torch 1.8.0
                     xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                     wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
-                    y = torch.cat((xy, wh, conf), 4)
-                z.append(y.view(bs, -1, self.no))
-
-        return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
+                    xmin = xy[..., 0:1] - wh[..., 0:1] / 2
+                    ymin = xy[..., 1:2] - wh[..., 1:2] / 2
+                    xmax = xy[..., 0:1] + wh[..., 0:1] / 2
+                    ymax = xy[..., 1:2] + wh[..., 1:2] / 2
+                    obj_conf = conf[..., 0:1]
+                    cls_conf = conf[..., 1:]
+                    cls_conf *= obj_conf
+                    # y = torch.cat((xy, wh, conf), 4)
+                    y = torch.cat((xmin, ymin, xmax, ymax, cls_conf), 4)
+                z.append(y.view(bs, -1, self.no - 1))
+
+        z = torch.cat(z, 1)
+        bbox = z[..., 0:4].view(bs, -1, 1, 4)
+        cls_conf = z[..., 4:]
+        return bbox, cls_conf
+        # return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
 
     def _make_grid(self, nx=20, ny=20, i=0):
         d = self.anchors[i].device
-        t = self.anchors[i].dtype
+        # t = self.anchors[i].dtype
+        # TODO(tylerz) hard-code data type to int
+        t = torch.int32
         shape = 1, self.na, ny, nx, 2  # grid shape
         y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
         if check_version(torch.__version__, '1.10.0'):  # torch>=1.10.0 meshgrid workaround for torch>=0.7 compatibility
-- 
2.36.0

--------------------------------------------------------------------------------
/0001-Enable-onnx-export-with-decode-plugin.patch:
--------------------------------------------------------------------------------
From 0ab0d40b5874791700720282a20259b2b404c984 Mon Sep 17 00:00:00 2001
From: Tyler Zhu
Date: Thu, 2 Jun 2022 10:34:21 +0800
Subject: [PATCH] Enable onnx export with decode plugin

---
 export.py      | 170 ++++++++++++++++++++++++++++++++-----------------
 models/yolo.py |  26 ++------
 2 files changed, 119 insertions(+), 77 deletions(-)

diff --git a/export.py b/export.py
index 72e170a..f7a5572 100644
--- a/export.py
+++ b/export.py
@@ -111,62 +111,115 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:'
 
 def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorstr('ONNX:')):
     # YOLOv5 ONNX export
-    try:
-        check_requirements(('onnx',))
-        import onnx
-
-        LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
-        f = file.with_suffix('.onnx')
-
-        torch.onnx.export(
-            model,
-            im,
-            f,
-            verbose=False,
-            opset_version=opset,
-            training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL,
-            do_constant_folding=not train,
-            input_names=['images'],
-            output_names=['output'],
-            dynamic_axes={
-                'images': {
-                    0: 'batch',
-                    2: 'height',
-                    3: 'width'},  # shape(1,3,640,640)
-                'output': {
-                    0: 'batch',
-                    1: 'anchors'}  # shape(1,25200,85)
-            } if dynamic else None)
-
-        # Checks
-        model_onnx = onnx.load(f)  # load onnx model
-        onnx.checker.check_model(model_onnx)  # check onnx model
-
-        # Metadata
-        d = {'stride': int(max(model.stride)), 'names': model.names}
-        for k, v in d.items():
-            meta = model_onnx.metadata_props.add()
-            meta.key, meta.value = k, str(v)
-        onnx.save(model_onnx, f)
+    # try:
+    check_requirements(('onnx',))
+    import onnx
+
+    LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
+    f = file.with_suffix('.onnx')
+    print(train)
+    torch.onnx.export(
+        model,
+        im,
+        f,
+        verbose=False,
+        opset_version=opset,
+        training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL,
+        do_constant_folding=not train,
+        input_names=['images'],
+        output_names=['p3', 'p4', 'p5'],
+        dynamic_axes={
+            'images': {
+                0: 'batch',
+                2: 'height',
+                3: 'width'},  # shape(1,3,640,640)
+            'p3': {
+                0: 'batch',
+                2: 'height',
+                3: 'width'},  # shape(1,25200,4)
+            'p4': {
+                0: 'batch',
+                2: 'height',
+                3: 'width'},
+            'p5': {
+                0: 'batch',
+                2: 'height',
+                3: 'width'}
+        } if dynamic else None)
 
-        # Simplify
-        if simplify:
-            try:
-                check_requirements(('onnx-simplifier',))
-                import onnxsim
-
-                LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
-                model_onnx, check = onnxsim.simplify(model_onnx,
-                                                     dynamic_input_shape=dynamic,
-                                                     input_shapes={'images': list(im.shape)} if dynamic else None)
-                assert check, 'assert check failed'
-                onnx.save(model_onnx, f)
-            except Exception as e:
-                LOGGER.info(f'{prefix} simplifier failure: {e}')
-        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
-        return f
-    except Exception as e:
-        LOGGER.info(f'{prefix} export failure: {e}')
+    # Checks
+    model_onnx = onnx.load(f)  # load onnx model
+    onnx.checker.check_model(model_onnx)  # check onnx model
+
+    # Simplify
+    if simplify:
+        # try:
+        check_requirements(('onnx-simplifier',))
+        import onnxsim
+
+        LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
+        model_onnx, check = onnxsim.simplify(model_onnx,
+                                             dynamic_input_shape=dynamic,
+                                             input_shapes={'images': list(im.shape)} if dynamic else None)
+        assert check, 'assert check failed'
+        onnx.save(model_onnx, f)
+        # except Exception as e:
+        #     LOGGER.info(f'{prefix} simplifier failure: {e}')
+
+    # add yolov5_decoding:
+    import onnx_graphsurgeon as onnx_gs
+    import numpy as np
+    yolo_graph = onnx_gs.import_onnx(model_onnx)
+    p3 = yolo_graph.outputs[0]
+    p4 = yolo_graph.outputs[1]
+    p5 = yolo_graph.outputs[2]
+    decode_out_0 = onnx_gs.Variable(
+        "DecodeNumDetection",
+        dtype=np.int32
+    )
+    decode_out_1 = onnx_gs.Variable(
+        "DecodeDetectionBoxes",
+        dtype=np.float32
+    )
+    decode_out_2 = onnx_gs.Variable(
+        "DecodeDetectionScores",
+        dtype=np.float32
+    )
+    decode_out_3 = onnx_gs.Variable(
+        "DecodeDetectionClasses",
+        dtype=np.int32
+    )
+
+    decode_attrs = dict()
+
+    decode_attrs["max_stride"] = int(max(model.stride))
+    decode_attrs["num_classes"] = model.model[-1].nc
+    decode_attrs["anchors"] = [float(v) for v in [10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326]]
+    decode_attrs["prenms_score_threshold"] = 0.25
+
+    decode_plugin = onnx_gs.Node(
+        op="YoloLayer_TRT",
+        name="YoloLayer",
+        inputs=[p3, p4, p5],
+        outputs=[decode_out_0, decode_out_1, decode_out_2, decode_out_3],
+        attrs=decode_attrs
+    )
+
+    yolo_graph.nodes.append(decode_plugin)
+    yolo_graph.outputs = decode_plugin.outputs
+    yolo_graph.cleanup().toposort()
+    model_onnx = onnx_gs.export_onnx(yolo_graph)
+
+    d = {'stride': int(max(model.stride)), 'names': model.names}
+    for k, v in d.items():
+        meta = model_onnx.metadata_props.add()
+        meta.key, meta.value = k, str(v)
+
+    onnx.save(model_onnx, f)
+    LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
+    return f
+    # except Exception as e:
+    #     LOGGER.info(f'{prefix} export failure: {e}')
 
 
 def export_openvino(model, file, half, prefix=colorstr('OpenVINO:')):
@@ -488,7 +541,7 @@ def run(
         assert not dynamic, '--half not compatible with --dynamic, i.e. use either --half or --dynamic but not both'
     model = attempt_load(weights, device=device, inplace=True, fuse=True)  # load FP32 model
     nc, names = model.nc, model.names  # number of classes, class names
-
+    
     # Checks
     imgsz *= 2 if len(imgsz) == 1 else 1  # expand
     assert nc == len(names), f'Model class count {nc} != len(names) {len(names)}'
@@ -499,6 +552,7 @@ def run(
     im = torch.zeros(batch_size, 3, *imgsz).to(device)  # image size(1,3,320,192) BCHW iDetection
 
     # Update model
+    import torch.nn as nn
     if half and not coreml and not xml:
         im, model = im.half(), model.half()  # to FP16
     model.train() if train else model.eval()  # training mode = no Detect() layer grid construction
@@ -507,7 +561,9 @@
             m.inplace = inplace
             m.onnx_dynamic = dynamic
             m.export = True
-
+        elif isinstance(m, nn.Upsample):
+            print(m)
+    
     for _ in range(2):
         y = model(im)  # dry runs
     shape = tuple(y[0].shape)  # model output shape
diff --git a/models/yolo.py b/models/yolo.py
index 02660e6..c810745 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -55,29 +55,15 @@ class Detect(nn.Module):
         z = []  # inference output
         for i in range(self.nl):
             x[i] = self.m[i](x[i])  # conv
-            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
-            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
-
-            if not self.training:  # inference
-                if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
-                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
-
-                y = x[i].sigmoid()
-                if self.inplace:
-                    y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i]  # xy
-                    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
-                else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
-                    xy, wh, conf = y.split((2, 2, self.nc + 1), 4)  # y.tensor_split((2, 4, 5), 4)  # torch 1.8.0
-                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
-                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
-                    y = torch.cat((xy, wh, conf), 4)
-                z.append(y.view(bs, -1, self.no))
-
-        return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
+            y = x[i].sigmoid()
+            z.append(y)
+        return z
 
     def _make_grid(self, nx=20, ny=20, i=0):
         d = self.anchors[i].device
-        t = self.anchors[i].dtype
+        # t = self.anchors[i].dtype
+        # TODO(tylerz) hard-code data type to int
+        t = torch.int32
         shape = 1, self.na, ny, nx, 2  # grid shape
         y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
         if check_version(torch.__version__, '1.10.0'):  # torch>=1.10.0 meshgrid workaround for torch>=0.7 compatibility
-- 
2.36.0

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.

Preamble

The GNU General Public License is a free, copyleft license for
software and other kinds of works.
[The remainder of the standard GNU GPL v3 text is omitted here for brevity; see https://www.gnu.org/licenses/gpl-3.0.txt for the full license.]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# YOLOV5 inference solution in DeepStream and TensorRT
This repo provides sample code for deploying YOLOV5 models either with DeepStream or through a stand-alone TensorRT inference script on NVIDIA devices.

* [DeepStream sample](#deepstream-sample)
* [TensorRT sample](#tensorrt-sample)
* [Appendix](#appendix)

## DeepStream sample
In this section, we walk through the steps to run a YOLOV5 model in DeepStream with CPU NMS.
### Export the ultralytics YOLOV5 model to ONNX with the TRT decode plugin
You can start from the nvcr.io/nvidia/pytorch:22.03-py3 container for the export:
```
git clone https://github.com/ultralytics/yolov5.git
# clone this repo and copy the patch and requirement file into the yolov5 folder
git clone https://github.com/NVIDIA-AI-IOT/yolov5_gpu_optimization.git
cp yolov5_gpu_optimization/0001-Enable-onnx-export-with-decode-plugin.patch yolov5_gpu_optimization/requirement_export.txt yolov5/
cd yolov5
git checkout a80dd66efe0bc7fe3772f259260d5b7278aab42f
git am 0001-Enable-onnx-export-with-decode-plugin.patch
pip install -r requirement_export.txt
apt update && apt install -y libgl1-mesa-glx
python export.py --weights yolov5s.pt --include onnx --simplify --dynamic
```
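After the export finishes, it can be worth sanity-checking that the decode node actually made it into the graph before moving on. Below is a minimal check (our illustration, not part of this repo); the `YoloLayer_TRT` op and the four `Decode*` output names come straight from the decode-plugin patch:
```python
import onnx

model = onnx.load("yolov5s.onnx")

# The patch appends a single YoloLayer_TRT node and makes its outputs the graph outputs.
ops = {node.op_type for node in model.graph.node}
assert "YoloLayer_TRT" in ops, "decode plugin node missing from exported graph"

# Expect: DecodeNumDetection, DecodeDetectionBoxes, DecodeDetectionScores, DecodeDetectionClasses
print([output.name for output in model.graph.output])
```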
24 | ### Prepare the library for DeepStream inference
25 | You can start from the nvcr.io/nvidia/deepstream:6.1.1-devel container for inference.
26 | 
27 | Then go to the deepstream sample directory:
28 | ```
29 | cd deepstream-sample
30 | ```
31 | Compile the plugin and the DeepStream parser:
32 | 
33 | * On x86:
34 | ```
35 | nvcc -Xcompiler -fPIC -shared -o yolov5_decode.so ./yoloForward_nc.cu ./yoloPlugins.cpp ./nvdsparsebbox_Yolo.cpp -isystem /usr/include/x86_64-linux-gnu/ -L /usr/lib/x86_64-linux-gnu/ -I /opt/nvidia/deepstream/deepstream/sources/includes -lnvinfer
36 | ```
37 | * On Jetson devices:
38 | ```
39 | nvcc -Xcompiler -fPIC -shared -o yolov5_decode.so ./yoloForward_nc.cu ./yoloPlugins.cpp ./nvdsparsebbox_Yolo.cpp -isystem /usr/include/aarch64-linux-gnu/ -L /usr/lib/aarch64-linux-gnu/ -I /opt/nvidia/deepstream/deepstream/sources/includes -lnvinfer
40 | ```
41 | ### Run inference
42 | Place the exported ONNX model in `deepstream-sample`:
43 | ```
44 | cp yolov5/yolov5s.onnx yolov5_gpu_optimization/deepstream-sample/
45 | ```
46 | Then you can run the model with the pre-defined configs.
47 | 
48 | * Run inference and save the annotated video:
49 | ```
50 | deepstream-app -c config/deepstream_app_config_save_video.txt
51 | ```
52 | * Run inference without display:
53 | ```
54 | deepstream-app -c config/deepstream_app_config.txt
55 | ```
56 | * Run inference with 8 streams, batch_size=8, and no display:
57 | ```
58 | deepstream-app -c config/deepstream_app_config_8s.txt
59 | ```
60 | 
61 | ### Performance summary
62 | The performance test is conducted on T4 with nvcr.io/nvidia/deepstream:6.1.1-devel; the numbers below are pipeline throughput in FPS.
63 | 
64 | | Model | Input Size | Device | Precision | 1 stream bs=1 | 4 streams bs=4 | 8 streams bs=8 |
65 | |---------|------------|--------|-----------|---------------|----------------|----------------|
66 | | yolov5n | 3x640x640 | T4 | FP16 | 640 | 980 | 988 |
67 | | yolov5m | 3x640x640 | T4 | FP16 | 220 | 270 | 277 |
68 | 
69 | ## TensorRT sample
70 | In this section, we walk through the steps to run a YOLOV5 model with GPU NMS using a stand-alone TensorRT inference script.
71 | ### Export the ultralytics YOLOV5 model to ONNX with the TRT BatchNMS plugin
72 | You can start from the nvcr.io/nvidia/pytorch:22.03-py3 container for the export.
73 | ```
74 | git clone https://github.com/ultralytics/yolov5.git
75 | # clone the yolov5_gpu_optimization repo and copy the files into the yolov5 folder
76 | git clone https://github.com/NVIDIA-AI-IOT/yolov5_gpu_optimization.git
77 | cp -r yolov5_gpu_optimization/0001-Enable-onnx-export-with-batchNMS-plugin.patch yolov5_gpu_optimization/requirement_export.txt yolov5/
78 | cd yolov5
79 | git checkout a80dd66efe0bc7fe3772f259260d5b7278aab42f
80 | git am 0001-Enable-onnx-export-with-batchNMS-plugin.patch
81 | pip install -r requirement_export.txt
82 | apt update && apt install -y libgl1-mesa-glx
83 | python export.py --weights yolov5s.pt --include onnx --simplify --dynamic
84 | ```
85 | 
86 | ### Run with TensorRT
87 | 
88 | For the following section, you can start from nvcr.io/nvidia/tensorrt:22.05-py3 and prepare the environment with:
89 | ```
90 | cd tensorrt-sample
91 | pip install -r requirement_infer.txt
92 | apt update && apt install -y libgl1-mesa-glx
93 | ```
94 | 
95 | Build the plugin library by following the [previous steps](#prepare-the-library-for-deepstream-inference).
96 | #### Run inference
97 | ```
98 | python yolov5_trt_inference.py --input_images_folder=<image folder> --output_images_folder=./coco_output --onnx=<path to onnx model>
99 | ```
100 | #### Run evaluation on the COCO17 validation dataset
101 | 
102 | ##### Square inference evaluation
103 | The image will be resized to 3xINPUT_SIZExINPUT_SIZE while keeping the aspect ratio.
104 | ```
105 | python yolov5_trt_inference.py --input_images_folder=<image folder> --output_images_folder=<output image folder> --onnx=<path to onnx model> --coco_anno=<path to COCO annotation file>
106 | ```
107 | 
108 | ##### Rectangular inference evaluation
109 | This is not true rectangular inference as in PyTorch; it is equivalent to setting `pad=0, rect=False, imgsz=input_size + stride` in ultralytics YOLOV5.
110 | ```
111 | # Default FP16 precision
112 | python yolov5_trt_inference.py --input_images_folder=<image folder> --output_images_folder=<output image folder> --onnx=<path to onnx model> --coco_anno=<path to COCO annotation file> --rect
113 | ```
114 | 
115 | 
116 | #### Evaluation in INT8 mode
117 | To run INT8 inference or evaluation, you need TensorRT 8.4 or above. You can start from `nvcr.io/nvidia/tensorrt:22.07-py3`.
118 | 
119 | The following command runs evaluation in INT8 precision (the calibration cache will be saved to the path specified by `--calib_cache`):
120 | ```
121 | # INT8 precision
122 | python yolov5_trt_inference.py --input_images_folder=<image folder> --output_images_folder=<output image folder> --onnx=<path to onnx model> --coco_anno=<path to COCO annotation file> --rect --data_type=int8 --save_engine=./yolov5s_int8_maxbs16.engine --calib_img_dir=<calibration image folder> --calib_cache=yolov5s_bs16_n10.cache --n_batches=10 --batch_size=16
123 | ```
124 | 
125 | **Note**: The calibration algorithm for YOLOV5 is `IInt8MinMaxCalibrator` instead of `IInt8EntropyCalibrator2`, so if you want to use the saved calibration cache with `trtexec`, you have to change the first line of the cache from `MinMaxCalibration` to `EntropyCalibration2`, as sketched below.
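A minimal sketch of that one-line header rewrite (assuming the cache file name from the command above; the calibration cache is a small text file, so it can be edited in place):

```python
# rewrite the calibration-cache header so that tools expecting an
# EntropyCalibration2 cache (e.g. trtexec) will accept it
cache_path = "yolov5s_bs16_n10.cache"
with open(cache_path) as f:
    lines = f.readlines()
lines[0] = lines[0].replace("MinMaxCalibration", "EntropyCalibration2")
with open(cache_path, "w") as f:
    f.writelines(lines)
```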
126 | 
127 | ### Misc for TensorRT sample
128 | 
129 | #### Performance & mAP summary
130 | Here is the performance and mAP summary, tested on V100 16G with TensorRT 8.2.5 in rectangular inference mode.
131 | 
132 | | Model | Input Size | Precision | FPS bs=32 | FPS bs=1 | mAP@0.5 |
133 | | -------- | ---------- | --------- | --------- | --------- | ------- |
134 | | yolov5n | 640 | FP16 | 1295 | 448 | 45.9% |
135 | | yolov5s | 640 | FP16 | 917 | 378 | 57.1% |
136 | | yolov5m | 640 | FP16 | 614 | 282 | 64% |
137 | | yolov5l | 640 | FP16 | 416 | 202 | 67.3% |
138 | | yolov5x | 640 | FP16 | 231 | 135 | 68.5% |
139 | | yolov5n6 | 1280 | FP16 | 341 | 160 | 54.2% |
140 | | yolov5s6 | 1280 | FP16 | 261 | 139 | 63.2% |
141 | | yolov5m6 | 1280 | FP16 | 155 | 99 | 68.8% |
142 | | yolov5l6 | 1280 | FP16 | 106 | 68 | 70.7% |
143 | | yolov5x6 | 1280 | FP16 | 60 | 45 | 71.9% |
144 | 
145 | #### nbit-NMS
146 | Users can also enable n-bit NMS by changing `scoreBits` in export.py; a rough intuition for the accuracy impact is sketched after the snippet.
147 | ```python
148 | # Default to be 16-bit
149 | nms_attrs["scoreBits"] = 16
150 | # Can be changed to a smaller value to speed up the NMS operation:
151 | # e.g. nms_attrs["scoreBits"] = 8
152 | ```
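Lowering `scoreBits` coarsens the resolution of the confidence scores used inside NMS, which is why a small value can slightly reduce final mAP (see the note after the table below). A simplified illustration, not the plugin's exact internal encoding:

```python
def quantize(score: float, bits: int) -> float:
    """Snap a [0, 1] score onto a grid of 2**bits - 1 levels (illustrative only)."""
    levels = (1 << bits) - 1
    return round(score * levels) / levels

for bits in (16, 8):
    print(f"scoreBits={bits}: 0.45678 -> {quantize(0.45678, bits):.6f}")
```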
153 | Performance gain:
154 | | Classes number | Device | Anchors number | Score bits | Batch size | NMS Execution time (ms) |
155 | | -------------- | --------- | ------------- | ---------- | ---------- | ----------------------- |
156 | | 80 | A30 | 25200 | 16 | 32 | 12.1 |
157 | | 80 | A30 | 25200 | 8 | 32 | 10.0 |
158 | | 4 | Xavier NX | 10560 | 16 | 4 | 1.38 |
159 | | 4 | Xavier NX | 10560 | 8 | 4 | 1.08 |
160 | 
161 | *Note*: a smaller score-bits value may slightly decrease the final mAP.
162 | 
163 | #### DeepStream deployment
164 | Users can integrate the YOLOV5 model with the BatchedNMS plugin into DeepStream following [deepstream_tao_apps](https://github.com/NVIDIA-AI-IOT/deepstream_tao_apps).
165 | 
166 | ## Appendix
167 | ### YOLOV5 with different activations
168 | We conducted experiments with different activations, pursuing a better trade-off between mAP and performance on TensorRT.
169 | 
170 | You can change the activation of the YOLOV5 model in `yolov5/models/common.py`:
171 | ```
172 | class Conv(nn.Module):
173 |     # Standard convolution
174 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
175 |         super().__init__()
176 |         self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
177 |         self.bn = nn.BatchNorm2d(c2)
178 |         # self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
179 |         self.act = nn.ReLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
180 | 
181 |     def forward(self, x):
182 |         return self.act(self.bn(self.conv(x)))
183 | 
184 |     def forward_fuse(self, x):
185 |         return self.act(self.conv(x))
186 | ```
187 | 
188 | YOLOV5s experiment results so far:
189 | 
190 | | Activation type | mAP@0.5 | V100 `--best` FPS (bs=32) | A10 `--best` FPS (bs=32) |
191 | |-------------------------|------------------------------------------------------|----------------------------------|--------------------------------|
192 | | swish (baseline) | 56.7% | 1047 | 965 |
193 | | ReLU | 54.8% (scratch) <br> 55.7% (swish pretrained) | 1177 | 1065 |
194 | | GELU | 56.6% | 1004 | 916 |
195 | | Leaky ReLU | 55.0% | 1172 | 892 |
196 | | PReLU | 54.8% | 1123 | 932 |
197 | 
198 | ## Known issues
199 | 
200 | - INT8 gives 0% mAP in TensorRT 8.2.5: install TensorRT 8.4 or above to avoid the issue.
201 | - A TensorRT warning appears at the end of the stand-alone TensorRT inference script: it does not affect inference or evaluation and can safely be ignored.
--------------------------------------------------------------------------------
/deepstream-sample/README.md:
--------------------------------------------------------------------------------
1 | - Start from the DeepStream container:
2 | ```
3 | nvidia-docker run -v <local dir>:<container dir> --rm -it nvcr.io/nvidia/deepstream:6.1.1-devel bash
4 | ```
5 | 
6 | - Compile the lib:
7 | ```
8 | nvcc -Xcompiler -fPIC -shared -o yolov5_decode.so ./yoloForward_nc.cu ./yoloPlugins.cpp ./nvdsparsebbox_Yolo.cpp -isystem /usr/include/x86_64-linux-gnu/ -L /usr/lib/x86_64-linux-gnu/ -I /opt/nvidia/deepstream/deepstream/sources/includes -lnvinfer
9 | ```
10 | 
11 | - Run the deepstream sample:
12 | ```
13 | deepstream-app -c config/deepstream_app_config.txt
14 | ```
15 | 
16 | 
--------------------------------------------------------------------------------
/deepstream-sample/config/config_infer_primary_yoloV5.txt:
--------------------------------------------------------------------------------
1 | [property]
2 | gpu-id=0
3 | net-scale-factor=0.0039215697906911373
4 | model-color-format=0
5 | onnx-file=./yolov5n.onnx
6 | model-engine-file=./yolov5n.onnx_b1_gpu1_fp16.engine
7 | infer-dims=3;640;640
8 | labelfile-path=labels.txt
9 | batch-size=1
10 | workspace-size=1024
11 | network-mode=2
12 | num-detected-classes=80
13 | interval=0
14 | gie-unique-id=1
15 | process-mode=1
16 | network-type=0
17 | cluster-mode=2
18 | maintain-aspect-ratio=1
19 | parse-bbox-func-name=NvDsInferParseYolo
20 | custom-lib-path=./yolov5_decode.so
21 | 
22 | [class-attrs-all]
23 | nms-iou-threshold=0.45
24 | pre-cluster-threshold=0.25
25 | topk=300
26 | 
--------------------------------------------------------------------------------
/deepstream-sample/config/config_infer_primary_yoloV5_bs8.txt:
--------------------------------------------------------------------------------
1 | [property]
2 | gpu-id=0
3 | net-scale-factor=0.0039215697906911373
4 | model-color-format=0
5 | onnx-file=./yolov5n.onnx
6 | model-engine-file=./yolov5n.onnx_b8_gpu1_fp16.engine
7 | infer-dims=3;640;640
8 | labelfile-path=labels.txt
9 | batch-size=8
10 | workspace-size=1024
11 | network-mode=2
12 | num-detected-classes=80
13 | interval=0
14 | gie-unique-id=1
15 | process-mode=1
16 | network-type=0
17 | cluster-mode=2
18 | maintain-aspect-ratio=1
19 | parse-bbox-func-name=NvDsInferParseYolo
20 | custom-lib-path=./yolov5_decode.so
21 | 
22 | [class-attrs-all]
23 | nms-iou-threshold=0.45
24 | pre-cluster-threshold=0.25
25 | topk=300
26 | 
--------------------------------------------------------------------------------
/deepstream-sample/config/deepstream_app_config.txt:
--------------------------------------------------------------------------------
1 | [application]
2 | enable-perf-measurement=1
3 | perf-measurement-interval-sec=5
4 | 
5 | [tiled-display]
6 | enable=0
7 | rows=1
8 | columns=1
9 | width=1280
10 | height=720
11 | gpu-id=0
12 | nvbuf-memory-type=0
13 | 
14 | [source0]
15 | enable=1
16 | type=3
17 | uri=file:///opt/nvidia/deepstream/deepstream/samples/streams/sample_1080p_h264.mp4
18 | num-sources=1
19 | gpu-id=0
20 | cudadec-memtype=0
21 
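# note: the [sink0] group below is the display sink; it stays disabled
# (enable=0), so this config runs inference without display, matching the
# top-level README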
| 22 | [sink0] 23 | enable=0 24 | type=2 25 | sync=0 26 | gpu-id=0 27 | nvbuf-memory-type=0 28 | 29 | [osd] 30 | enable=0 31 | gpu-id=0 32 | border-width=5 33 | text-size=15 34 | text-color=1;1;1;1; 35 | text-bg-color=0.3;0.3;0.3;1 36 | font=Serif 37 | show-clock=0 38 | clock-x-offset=800 39 | clock-y-offset=820 40 | clock-text-size=12 41 | clock-color=1;0;0;0 42 | nvbuf-memory-type=0 43 | 44 | [streammux] 45 | gpu-id=0 46 | live-source=0 47 | batch-size=1 48 | batched-push-timeout=40000 49 | width=1920 50 | height=1080 51 | enable-padding=0 52 | nvbuf-memory-type=0 53 | 54 | [primary-gie] 55 | enable=1 56 | gpu-id=0 57 | gie-unique-id=1 58 | nvbuf-memory-type=0 59 | config-file=config/config_infer_primary_yoloV5.txt 60 | 61 | [tests] 62 | file-loop=1 63 | -------------------------------------------------------------------------------- /deepstream-sample/config/deepstream_app_config_8s.txt: -------------------------------------------------------------------------------- 1 | [application] 2 | enable-perf-measurement=1 3 | perf-measurement-interval-sec=5 4 | 5 | [tiled-display] 6 | enable=0 7 | rows=1 8 | columns=1 9 | width=1280 10 | height=720 11 | gpu-id=0 12 | nvbuf-memory-type=0 13 | 14 | [source0] 15 | enable=1 16 | type=3 17 | uri=file:///opt/nvidia/deepstream/deepstream/samples/streams/sample_1080p_h264.mp4 18 | num-sources=8 19 | gpu-id=0 20 | cudadec-memtype=0 21 | 22 | [sink0] 23 | enable=0 24 | type=2 25 | sync=0 26 | gpu-id=0 27 | nvbuf-memory-type=0 28 | 29 | [osd] 30 | enable=0 31 | gpu-id=0 32 | border-width=5 33 | text-size=15 34 | text-color=1;1;1;1; 35 | text-bg-color=0.3;0.3;0.3;1 36 | font=Serif 37 | show-clock=0 38 | clock-x-offset=800 39 | clock-y-offset=820 40 | clock-text-size=12 41 | clock-color=1;0;0;0 42 | nvbuf-memory-type=0 43 | 44 | [streammux] 45 | gpu-id=0 46 | live-source=0 47 | batch-size=8 48 | batched-push-timeout=40000 49 | width=1920 50 | height=1080 51 | enable-padding=0 52 | nvbuf-memory-type=0 53 | 54 | [primary-gie] 55 | enable=1 56 | gpu-id=0 57 | gie-unique-id=1 58 | nvbuf-memory-type=0 59 | config-file=config/config_infer_primary_yoloV5_bs8.txt 60 | 61 | [tests] 62 | file-loop=1 63 | -------------------------------------------------------------------------------- /deepstream-sample/config/deepstream_app_config_save_video.txt: -------------------------------------------------------------------------------- 1 | [application] 2 | enable-perf-measurement=1 3 | perf-measurement-interval-sec=5 4 | 5 | [tiled-display] 6 | enable=1 7 | rows=1 8 | columns=1 9 | width=1280 10 | height=720 11 | gpu-id=0 12 | nvbuf-memory-type=0 13 | 14 | [source0] 15 | enable=1 16 | type=3 17 | uri=file:///opt/nvidia/deepstream/deepstream/samples/streams/sample_1080p_h264.mp4 18 | num-sources=1 19 | gpu-id=0 20 | cudadec-memtype=0 21 | 22 | [sink0] 23 | enable=1 24 | type=3 25 | sync=0 26 | gpu-id=0 27 | container=1 28 | codec=1 29 | enc-type=1 30 | bitrate=4000000 31 | output-file=./out.mp4 32 | nvbuf-memory-type=0 33 | 34 | [osd] 35 | enable=1 36 | gpu-id=0 37 | border-width=5 38 | text-size=15 39 | text-color=1;1;1;1; 40 | text-bg-color=0.3;0.3;0.3;1 41 | font=Serif 42 | show-clock=0 43 | clock-x-offset=800 44 | clock-y-offset=820 45 | clock-text-size=12 46 | clock-color=1;0;0;0 47 | nvbuf-memory-type=0 48 | 49 | [streammux] 50 | gpu-id=0 51 | live-source=0 52 | batch-size=1 53 | batched-push-timeout=40000 54 | width=1920 55 | height=1080 56 | enable-padding=0 57 | nvbuf-memory-type=0 58 | 59 | [primary-gie] 60 | enable=1 61 | gpu-id=0 62 | gie-unique-id=1 63 | 
nvbuf-memory-type=0 64 | config-file=config/config_infer_primary_yoloV5.txt 65 | 66 | [tests] 67 | file-loop=1 68 | -------------------------------------------------------------------------------- /deepstream-sample/config/labels.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- /deepstream-sample/nvdsparsebbox_Yolo.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 |  *
22 |  * Edited by Marcos Luciano
23 |  * https://www.github.com/marcoslucianops
24 |  */
25 | 
26 | #include <algorithm>
27 | #include <cassert>
28 | #include <cmath>
29 | #include <cstring>
30 | #include <iostream>
31 | #include "nvdsinfer_custom_impl.h"
32 | 
33 | float clamp(const float val, const float minVal, const float maxVal)
34 | {
35 |     assert(minVal <= maxVal);
36 |     return std::min(maxVal, std::max(minVal, val));
37 | }
38 | 
39 | extern "C" bool NvDsInferParseYolo(
40 |     std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
41 |     NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);
42 | 
43 | static NvDsInferParseObjectInfo convertBBox(
44 |     const float& bx1, const float& by1, const float& bx2, const float& by2, const uint& netW, const uint& netH)
45 | {
46 |     NvDsInferParseObjectInfo b;
47 | 
48 |     float x1 = bx1;
49 |     float y1 = by1;
50 |     float x2 = bx2;
51 |     float y2 = by2;
52 | 
53 |     x1 = clamp(x1, 0, netW);
54 |     y1 = clamp(y1, 0, netH);
55 |     x2 = clamp(x2, 0, netW);
56 |     y2 = clamp(y2, 0, netH);
57 | 
58 |     b.left = x1;
59 |     b.width = clamp(x2 - x1, 0, netW);
60 |     b.top = y1;
61 |     b.height = clamp(y2 - y1, 0, netH);
62 | 
63 |     return b;
64 | }
65 | 
66 | static void addBBoxProposal(
67 |     const float bx1, const float by1, const float bx2, const float by2, const uint& netW, const uint& netH,
68 |     const int maxIndex, const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo)
69 | {
70 |     NvDsInferParseObjectInfo bbi = convertBBox(bx1, by1, bx2, by2, netW, netH);
71 |     if (bbi.width < 1 || bbi.height < 1) return;
72 | 
73 |     bbi.detectionConfidence = maxProb;
74 |     bbi.classId = maxIndex;
75 |     binfo.push_back(bbi);
76 | }
77 | 
78 | static std::vector<NvDsInferParseObjectInfo> decodeYoloTensor(
79 |     const int* counts, const float* boxes, const float* scores, const int* classes, const uint& netW, const uint& netH)
80 | {
81 |     std::vector<NvDsInferParseObjectInfo> binfo;
82 | 
83 |     uint numBoxes = counts[0];
84 |     for (uint b = 0; b < numBoxes; ++b)
85 |     {
86 |         float bx1 = boxes[b * 4 + 0];
87 |         float by1 = boxes[b * 4 + 1];
88 |         float bx2 = boxes[b * 4 + 2];
89 |         float by2 = boxes[b * 4 + 3];
90 | 
91 |         float maxProb = scores[b];
92 |         int maxIndex = classes[b];
93 | 
94 |         addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
95 |     }
96 |     return binfo;
97 | }
98 | 
99 | static bool NvDsInferParseCustomYolo(
100 |     std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
101 |     NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
102 | {
103 |     if (outputLayersInfo.empty())
104 |     {
105 |         std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
106 |         return false;
107 |     }
108 | 
109 |     std::vector<NvDsInferParseObjectInfo> objects;
110 | 
111 |     const NvDsInferLayerInfo &counts = outputLayersInfo[0];  // plugin output order: counts, boxes, scores, classes
112 |     const NvDsInferLayerInfo &boxes = outputLayersInfo[1];
113 |     const NvDsInferLayerInfo &scores = outputLayersInfo[2];
114 |     const NvDsInferLayerInfo &classes = outputLayersInfo[3];
115 | 
116 |     std::vector<NvDsInferParseObjectInfo> outObjs =
117 |         decodeYoloTensor(
118 |             (const int*)(counts.buffer), (const float*)(boxes.buffer), (const float*)(scores.buffer),
119 |             (const int*)(classes.buffer), networkInfo.width, networkInfo.height);
120 | 
121 |     objects.insert(objects.end(), outObjs.begin(), outObjs.end());
122 | 
123 |     objectList = objects;
124 | 
125 |     return true;
126 | }
127 | 
128 | extern "C" bool NvDsInferParseYolo(
129 |     std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
130 |     NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
131 | {
132 |     return NvDsInferParseCustomYolo (
133 | 
outputLayersInfo, networkInfo, detectionParams, objectList); 134 | } 135 | 136 | CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo); 137 | -------------------------------------------------------------------------------- /deepstream-sample/yoloForward_nc.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * Created by Marcos Luciano 5 | * https://www.github.com/marcoslucianops 6 | */ 7 | 8 | #include 9 | 10 | __global__ void gpuYoloLayer_nc( 11 | const float* input, int* num_detections, float* detection_boxes, float* detection_scores, int* detection_classes, 12 | const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, 13 | const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors) 14 | { 15 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 16 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 17 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 18 | 19 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) 20 | return; 21 | 22 | const int numGridCells = gridSizeX * gridSizeY; 23 | const int bbindex = y_id * gridSizeX + x_id; 24 | 25 | const float objectness 26 | = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]; 27 | 28 | if (objectness < scoreThreshold) 29 | return; 30 | 31 | int count = (int)atomicAdd(num_detections, 1); 32 | 33 | const float alpha = scaleXY; 34 | const float beta = -0.5 * (scaleXY - 1); 35 | 36 | float x 37 | = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] 38 | * alpha + beta + x_id) * netWidth / gridSizeX; 39 | 40 | float y 41 | = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] 42 | * alpha + beta + y_id) * netHeight / gridSizeY; 43 | 44 | float w 45 | = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) 46 | * anchors[z_id * 2]; 47 | 48 | float h 49 | = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2) 50 | * anchors[z_id * 2 + 1]; 51 | 52 | float maxProb = 0.0f; 53 | int maxIndex = -1; 54 | 55 | for (uint i = 0; i < numOutputClasses; ++i) 56 | { 57 | float prob 58 | = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 59 | 60 | if (prob > maxProb) 61 | { 62 | maxProb = prob; 63 | maxIndex = i; 64 | } 65 | } 66 | 67 | detection_boxes[count * 4 + 0] = x - 0.5 * w; 68 | detection_boxes[count * 4 + 1] = y - 0.5 * h; 69 | detection_boxes[count * 4 + 2] = x + 0.5 * w; 70 | detection_boxes[count * 4 + 3] = y + 0.5 * h; 71 | detection_scores[count] = objectness * maxProb; 72 | detection_classes[count] = maxIndex; 73 | } 74 | 75 | cudaError_t cudaYoloLayer_nc( 76 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 77 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 78 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 79 | const float& scaleXY, const void* anchors, cudaStream_t stream); 80 | 81 | cudaError_t cudaYoloLayer_nc( 82 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 83 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& 
 netWidth,
84 |     const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
85 |     const float& scaleXY, const void* anchors, cudaStream_t stream)
86 | {
87 |     dim3 threads_per_block(16, 16, 4);
88 |     dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1,
89 |         (gridSizeY / threads_per_block.y) + 1,
90 |         (numBBoxes / threads_per_block.z) + 1);
91 | 
92 |     for (unsigned int batch = 0; batch < batchSize; ++batch)  // one kernel launch per batch item
93 |     {
94 |         gpuYoloLayer_nc<<<number_of_blocks, threads_per_block, 0, stream>>>(
95 |             reinterpret_cast<const float*>(input) + (batch * inputSize),
96 |             reinterpret_cast<int*>(num_detections) + (batch),
97 |             reinterpret_cast<float*>(detection_boxes) + (batch * 4 * outputSize),
98 |             reinterpret_cast<float*>(detection_scores) + (batch * outputSize),
99 |             reinterpret_cast<int*>(detection_classes) + (batch * outputSize),
100 |             scoreThreshold, netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY,
101 |             reinterpret_cast<const float*>(anchors));
102 |     }
103 |     return cudaGetLastError();
104 | }
105 | 
--------------------------------------------------------------------------------
/deepstream-sample/yoloPlugins.cpp:
--------------------------------------------------------------------------------
1 | /*
2 |  * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 |  *
4 |  * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
5 |  *
6 |  * Permission is hereby granted, free of charge, to any person obtaining a
7 |  * copy of this software and associated documentation files (the "Software"),
8 |  * to deal in the Software without restriction, including without limitation
9 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 |  * and/or sell copies of the Software, and to permit persons to whom the
11 |  * Software is furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in
14 |  * all copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 |  * DEALINGS IN THE SOFTWARE.
23 | * 24 | * Edited by Marcos Luciano 25 | * https://www.github.com/marcoslucianops 26 | */ 27 | 28 | #include "yoloPlugins.h" 29 | #include "NvInferPlugin.h" 30 | #include 31 | #include 32 | #include 33 | #define NANCHORS 3 34 | #define NFEATURES 3 35 | 36 | namespace { 37 | template 38 | void write(char*& buffer, const T& val) 39 | { 40 | *reinterpret_cast(buffer) = val; 41 | buffer += sizeof(T); 42 | } 43 | 44 | template 45 | void read(const char*& buffer, T& val) 46 | { 47 | val = *reinterpret_cast(buffer); 48 | buffer += sizeof(T); 49 | } 50 | } 51 | 52 | nvinfer1::PluginFieldCollection YoloLayerPluginCreator::mFC{}; 53 | std::vector YoloLayerPluginCreator::mPluginAttributes; 54 | 55 | YoloLayerPluginCreator::YoloLayerPluginCreator() noexcept 56 | { 57 | mPluginAttributes.emplace_back(nvinfer1::PluginField("max_stride", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); 58 | // mPluginAttributes.emplace_back(nvinfer1::PluginField("net_height", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); 59 | mPluginAttributes.emplace_back(nvinfer1::PluginField("num_classes", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); 60 | mPluginAttributes.emplace_back(nvinfer1::PluginField("anchors", nullptr, nvinfer1::PluginFieldType::kFLOAT32, NFEATURES * NANCHORS * 2)); 61 | mPluginAttributes.emplace_back(nvinfer1::PluginField("prenms_score_threshold", nullptr, nvinfer1::PluginFieldType::kFLOAT32, 1)); 62 | 63 | mFC.nbFields = mPluginAttributes.size(); 64 | mFC.fields = mPluginAttributes.data(); 65 | } 66 | 67 | nvinfer1::IPluginV2DynamicExt* YoloLayerPluginCreator::createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept 68 | { 69 | const nvinfer1::PluginField* fields = fc->fields; 70 | int max_stride = 32; 71 | int num_classes = 80; 72 | std::vector anchors; 73 | float score_threshold = 0.0; 74 | for (int i = 0; i < fc->nbFields; ++i) 75 | { 76 | const char* attrName = fields[i].name; 77 | if (!strcmp(attrName, "max_stride")) 78 | { 79 | assert(fields[i].type == nvinfer1::PluginFieldType::kINT32); 80 | max_stride = *(static_cast(fields[i].data)); 81 | } 82 | if (!strcmp(attrName, "num_classes")) 83 | { 84 | assert(fields[i].type == nvinfer1::PluginFieldType::kINT32); 85 | num_classes = *(static_cast(fields[i].data)); 86 | } 87 | if (!strcmp(attrName, "anchors")) 88 | { 89 | assert(fields[i].type == nvinfer1::PluginFieldType::kFLOAT32); 90 | const auto anchors_ptr = static_cast(fields[i].data); 91 | anchors.assign(anchors_ptr, anchors_ptr + NFEATURES * NANCHORS * 2); 92 | } 93 | if (!strcmp(attrName, "prenms_score_threshold")) 94 | { 95 | assert(fields[i].type == nvinfer1::PluginFieldType::kFLOAT32); 96 | score_threshold = *(static_cast(fields[i].data)); 97 | } 98 | } 99 | return new YoloLayer(max_stride, num_classes, anchors, score_threshold); 100 | } 101 | 102 | cudaError_t cudaYoloLayer_nc( 103 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 104 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 105 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 106 | const float& scaleXY, const void* anchors, cudaStream_t stream); 107 | 108 | YoloLayer::YoloLayer (const void* data, size_t length) 109 | { 110 | const char *d = static_cast(data); 111 | 112 | read(d, m_NetWidth); 113 | read(d, m_NetHeight); 114 | read(d, m_MaxStride); 115 | read(d, m_NumClasses); 116 | read(d, 
m_ScoreThreshold); 117 | read(d, m_OutputSize); 118 | 119 | m_Anchors.resize(NFEATURES * NANCHORS * 2); 120 | for(uint i = 0; i < m_Anchors.size(); i++){ 121 | read(d, m_Anchors[i]); 122 | } 123 | 124 | for(uint i = 0; i < NFEATURES; i++){ 125 | int height; 126 | int width; 127 | read(d, height); 128 | read(d, width); 129 | m_FeatureSpatialSize.push_back(nvinfer1::DimsHW(height, width)); 130 | } 131 | }; 132 | 133 | YoloLayer::YoloLayer( 134 | const uint& maxStride, const uint& numClasses, 135 | const std::vector& anchors, const float& scoreThreshold) : 136 | m_MaxStride(maxStride), 137 | m_NumClasses(numClasses), 138 | m_Anchors(anchors), 139 | m_ScoreThreshold(scoreThreshold) 140 | { 141 | 142 | }; 143 | 144 | nvinfer1::DimsExprs 145 | YoloLayer::getOutputDimensions( 146 | int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept 147 | { 148 | assert(outputIndex < 4); 149 | 150 | nvinfer1::DimsExprs out_dim; 151 | const nvinfer1::IDimensionExpr *batch_size = inputs[0].d[0]; 152 | 153 | const nvinfer1::IDimensionExpr *output_num_boxes = exprBuilder.constant(0); 154 | // input feature [batch_size, (nc+5) * nanchor, height, width] 155 | for(int32_t i = 0; i < NFEATURES; i++) 156 | { 157 | output_num_boxes = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *output_num_boxes, 158 | *exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, 159 | *inputs[i].d[2], *inputs[i].d[3]) 160 | ); 161 | } 162 | 163 | output_num_boxes = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, 164 | *output_num_boxes, *exprBuilder.constant(NANCHORS)); 165 | 166 | // num_detections [batch_size, 1] 167 | // detection_boxes [batch_size, numboxes, 4] 168 | // detection_scores [batch_size, numboxes] 169 | // detection_classes [batch_size, numboxes] 170 | if (outputIndex == 0) { 171 | out_dim.nbDims = 2; 172 | out_dim.d[0] = batch_size; 173 | out_dim.d[1] = exprBuilder.constant(1); 174 | } 175 | else if (outputIndex == 1) 176 | { 177 | out_dim.nbDims = 3; 178 | out_dim.d[0] = batch_size; 179 | out_dim.d[1] = output_num_boxes; 180 | out_dim.d[2] = exprBuilder.constant(4); 181 | } 182 | else 183 | { 184 | out_dim.nbDims = 2; 185 | out_dim.d[0] = batch_size; 186 | out_dim.d[1] = output_num_boxes; 187 | } 188 | return out_dim; 189 | } 190 | 191 | nvinfer1::DataType 192 | YoloLayer::getOutputDataType(int index, const nvinfer1::DataType* inputType, int nbInputs) const noexcept 193 | { 194 | // num_detection and classes 195 | if (index == 0 || index == 3) 196 | { 197 | return nvinfer1::DataType::kINT32; 198 | } 199 | // All others should use the same datatype as the input 200 | return inputType[0]; 201 | } 202 | 203 | bool 204 | YoloLayer::supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs, int nbOutputs) noexcept 205 | { 206 | if (inOut[pos].format != nvinfer1::PluginFormat::kLINEAR) 207 | { 208 | return false; 209 | } 210 | 211 | const int posOut = pos - nbInputs; 212 | // num_detection and classes 213 | if (posOut == 0 || posOut == 3 ) 214 | { 215 | return inOut[pos].type == nvinfer1::DataType::kINT32 && inOut[pos].format == nvinfer1::PluginFormat::kLINEAR; 216 | } 217 | 218 | // all other inputs/outputs: fp32 or fp16 219 | return (inOut[pos].type == nvinfer1::DataType::kFLOAT) 220 | && (inOut[0].type == inOut[pos].type); 221 | } 222 | 223 | void 224 | YoloLayer::configurePlugin( 225 | const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) noexcept 
226 | { 227 | assert(nbInputs == NFEATURES); 228 | // input feature [batch_size, (nc+5) * nanchor, height, width] 229 | m_OutputSize = 0; 230 | m_FeatureSpatialSize.clear(); 231 | for(int i = 0; i < NFEATURES; i++) 232 | { 233 | m_FeatureSpatialSize.push_back(nvinfer1::DimsHW(in[i].desc.dims.d[2], in[i].desc.dims.d[3])); 234 | m_OutputSize += in[i].desc.dims.d[2] * in[i].desc.dims.d[3] * NANCHORS; 235 | } 236 | // Compute the network input by last feature map and max stride 237 | m_NetHeight = in[NFEATURES - 1].desc.dims.d[2] * m_MaxStride; 238 | m_NetWidth = in[NFEATURES - 1].desc.dims.d[3] * m_MaxStride; 239 | } 240 | 241 | int 242 | YoloLayer::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, 243 | void* const* outputs, void* workspace, cudaStream_t stream) noexcept 244 | { 245 | const int batchSize = inputDesc[0].dims.d[0]; 246 | void* num_detections = outputs[0]; 247 | void* detection_boxes = outputs[1]; 248 | void* detection_scores = outputs[2]; 249 | void* detection_classes = outputs[3]; 250 | CUDA_CHECK(cudaMemsetAsync((int*)num_detections, 0, sizeof(int) * batchSize, stream)); 251 | CUDA_CHECK(cudaMemsetAsync((float*)detection_boxes, 0, sizeof(float) * m_OutputSize * 4 * batchSize, stream)); 252 | CUDA_CHECK(cudaMemsetAsync((float*)detection_scores, 0, sizeof(float) * m_OutputSize * batchSize, stream)); 253 | CUDA_CHECK(cudaMemsetAsync((int*)detection_classes, 0, sizeof(int) * m_OutputSize * batchSize, stream)); 254 | 255 | uint yoloTensorsSize = NFEATURES; 256 | for (uint i = 0; i < yoloTensorsSize; ++i) 257 | { 258 | // TensorInfo& curYoloTensor = m_YoloTensors.at(i); 259 | const nvinfer1::DimsHW& gridSize = m_FeatureSpatialSize[i]; 260 | 261 | uint numBBoxes = NANCHORS; 262 | float scaleXY = 2.0; 263 | uint gridSizeX = gridSize.w(); 264 | uint gridSizeY = gridSize.h(); 265 | std::vector anchors(m_Anchors.begin() + i * NANCHORS * 2, m_Anchors.begin() + (i+1) * NANCHORS * 2); 266 | 267 | void* v_anchors; 268 | if (anchors.size() > 0) { 269 | float* f_anchors = anchors.data(); 270 | CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size())); 271 | CUDA_CHECK(cudaMemcpyAsync(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice, 272 | stream)); 273 | } 274 | 275 | uint64_t inputSize = gridSizeX * gridSizeY * (numBBoxes * (4 + 1 + m_NumClasses)); 276 | 277 | CUDA_CHECK(cudaYoloLayer_nc( 278 | inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, batchSize, 279 | inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, 280 | m_NumClasses, numBBoxes, scaleXY, v_anchors, stream)); 281 | 282 | if (anchors.size() > 0) { 283 | CUDA_CHECK(cudaFree(v_anchors)); 284 | } 285 | 286 | } 287 | 288 | return 0; 289 | } 290 | 291 | size_t YoloLayer::getSerializationSize() const noexcept 292 | { 293 | size_t totalSize = 0; 294 | 295 | totalSize += sizeof(m_NetWidth); 296 | totalSize += sizeof(m_NetHeight); 297 | totalSize += sizeof(m_MaxStride); 298 | totalSize += sizeof(m_NumClasses); 299 | totalSize += sizeof(m_ScoreThreshold); 300 | totalSize += sizeof(m_OutputSize); 301 | 302 | 303 | // anchors 304 | totalSize += m_Anchors.size() * sizeof(m_Anchors[0]); 305 | 306 | // feature size 307 | totalSize += m_FeatureSpatialSize.size() * 2 * sizeof(m_FeatureSpatialSize[0].h()); 308 | 309 | return totalSize; 310 | } 311 | 312 | void YoloLayer::serialize(void* buffer) const noexcept 313 | { 314 | char *d = static_cast(buffer); 315 | 316 | 
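    // note: the write order below must mirror the read order in the
    // deserialization constructor YoloLayer(const void* data, size_t length)
    // above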
write(d, m_NetWidth); 317 | write(d, m_NetHeight); 318 | write(d, m_MaxStride); 319 | write(d, m_NumClasses); 320 | write(d, m_ScoreThreshold); 321 | write(d, m_OutputSize); 322 | 323 | // write anchors: 324 | for (int i = 0; i < m_Anchors.size(); i++){ 325 | write(d, m_Anchors[i]); 326 | } 327 | 328 | // write feature size: 329 | uint yoloTensorsSize = m_FeatureSpatialSize.size(); 330 | for (uint i = 0; i < yoloTensorsSize; ++i) 331 | { 332 | write(d, m_FeatureSpatialSize[i].h()); 333 | write(d, m_FeatureSpatialSize[i].w()); 334 | } 335 | } 336 | 337 | nvinfer1::IPluginV2DynamicExt* YoloLayer::clone() const noexcept 338 | { 339 | return new YoloLayer ( 340 | m_MaxStride, m_NumClasses, m_Anchors, m_ScoreThreshold); 341 | } 342 | 343 | REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator); 344 | -------------------------------------------------------------------------------- /deepstream-sample/yoloPlugins.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | * 4 | * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 | * DEALINGS IN THE SOFTWARE. 
23 | * 24 | * Edited by Marcos Luciano 25 | * https://www.github.com/marcoslucianops 26 | */ 27 | 28 | #ifndef __YOLO_PLUGINS__ 29 | #define __YOLO_PLUGINS__ 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | #include 38 | 39 | #include "NvInferPlugin.h" 40 | 41 | #define CUDA_CHECK(status) \ 42 | { \ 43 | if (status != 0) \ 44 | { \ 45 | std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " \ 46 | << __LINE__ << std::endl; \ 47 | abort(); \ 48 | } \ 49 | } 50 | 51 | namespace 52 | { 53 | const char* YOLOLAYER_PLUGIN_VERSION {"1"}; 54 | const char* YOLOLAYER_PLUGIN_NAME {"YoloLayer_TRT"}; 55 | } // namespace 56 | 57 | class YoloLayer : public nvinfer1::IPluginV2DynamicExt 58 | { 59 | public: 60 | YoloLayer (const void* data, size_t length); 61 | YoloLayer ( 62 | const uint& maxStride, const uint& numClasses, 63 | const std::vector& anchors, const float& scoreThreshold); 64 | 65 | // IPluginV2 methods 66 | const char* getPluginType () const noexcept override { return YOLOLAYER_PLUGIN_NAME; } 67 | const char* getPluginVersion () const noexcept override { return YOLOLAYER_PLUGIN_VERSION; } 68 | int getNbOutputs () const noexcept override { return 4; } 69 | int initialize () noexcept override { return 0; } 70 | void terminate () noexcept override {} 71 | size_t getSerializationSize() const noexcept override; 72 | void serialize (void* buffer) const noexcept override; 73 | void destroy () noexcept override { delete this; } 74 | void setPluginNamespace (const char* pluginNamespace) noexcept override { 75 | m_Namespace = pluginNamespace; 76 | } 77 | virtual const char* getPluginNamespace () const noexcept override { 78 | return m_Namespace.c_str(); 79 | } 80 | 81 | // IPluginV2Ext methods 82 | nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType* inputType, int nbInputs) const noexcept override; 83 | 84 | // IPluginV2DynamicExt methods 85 | nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; 86 | nvinfer1::DimsExprs getOutputDimensions( 87 | int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept override; 88 | bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs, int nbOutputs) noexcept override; 89 | void configurePlugin( 90 | const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) noexcept override; 91 | size_t getWorkspaceSize( 92 | const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const noexcept override {return 0;}; 93 | int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, 94 | void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; 95 | 96 | private: 97 | std::string m_Namespace {""}; 98 | int m_NetWidth {0}; 99 | int m_NetHeight {0}; 100 | int m_MaxStride {0}; 101 | int m_NumClasses {0}; 102 | std::vector m_Anchors; 103 | std::vector m_FeatureSpatialSize; 104 | float m_ScoreThreshold {0}; 105 | uint64_t m_OutputSize {0}; 106 | }; 107 | 108 | class YoloLayerPluginCreator : public nvinfer1::IPluginCreator 109 | { 110 | public: 111 | YoloLayerPluginCreator () noexcept; 112 | 113 | ~YoloLayerPluginCreator () noexcept {} 114 | 115 | const char* getPluginName () const noexcept override { return YOLOLAYER_PLUGIN_NAME; } 116 | 117 | const char* getPluginVersion () 
const noexcept override { return YOLOLAYER_PLUGIN_VERSION; } 118 | 119 | nvinfer1::IPluginV2DynamicExt* createPlugin ( 120 | const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept override; 121 | 122 | nvinfer1::IPluginV2DynamicExt* deserializePlugin ( 123 | const char* name, const void* serialData, size_t serialLength) noexcept override 124 | { 125 | std::cout << "Deserialize yoloLayer plugin: " << name << std::endl; 126 | return new YoloLayer(serialData, serialLength); 127 | } 128 | 129 | void setPluginNamespace(const char* libNamespace) noexcept override 130 | { 131 | mNamespace = libNamespace; 132 | } 133 | 134 | const char* getPluginNamespace() const noexcept override 135 | { 136 | return mNamespace.c_str(); 137 | } 138 | const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override 139 | { 140 | return &mFC; 141 | } 142 | 143 | private: 144 | static nvinfer1::PluginFieldCollection mFC; 145 | static std::vector mPluginAttributes; 146 | std::string mNamespace; 147 | }; 148 | 149 | #endif // __YOLO_PLUGINS__ 150 | -------------------------------------------------------------------------------- /requirement_export.txt: -------------------------------------------------------------------------------- 1 | seaborn 2 | nvidia-pyindex 3 | onnx-graphsurgeon 4 | opencv-python==4.5.5.64 5 | onnx-simplifier==0.3.10 6 | -------------------------------------------------------------------------------- /tensorrt-sample/requirement_infer.txt: -------------------------------------------------------------------------------- 1 | opencv-python==4.5.5.64 2 | tqdm 3 | pycocotools 4 | -------------------------------------------------------------------------------- /tensorrt-sample/trt_inference/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | 3 | from .engine import * 4 | -------------------------------------------------------------------------------- /tensorrt-sample/trt_inference/engine.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | 3 | import logging 4 | import os 5 | import numpy as np 6 | import tensorrt as trt 7 | import pycuda.autoinit 8 | import pycuda.driver as cuda 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) 13 | 14 | 15 | # Array of TensorRT loggers. We need to keep global references to 16 | # the TensorRT loggers that we create to prevent them from being 17 | # garbage collected as those are referenced from C++ code without 18 | # Python knowing about it. 19 | 20 | 21 | tensorrt_loggers = [] 22 | 23 | 24 | def _create_tensorrt_logger(verbose=False): 25 | """Create a TensorRT logger. 26 | 27 | Args: 28 | verbose (bool): whether to make the logger verbose. 
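
    Returns:
        trt.Logger: the created logger. A reference is also appended to the
        module-level tensorrt_loggers list so it is not garbage collected
        while TensorRT still references it from C++.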
29 | """ 30 | if verbose: 31 | # trt_verbosity = trt.Logger.Severity.INFO 32 | trt_verbosity = trt.Logger.Severity.VERBOSE 33 | else: 34 | trt_verbosity = trt.Logger.Severity.WARNING 35 | tensorrt_logger = trt.Logger(trt_verbosity) 36 | tensorrt_loggers.append(tensorrt_logger) 37 | return tensorrt_logger 38 | 39 | 40 | # class PTQEntropyCalibrator(trt.IInt8LegacyCalibrator): 41 | class PTQEntropyCalibrator(trt.IInt8MinMaxCalibrator): 42 | def __init__(self, cal_data, cache_file, load_func, n_batches, batch_size=64): 43 | # Whenever you specify a custom constructor for a TensorRT class, 44 | # you MUST call the constructor of the parent explicitly. 45 | trt.IInt8MinMaxCalibrator.__init__(self) 46 | 47 | self.cache_file = cache_file 48 | 49 | # Every time get_batch is called, the next batch of size batch_size will be copied to the device and returned. 50 | self.load_func = load_func 51 | self.img_list = [ os.path.join(cal_data, name) for name in os.listdir(cal_data) if name.split(".")[-1] in ["png", "jpg"] ] 52 | self.batch_size = batch_size 53 | self.n_batches = n_batches 54 | self.current_index = 0 55 | 56 | self.device_input = None 57 | 58 | def get_batch_size(self): 59 | return self.batch_size 60 | 61 | # TensorRT passes along the names of the engine bindings to the get_batch function. 62 | # You don't necessarily have to use them, but they can be useful to understand the order of 63 | # the inputs. The bindings list is expected to have the same ordering as 'names'. 64 | def get_batch(self, names): 65 | if self.current_index + self.batch_size > len(self.img_list): 66 | return None 67 | 68 | current_batch = int(self.current_index / self.batch_size) 69 | if current_batch >= self.n_batches: 70 | return None 71 | 72 | cur_batch_img_paths = self.img_list[self.current_index : self.current_index + self.batch_size] 73 | batch = self.load_func(cur_batch_img_paths) 74 | 75 | if self.device_input is None: 76 | self.device_input = cuda.mem_alloc( 77 | batch.size * 4 78 | ) # 4 bytes per float32. 79 | 80 | if current_batch % 10 == 0: 81 | print("Calibrating batch {:}, containing {:} images".format(current_batch, self.batch_size)) 82 | 83 | cuda.memcpy_htod(self.device_input, np.ascontiguousarray(batch, dtype=np.float32)) 84 | self.current_index += self.batch_size 85 | return [int(self.device_input)] 86 | 87 | def read_calibration_cache(self): 88 | # If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None. 
89 | if os.path.exists(self.cache_file): 90 | with open(self.cache_file, "rb") as f: 91 | return f.read() 92 | 93 | def write_calibration_cache(self, cache): 94 | with open(self.cache_file, "wb") as f: 95 | f.write(cache) 96 | # # Methods for LegacyCalibrator 97 | # def get_quantile(self): 98 | # return 1.0 99 | 100 | # def get_regression_cutoff(self): 101 | # return 1.0 102 | 103 | class HostDeviceMem(object): 104 | def __init__(self, host_mem, device_mem, binding_name, shape=None): 105 | self.host = host_mem 106 | self.device = device_mem 107 | self.binding_name = binding_name 108 | self.shape = shape 109 | 110 | def __str__(self): 111 | return "Host:\n" + str(self.host) + "\nDevice\n" + str(self.device) + "Shape: " + str(self.shape) 112 | 113 | def __repr__(self): 114 | return self.__str__() 115 | 116 | DEFAULT_MAX_WORKSPACE_SIZE = (1 << 30) * 8 117 | 118 | def build_engine_from_onnx( 119 | onnx_filename, 120 | min_shape, 121 | opt_shape, 122 | max_shape, 123 | max_workspace_size=DEFAULT_MAX_WORKSPACE_SIZE, 124 | dtype="fp32", 125 | calibrator=None, 126 | fp32_layer_ids=None, 127 | fp16_layer_ids=None, 128 | layers_min_max_dict=None, 129 | verbose=False, 130 | layer_names=[], 131 | extra_output_layer=[], 132 | ): 133 | 134 | """Initialization routine.""" 135 | if dtype == "int8": 136 | t_dtype = trt.DataType.INT8 137 | elif dtype == "fp16": 138 | t_dtype = trt.DataType.HALF 139 | elif dtype == "fp32": 140 | t_dtype = trt.DataType.FLOAT 141 | else: 142 | raise ValueError("Unsupported data type: %s" % dtype) 143 | 144 | if fp32_layer_ids is None: 145 | fp32_layer_ids = [] 146 | elif dtype != "int8": 147 | raise ValueError( 148 | "FP32 layer precision could be set only when dtype is INT8" 149 | ) 150 | 151 | if fp16_layer_ids is None: 152 | fp16_layer_ids = [] 153 | elif dtype != "int8": 154 | raise ValueError( 155 | "FP16 layer precision could be set only when dtype is INT8" 156 | ) 157 | 158 | 159 | tensorrt_logger = _create_tensorrt_logger(verbose) 160 | 161 | with trt.Builder(tensorrt_logger) as builder, builder.create_network(EXPLICIT_BATCH) as network, trt.OnnxParser(network, tensorrt_logger) as parser: 162 | 163 | if t_dtype == trt.DataType.HALF and not builder.platform_has_fast_fp16: 164 | logger.error("Specified FP16 but not supported on platform.") 165 | raise AttributeError("Specified FP16 but not supported on platform.") 166 | return 167 | 168 | if t_dtype == trt.DataType.INT8 and not builder.platform_has_fast_int8: 169 | logger.error("Specified INT8 but not supported on platform.") 170 | raise AttributeError("Specified INT8 but not supported on platform.") 171 | return 172 | 173 | if t_dtype == trt.DataType.INT8 and calibrator is None and layers_min_max_dict is None: 174 | logger.error("Specified INT8 but no calibrator provided.") 175 | raise AttributeError("Specified INT8 but no calibrator provided.") 176 | 177 | 178 | with open(onnx_filename, 'rb') as model: 179 | if not parser.parse(model.read()): 180 | print('ERROR: ONNX Parse Failed') 181 | for error in range(parser.num_errors): 182 | print(parser.get_error(error)) 183 | 184 | 185 | # Save the layers names in prototxt: 186 | # for layer_idx in range(network.num_layers): 187 | # layer = network.get_layer(layer_idx) 188 | # layer_names.append(layer.name) 189 | # print(f"{layer_idx} // {layer.name}:{layer.type}") 190 | 191 | for layer_idx in extra_output_layer: 192 | layer = network.get_layer(layer_idx) 193 | output_tensor = layer.get_output(0) 194 | network.mark_output(output_tensor) 195 | 196 | config = 
builder.create_builder_config() 197 | opt_profile = builder.create_optimization_profile() 198 | image_input = network.get_input(0) 199 | input_shape = image_input.shape 200 | input_name = image_input.name 201 | print("{}:{}".format(input_name, input_shape)) 202 | opt_profile.set_shape(input="images", 203 | min=min_shape, 204 | opt=opt_shape, 205 | max=max_shape) 206 | config.add_optimization_profile(opt_profile) 207 | config.max_workspace_size = max_workspace_size 208 | 209 | if t_dtype == trt.DataType.HALF: 210 | print("Generating FP16 engine") 211 | config.flags |= 1 << int(trt.BuilderFlag.FP16) 212 | config.flags |= 1 << int(trt.BuilderFlag.STRICT_TYPES) 213 | 214 | if t_dtype == trt.DataType.INT8: 215 | print("Generating INT8 engine") 216 | config.flags |= 1 << int(trt.BuilderFlag.INT8) 217 | config.flags |= 1 < RGB 156 | images = preprocess_ds_nchw([img]) 157 | 158 | return images, orig_img 159 | 160 | def load_images_cv_mt(img_paths, new_shape, pool): 161 | pass 162 | 163 | def load_single_ul(img_path, img_size, new_shape): 164 | orig_img = cv2.imread(img_path) 165 | h0, w0 = orig_img.shape[:2] # orig hw 166 | r = img_size / max(h0, w0) # ratio 167 | if r != 1: # if sizes are not equal 168 | interp = cv2.INTER_LINEAR if (r > 1) else cv2.INTER_AREA 169 | im = cv2.resize(orig_img, (int(w0 * r), int(h0 * r)), interpolation=interp) 170 | else: 171 | im = orig_img 172 | img, _, _ = letterbox(im.copy(), new_shape, auto=False, scaleup=False) 173 | img = img[..., [2, 1, 0]] # BGR -> RGB 174 | return img 175 | 176 | def load_images_cv_ultralytics_mt(img_paths, new_shape, pool, n_thread=8): 177 | 178 | load_func = partial(load_single_ul, img_size=INPUT_SIZE, new_shape=new_shape) 179 | cnt = 0 180 | imgs = [] 181 | while cnt < len(img_paths): 182 | if cnt + n_thread >= len(img_paths): 183 | cur_map_list = img_paths[cnt :] 184 | else: 185 | cur_map_list = img_paths[cnt : cnt + n_thread] 186 | cnt += n_thread 187 | imgs.extend(pool.map(load_func, cur_map_list)) 188 | 189 | images = preprocess_v2(imgs) 190 | return images 191 | 192 | def load_images_cv_ultralytics(img_path, new_shape): 193 | orig_img = cv2.imread(img_path) 194 | h0, w0 = orig_img.shape[:2] # orig hw 195 | r = INPUT_SIZE / max(h0, w0) # ratio 196 | if r != 1: # if sizes are not equal 197 | interp = cv2.INTER_LINEAR if (r > 1) else cv2.INTER_AREA 198 | im = cv2.resize(orig_img, (int(w0 * r), int(h0 * r)), interpolation=interp) 199 | else: 200 | im = orig_img 201 | h, w, _ = im.shape 202 | img, _, pad = letterbox(im.copy(), new_shape, auto=False, scaleup=False) 203 | ratio_pad = ((h / h0, w / w0), pad) # for COCO mAP rescaling 204 | img = img[..., [2, 1, 0]] # BGR -> RGB 205 | images = preprocess_ds_nchw([img]) 206 | 207 | return images, orig_img, ratio_pad 208 | 209 | 210 | def rect_inference(engine, img_root, output_img_root, max_shape, img_new_shapes, jlist=None): 211 | with engine.create_execution_context() as context: 212 | context.set_binding_shape(0, max_shape) 213 | inputs, outputs, bindings, stream = allocate_buffers(engine, context) 214 | for img_name in tqdm(sorted(os.listdir(img_root))): 215 | img_path = os.path.join(img_root, img_name) 216 | if jlist is not None: 217 | img_id = int(img_name.split(".")[0]) 218 | else: 219 | img_id = None 220 | 221 | new_shape = (INPUT_SIZE, INPUT_SIZE) 222 | context.set_optimization_profile_async(0, stream.handle) 223 | context.set_binding_shape(0, (1, 3, new_shape[0], new_shape[1])) 224 | images, orig_img, ratio_pad = load_images_cv_ultralytics(img_path, new_shape) 225 | 226 | 
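            # ratio_pad = ((h / h0, w / w0), letterbox padding) from
            # load_images_cv_ultralytics; draw_bbox_cv uses it to map boxes
            # back to original-image coordinates for COCO mAP rescaling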
batch_images = images
227 |             # Hard-coded for explicit_batch and the ONNX model's batch_size = 1
228 |             batch_images = batch_images[np.newaxis, :, :, :]
229 |             outputs_shape, outputs_data = do_inference_v2(batch=batch_images, context=context,
230 |                                                           bindings=bindings, inputs=inputs,
231 |                                                           outputs=outputs, stream=stream)
232 |             results = decode(keep_k = outputs_data["BatchedNMS"],
233 |                              boxes = outputs_data["BatchedNMS_1"],
234 |                              scores = outputs_data["BatchedNMS_2"],
235 |                              cls_id = outputs_data["BatchedNMS_3"])
236 |             # visualize the bbox
237 |             draw_bbox_cv(orig_img, images, os.path.join(output_img_root, img_name),
238 |                          results[0], image_id=img_id, jlist=jlist, ratio_pad=ratio_pad)
239 | 
240 | 
241 | def square_inference(engine, img_root, output_img_root, jlist):
242 |     with engine.create_execution_context() as context:
243 |         context.set_binding_shape(0, (1, 3, INPUT_SIZE, INPUT_SIZE))
244 |         new_shape = (INPUT_SIZE, INPUT_SIZE)
245 |         inputs, outputs, bindings, stream = allocate_buffers(engine, context)
246 |         for img_name in tqdm(sorted(os.listdir(img_root))):
247 |             img_path = os.path.join(img_root, img_name)
248 |             if jlist is not None:
249 |                 img_id = int(img_name.split(".")[0])
250 |             else:
251 |                 img_id = None
252 | 
253 |             images, orig_img = load_images_cv(img_path, new_shape)
254 |             ratio_pad = None
255 |             batch_images = images
256 |             # Hard-coded for explicit_batch and the ONNX model's batch_size = 1
257 |             batch_images = batch_images[np.newaxis, :, :, :]
258 |             outputs_shape, outputs_data = do_inference(batch=batch_images, context=context,
259 |                                                        bindings=bindings, inputs=inputs,
260 |                                                        outputs=outputs, stream=stream)
261 |             results = decode(keep_k = outputs_data["BatchedNMS"],
262 |                              boxes = outputs_data["BatchedNMS_1"],
263 |                              scores = outputs_data["BatchedNMS_2"],
264 |                              cls_id = outputs_data["BatchedNMS_3"])
265 |             # visualize the bbox
266 |             draw_bbox_cv(orig_img, images, os.path.join(output_img_root, img_name),
267 |                          results[0], image_id=img_id, jlist=jlist, ratio_pad=ratio_pad)
268 | 
269 | 
270 | if __name__ == "__main__":
271 | 
272 |     parser = argparse.ArgumentParser(description='Do YOLOV5 inference using TRT')
273 |     parser.add_argument('--input_images_folder', type=str, help='input images path', required=True)
274 |     parser.add_argument('--output_images_folder', type=str, help='output images path', required=True)
275 |     parser.add_argument('--onnx', type=str, help='ONNX file path', required=True)
276 |     parser.add_argument('--coco_anno', type=str, default="", help="COCO annotation file")
277 |     parser.add_argument('--save_engine', type=str, default="", help="Save trt engine path")
278 |     parser.add_argument('--rect', action="store_true", help="Do rect inference in COCO evaluation")
279 |     parser.add_argument('--input_size', type=int, default=640, help="Input Size")
280 |     parser.add_argument('--stride', type=int, default=32, help="the max stride of the model")
281 |     parser.add_argument("--data_type", type=str, default="fp16", help="Data type for the TensorRT inference.", choices=["fp32", "fp16", "int8"])
282 | 
283 |     parser.add_argument('--calib_img_dir', type=str, default="", help="calibration images directory.")
284 |     parser.add_argument('--calib_cache', type=str, default="", help="int8 calibration cache.")
285 |     parser.add_argument('--n_batches', type=int, default=10, help="number of batches to do calibration.")
286 |     parser.add_argument('--batch_size', type=int, default=1, help="batch size to do calibration.")
287 | 
288 |     args = parser.parse_args()
289 | 
290 |     batch_size = 1
291 |     engine_file = args.onnx
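    # NB: despite the name, engine_file holds the ONNX path; the TensorRT
    # engine is built from it on the fly by build_engine_from_onnx below
    # (and can be saved with --save_engine)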
292 |     img_root = args.input_images_folder
293 |     output_img_root = args.output_images_folder
294 |     batch_cnt = 1
295 |     INPUT_SIZE = args.input_size
296 |     precision = args.data_type
297 |     max_bs = args.batch_size
298 |     save_engine = args.save_engine
299 |     total_cnt = 0
300 |     ac_cnt = 0
301 | 
302 |     if not os.path.exists(output_img_root):
303 |         print(f"Please create the output images directory: {output_img_root}")
304 |         exit(1)
305 | 
306 |     if args.coco_anno != "":  # Do COCO evaluation
307 |         jlist = []
308 |         # Loop over the images to get the per-image inference shapes (disabled; a fixed 672x672 shape is used below):
309 |         if args.rect:
310 |             img_new_shapes = {}
311 |             pad = 0.5
312 |             stride = args.stride
313 |             min_w = 1e5
314 |             min_h = 1e5
315 |             max_w = -1
316 |             max_h = -1
317 |             # for img_name in sorted(os.listdir(img_root)):
318 |             #     img_path = os.path.join(img_root, img_name)
319 |             #     img = cv2.imread(img_path)
320 |             #     h, w, _ = img.shape
321 |             #     ar = h / w
322 |             #     r = [1, 1]
323 |             #     if ar < 1:
324 |             #         r = [ar, 1]
325 |             #     elif ar > 1:
326 |             #         r = [1, 1/ar]
327 |             #     new_shape = np.ceil(np.array(r) * INPUT_SIZE / stride + pad).astype(int) * stride
328 |             #     if new_shape[0] < min_h:
329 |             #         min_h = new_shape[0]
330 |             #     elif new_shape[0] > max_h:
331 |             #         max_h = new_shape[0]
332 |             #     if new_shape[1] < min_w:
333 |             #         min_w = new_shape[1]
334 |             #     elif new_shape[1] > max_w:
335 |             #         max_w = new_shape[1]
336 |             #     img_new_shapes[img_name] = new_shape
337 |             max_w, max_h = 672, 672
338 |             INPUT_SIZE = max(max_w, max_h)
339 |             min_shape = (1, 3, INPUT_SIZE, INPUT_SIZE)
340 |             opt_shape = (1, 3, INPUT_SIZE, INPUT_SIZE)
341 |             max_shape = (max_bs, 3, INPUT_SIZE, INPUT_SIZE)
342 |             print(min_shape)
343 |             print(opt_shape)
344 |             print(max_shape)
345 |         else:
346 |             min_shape = (1, 3, INPUT_SIZE, INPUT_SIZE)
347 |             opt_shape = (1, 3, INPUT_SIZE, INPUT_SIZE)
348 |             max_shape = (max_bs, 3, INPUT_SIZE, INPUT_SIZE)
349 |     else:
350 |         jlist = None
351 |         min_shape = (1, 3, INPUT_SIZE, INPUT_SIZE)
352 |         opt_shape = (1, 3, INPUT_SIZE, INPUT_SIZE)
353 |         max_shape = (max_bs, 3, INPUT_SIZE, INPUT_SIZE)
354 | 
355 |     if precision == "int8":
356 |         n_thread = 8
357 |         pool = Pool(n_thread)
358 |         if args.rect:
359 |             # load_func = partial(load_images_cv_ultralytics, new_shape=INPUT_SIZE)
360 |             load_func = partial(load_images_cv_ultralytics_mt, new_shape=INPUT_SIZE, pool=pool, n_thread=n_thread)
361 |         else:
362 |             load_func = partial(load_images_cv, new_shape=INPUT_SIZE)
363 | 
364 |         calibrator = PTQEntropyCalibrator(cal_data=args.calib_img_dir,
365 |                                           cache_file=args.calib_cache,
366 |                                           load_func=load_func,
367 |                                           n_batches=args.n_batches,
368 |                                           batch_size=max_bs)
369 |     else:
370 |         calibrator = None
371 | 
372 |     trt.init_libnvinfer_plugins(None, '')
373 |     # with load_tensorrt_engine(engine_file) as engine:
374 |     #     print("Engine Loaded.")
375 |     with build_engine_from_onnx(engine_file, verbose=False,
376 |                                 dtype=precision,
377 |                                 min_shape=min_shape,
378 |                                 opt_shape=opt_shape,
379 |                                 max_shape=max_shape,
380 |                                 extra_output_layer=[],
381 |                                 calibrator=calibrator
382 |                                 ) as engine:
383 |         if save_engine != "":
384 |             save_tensorrt_engine(save_engine, engine)
385 |         if args.rect:
386 |             rect_inference(engine, img_root=img_root, output_img_root=output_img_root,
387 |                            max_shape=max_shape, img_new_shapes=img_new_shapes, jlist=jlist)
388 |         else:
389 |             square_inference(engine, img_root=img_root, output_img_root=output_img_root, jlist=jlist)
390 | 
391 |     if args.coco_anno != "":
392 |         anno_json = args.coco_anno  # annotations json
393 |         pred_json = './cocoval17_predictions.json'  # predictions json
394 |         import json
395 |         with open(pred_json,
'w') as f:
396 |             json.dump(jlist, f)
397 | 
398 |         from pycocotools.coco import COCO
399 |         from pycocotools.cocoeval import COCOeval
400 | 
401 |         anno = COCO(anno_json)  # initialize annotations API
402 |         pred = anno.loadRes(pred_json)  # initialize predictions API
403 |         coco_eval = COCOeval(anno, pred, 'bbox')  # named coco_eval to avoid shadowing the eval builtin
404 |         coco_eval.evaluate()
405 |         coco_eval.accumulate()
406 |         coco_eval.summarize()
407 | 
--------------------------------------------------------------------------------
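Example invocation (a sketch; the dataset and model paths below are placeholders, not part of this repo): build an FP16 engine from the exported ONNX model, run square inference over COCO val2017, save the engine, and score the predictions against the annotation file. All flags come from the argparse block above.

    mkdir -p ./output_images
    python tensorrt-sample/yolov5_trt_inference.py \
        --input_images_folder /data/coco/val2017 \
        --output_images_folder ./output_images \
        --onnx yolov5s.onnx \
        --coco_anno /data/coco/annotations/instances_val2017.json \
        --save_engine yolov5s_fp16.engine \
        --data_type fp16 \
        --input_size 640

Note that --output_images_folder must already exist (hence the mkdir above); otherwise the script exits before building the engine. Passing --coco_anno enables COCO evaluation and writes the predictions to ./cocoval17_predictions.json before summarizing with pycocotools.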