├── .github └── FUNDING.yml ├── LICENSE.md ├── README.md ├── config_infer_primary.txt ├── config_infer_primary_codetr.txt ├── config_infer_primary_damoyolo.txt ├── config_infer_primary_dfine.txt ├── config_infer_primary_goldyolo.txt ├── config_infer_primary_ppyoloe.txt ├── config_infer_primary_ppyoloe_plus.txt ├── config_infer_primary_rtdetr.txt ├── config_infer_primary_rtmdet.txt ├── config_infer_primary_yolo11.txt ├── config_infer_primary_yoloV10.txt ├── config_infer_primary_yoloV2.txt ├── config_infer_primary_yoloV5.txt ├── config_infer_primary_yoloV5u.txt ├── config_infer_primary_yoloV6.txt ├── config_infer_primary_yoloV7.txt ├── config_infer_primary_yoloV8.txt ├── config_infer_primary_yoloV9.txt ├── config_infer_primary_yolonas.txt ├── config_infer_primary_yolonas_custom.txt ├── config_infer_primary_yolor.txt ├── config_infer_primary_yolox.txt ├── config_infer_primary_yolox_legacy.txt ├── deepstream_app_config.txt ├── docs ├── CODETR.md ├── DAMOYOLO.md ├── DFINE.md ├── GoldYOLO.md ├── INT8Calibration.md ├── PPYOLOE.md ├── RTDETR_Paddle.md ├── RTDETR_PyTorch.md ├── RTDETR_Ultralytics.md ├── RTMDet.md ├── YOLO11.md ├── YOLONAS.md ├── YOLOR.md ├── YOLOX.md ├── YOLOv10.md ├── YOLOv5.md ├── YOLOv5u.md ├── YOLOv6.md ├── YOLOv7.md ├── YOLOv8.md ├── YOLOv9.md ├── benchmarks.md ├── customModels.md ├── dGPUInstalation.md ├── multipleGIEs.md └── multipleGIEs_tree.png ├── labels.txt ├── nvdsinfer_custom_impl_Yolo ├── Makefile ├── calibrator.cpp ├── calibrator.h ├── layers │ ├── activation_layer.cpp │ ├── activation_layer.h │ ├── batchnorm_layer.cpp │ ├── batchnorm_layer.h │ ├── channels_layer.cpp │ ├── channels_layer.h │ ├── convolutional_layer.cpp │ ├── convolutional_layer.h │ ├── deconvolutional_layer.cpp │ ├── deconvolutional_layer.h │ ├── implicit_layer.cpp │ ├── implicit_layer.h │ ├── pooling_layer.cpp │ ├── pooling_layer.h │ ├── reorg_layer.cpp │ ├── reorg_layer.h │ ├── route_layer.cpp │ ├── route_layer.h │ ├── sam_layer.cpp │ ├── sam_layer.h │ ├── shortcut_layer.cpp │ ├── shortcut_layer.h │ ├── slice_layer.cpp │ ├── slice_layer.h │ ├── upsample_layer.cpp │ └── upsample_layer.h ├── nvdsinfer_yolo_engine.cpp ├── nvdsparsebbox_Yolo.cpp ├── nvdsparsebbox_Yolo_cuda.cu ├── utils.cpp ├── utils.h ├── yolo.cpp ├── yolo.h ├── yoloForward.cu ├── yoloForward_nc.cu ├── yoloForward_v2.cu ├── yoloPlugins.cpp └── yoloPlugins.h └── utils ├── export_codetr.py ├── export_damoyolo.py ├── export_dfine.py ├── export_goldyolo.py ├── export_ppyoloe.py ├── export_rtdetr_paddle.py ├── export_rtdetr_pytorch.py ├── export_rtdetr_ultralytics.py ├── export_rtmdet.py ├── export_yolo11.py ├── export_yoloV10.py ├── export_yoloV5.py ├── export_yoloV5u.py ├── export_yoloV6.py ├── export_yoloV7.py ├── export_yoloV7_u6.py ├── export_yoloV8.py ├── export_yoloV9.py ├── export_yolonas.py ├── export_yolor.py └── export_yolox.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | #github: [marcoslucianops] 2 | custom: ['https://www.buymeacoffee.com/marcoslucianops'] 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-2023, Marcos Luciano Piropo Santos. 4 | Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /config_infer_primary.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | custom-network-config=yolov4.cfg 6 | model-file=yolov4.weights 7 | model-engine-file=model_b1_gpu0_fp32.engine 8 | #int8-calib-file=calib.table 9 | labelfile-path=labels.txt 10 | batch-size=1 11 | network-mode=0 12 | num-detected-classes=80 13 | interval=0 14 | gie-unique-id=1 15 | process-mode=1 16 | network-type=0 17 | cluster-mode=2 18 | maintain-aspect-ratio=0 19 | symmetric-padding=1 20 | force-implicit-batch-dim=0 21 | #workspace-size=2000 22 | parse-bbox-func-name=NvDsInferParseYolo 23 | #parse-bbox-func-name=NvDsInferParseYoloCuda 24 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 25 | engine-create-func-name=NvDsInferYoloCudaEngineGet 26 | 27 | [class-attrs-all] 28 | nms-iou-threshold=0.45 29 | pre-cluster-threshold=0.25 30 | topk=300 31 | -------------------------------------------------------------------------------- /config_infer_primary_codetr.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=co_dino_5scale_r50_1x_coco-7481f903.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=0 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_damoyolo.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=1 4 | model-color-format=0 5 | onnx-file=damoyolo_tinynasL25_S.onnx 6 | 
model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=0 18 | #workspace-size=2000 19 | parse-bbox-func-name=NvDsInferParseYolo 20 | #parse-bbox-func-name=NvDsInferParseYoloCuda 21 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 22 | engine-create-func-name=NvDsInferYoloCudaEngineGet 23 | 24 | [class-attrs-all] 25 | nms-iou-threshold=0.45 26 | pre-cluster-threshold=0.25 27 | topk=300 28 | -------------------------------------------------------------------------------- /config_infer_primary_dfine.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=dfine_s_coco.pth.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=4 17 | maintain-aspect-ratio=0 18 | #workspace-size=2000 19 | parse-bbox-func-name=NvDsInferParseYolo 20 | #parse-bbox-func-name=NvDsInferParseYoloCuda 21 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 22 | engine-create-func-name=NvDsInferYoloCudaEngineGet 23 | 24 | [class-attrs-all] 25 | pre-cluster-threshold=0.25 26 | topk=300 27 | -------------------------------------------------------------------------------- /config_infer_primary_goldyolo.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=Gold_s_pre_dist.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_ppyoloe.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0173520735727919486 4 | offsets=123.675;116.28;103.53 5 | model-color-format=0 6 | onnx-file=ppyoloe_crn_s_400e_coco.onnx 7 | model-engine-file=model_b1_gpu0_fp32.engine 8 | #int8-calib-file=calib.table 9 | labelfile-path=labels.txt 10 | batch-size=1 11 | network-mode=0 12 | num-detected-classes=80 13 | interval=0 14 | gie-unique-id=1 15 | process-mode=1 16 | network-type=0 17 | cluster-mode=2 18 | maintain-aspect-ratio=0 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | 
engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_ppyoloe_plus.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=ppyoloe_plus_crn_s_80e_coco.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=0 18 | #workspace-size=2000 19 | parse-bbox-func-name=NvDsInferParseYolo 20 | #parse-bbox-func-name=NvDsInferParseYoloCuda 21 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 22 | engine-create-func-name=NvDsInferYoloCudaEngineGet 23 | 24 | [class-attrs-all] 25 | nms-iou-threshold=0.45 26 | pre-cluster-threshold=0.25 27 | topk=300 28 | -------------------------------------------------------------------------------- /config_infer_primary_rtdetr.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=rtdetr_r50vd_6x_coco_from_paddle.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=4 17 | maintain-aspect-ratio=0 18 | #workspace-size=2000 19 | parse-bbox-func-name=NvDsInferParseYolo 20 | #parse-bbox-func-name=NvDsInferParseYoloCuda 21 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 22 | engine-create-func-name=NvDsInferYoloCudaEngineGet 23 | 24 | [class-attrs-all] 25 | pre-cluster-threshold=0.25 26 | topk=300 27 | -------------------------------------------------------------------------------- /config_infer_primary_rtmdet.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0173520735727919486 4 | offsets=103.53;116.28;123.675 5 | model-color-format=1 6 | onnx-file=rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.onnx 7 | model-engine-file=model_b1_gpu0_fp32.engine 8 | #int8-calib-file=calib.table 9 | labelfile-path=labels.txt 10 | batch-size=1 11 | network-mode=0 12 | num-detected-classes=80 13 | interval=0 14 | gie-unique-id=1 15 | process-mode=1 16 | network-type=0 17 | cluster-mode=2 18 | maintain-aspect-ratio=1 19 | symmetric-padding=1 20 | #workspace-size=2000 21 | parse-bbox-func-name=NvDsInferParseYolo 22 | #parse-bbox-func-name=NvDsInferParseYoloCuda 23 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 24 | engine-create-func-name=NvDsInferYoloCudaEngineGet 25 | 26 | [class-attrs-all] 27 | nms-iou-threshold=0.45 28 | pre-cluster-threshold=0.25 29 | topk=300 30 | -------------------------------------------------------------------------------- /config_infer_primary_yolo11.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | 
onnx-file=yolo11s.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV10.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov10s.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=4 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV2.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | custom-network-config=yolov2.cfg 6 | model-file=yolov2.weights 7 | model-engine-file=model_b1_gpu0_fp32.engine 8 | #int8-calib-file=calib.table 9 | labelfile-path=labels.txt 10 | batch-size=1 11 | network-mode=0 12 | num-detected-classes=80 13 | interval=0 14 | gie-unique-id=1 15 | process-mode=1 16 | network-type=0 17 | cluster-mode=2 18 | maintain-aspect-ratio=0 19 | force-implicit-batch-dim=0 20 | #workspace-size=2000 21 | parse-bbox-func-name=NvDsInferParseYolo 22 | #parse-bbox-func-name=NvDsInferParseYoloCuda 23 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 24 | engine-create-func-name=NvDsInferYoloCudaEngineGet 25 | 26 | [class-attrs-all] 27 | nms-iou-threshold=0.45 28 | pre-cluster-threshold=0.25 29 | topk=300 30 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV5.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov5s.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | 
#parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV5u.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov5su.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV6.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov6s.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV7.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov7.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV8.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | 
net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov8s.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV9.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov9-c.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yolonas.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolo_nas_s_coco.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=0 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yolonas_custom.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=1 4 | model-color-format=0 5 | onnx-file=yolo_nas_s_coco.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=0 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | 
#parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yolor.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolor_csp.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yolox.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=1 4 | model-color-format=1 5 | onnx-file=yolox_s.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=0 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yolox_legacy.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0173520735727919486 4 | offsets=123.675;116.28;103.53 5 | model-color-format=0 6 | onnx-file=yolox_s.onnx 7 | model-engine-file=model_b1_gpu0_fp32.engine 8 | #int8-calib-file=calib.table 9 | labelfile-path=labels.txt 10 | batch-size=1 11 | network-mode=0 12 | num-detected-classes=80 13 | interval=0 14 | gie-unique-id=1 15 | process-mode=1 16 | network-type=0 17 | cluster-mode=2 18 | maintain-aspect-ratio=1 19 | symmetric-padding=0 20 | #workspace-size=2000 21 | parse-bbox-func-name=NvDsInferParseYolo 22 | #parse-bbox-func-name=NvDsInferParseYoloCuda 23 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 24 | engine-create-func-name=NvDsInferYoloCudaEngineGet 25 | 26 | [class-attrs-all] 27 | nms-iou-threshold=0.45 28 | pre-cluster-threshold=0.25 29 | topk=300 30 | -------------------------------------------------------------------------------- /deepstream_app_config.txt: -------------------------------------------------------------------------------- 1 | [application] 
2 | enable-perf-measurement=1 3 | perf-measurement-interval-sec=5 4 | 5 | [tiled-display] 6 | enable=1 7 | rows=1 8 | columns=1 9 | width=1280 10 | height=720 11 | gpu-id=0 12 | nvbuf-memory-type=0 13 | 14 | [source0] 15 | enable=1 16 | type=3 17 | uri=file:///opt/nvidia/deepstream/deepstream/samples/streams/sample_1080p_h264.mp4 18 | num-sources=1 19 | gpu-id=0 20 | cudadec-memtype=0 21 | 22 | [sink0] 23 | enable=1 24 | type=2 25 | sync=0 26 | gpu-id=0 27 | nvbuf-memory-type=0 28 | 29 | [osd] 30 | enable=1 31 | gpu-id=0 32 | border-width=5 33 | text-size=15 34 | text-color=1;1;1;1; 35 | text-bg-color=0.3;0.3;0.3;1 36 | font=Serif 37 | show-clock=0 38 | clock-x-offset=800 39 | clock-y-offset=820 40 | clock-text-size=12 41 | clock-color=1;0;0;0 42 | nvbuf-memory-type=0 43 | 44 | [streammux] 45 | gpu-id=0 46 | live-source=0 47 | batch-size=1 48 | batched-push-timeout=40000 49 | width=1920 50 | height=1080 51 | enable-padding=0 52 | nvbuf-memory-type=0 53 | 54 | [primary-gie] 55 | enable=1 56 | gpu-id=0 57 | gie-unique-id=1 58 | nvbuf-memory-type=0 59 | config-file=config_infer_primary.txt 60 | 61 | [tests] 62 | file-loop=0 63 | -------------------------------------------------------------------------------- /docs/CODETR.md: -------------------------------------------------------------------------------- 1 | # CO-DETR (MMDetection) usage 2 | 3 | * [Convert model](#convert-model) 4 | * [Compile the lib](#compile-the-lib) 5 | * [Edit the config_infer_primary_codetr file](#edit-the-config_infer_primary_codetr-file) 6 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 7 | * [Testing the model](#testing-the-model) 8 | 9 | ## 10 | 11 | ### Convert model 12 | 13 | #### 1. Download the CO-DETR (MMDetection) repo and install the requirements 14 | 15 | ``` 16 | git clone https://github.com/open-mmlab/mmdetection.git 17 | cd mmdetection 18 | pip3 install openmim 19 | mim install mmengine 20 | mim install mmdeploy 21 | mim install "mmcv>=2.0.0rc4,<2.2.0" 22 | pip3 install -v -e . 23 | pip3 install onnx onnxslim onnxruntime 24 | ``` 25 | 26 | **NOTE**: It is recommended to use Python virtualenv. 27 | 28 | #### 2. Copy the converter 29 | 30 | Copy the `export_codetr.py` file from the `DeepStream-Yolo/utils` directory to the `mmdetection` folder. 31 | 32 | #### 3. Download the model 33 | 34 | Download the `pth` file from [CO-DETR (MMDetection)](https://github.com/open-mmlab/mmdetection/tree/main/projects/CO-DETR) releases (example for Co-DINO R50 DETR) 35 | 36 | ``` 37 | wget https://download.openmmlab.com/mmdetection/v3.0/codetr/co_dino_5scale_r50_1x_coco-7481f903.pth 38 | ``` 39 | 40 | **NOTE**: You can use your custom model. 41 | 42 | #### 4. 
Convert model 43 | 44 | Generate the ONNX model file (example for Co-DINO R50 DETR) 45 | 46 | ``` 47 | python3 export_codetr.py -w co_dino_5scale_r50_1x_coco-7481f903.pth -c projects/CO-DETR/configs/codino/co_dino_5scale_r50_8xb2_1x_coco.py --dynamic 48 | ``` 49 | 50 | **NOTE**: To change the inference size (defaut: 640) 51 | 52 | ``` 53 | -s SIZE 54 | --size SIZE 55 | -s HEIGHT WIDTH 56 | --size HEIGHT WIDTH 57 | ``` 58 | 59 | Example for 1280 60 | 61 | ``` 62 | -s 1280 63 | ``` 64 | 65 | or 66 | 67 | ``` 68 | -s 1280 1280 69 | ``` 70 | 71 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 72 | 73 | ``` 74 | --simplify 75 | ``` 76 | 77 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 78 | 79 | ``` 80 | --dynamic 81 | ``` 82 | 83 | **NOTE**: To use static batch-size (example for batch-size = 4) 84 | 85 | ``` 86 | --batch 4 87 | ``` 88 | 89 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 11. 90 | 91 | ``` 92 | --opset 12 93 | ``` 94 | 95 | #### 5. Copy generated files 96 | 97 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 98 | 99 | ## 100 | 101 | ### Compile the lib 102 | 103 | 1. Open the `DeepStream-Yolo` folder and compile the lib 104 | 105 | 2. Set the `CUDA_VER` according to your DeepStream version 106 | 107 | ``` 108 | export CUDA_VER=XY.Z 109 | ``` 110 | 111 | * x86 platform 112 | 113 | ``` 114 | DeepStream 7.1 = 12.6 115 | DeepStream 7.0 / 6.4 = 12.2 116 | DeepStream 6.3 = 12.1 117 | DeepStream 6.2 = 11.8 118 | DeepStream 6.1.1 = 11.7 119 | DeepStream 6.1 = 11.6 120 | DeepStream 6.0.1 / 6.0 = 11.4 121 | DeepStream 5.1 = 11.1 122 | ``` 123 | 124 | * Jetson platform 125 | 126 | ``` 127 | DeepStream 7.1 = 12.6 128 | DeepStream 7.0 / 6.4 = 12.2 129 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 130 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 131 | ``` 132 | 133 | 3. Make the lib 134 | 135 | ``` 136 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 137 | ``` 138 | 139 | ## 140 | 141 | ### Edit the config_infer_primary_codetr file 142 | 143 | Edit the `config_infer_primary_codetr.txt` file according to your model (example for Co-DINO R50 DETR with 80 classes) 144 | 145 | ``` 146 | [property] 147 | ... 148 | onnx-file=co_dino_5scale_r50_1x_coco-7481f903.pth.onnx 149 | ... 150 | num-detected-classes=80 151 | ... 152 | parse-bbox-func-name=NvDsInferParseYolo 153 | ... 154 | ``` 155 | 156 | **NOTE**: The **CO-DETR (MMDetection)** resizes the input with left/top padding. To get better accuracy, use 157 | 158 | ``` 159 | [property] 160 | ... 161 | maintain-aspect-ratio=1 162 | symmetric-padding=0 163 | ... 164 | ``` 165 | 166 | ## 167 | 168 | ### Edit the deepstream_app_config file 169 | 170 | ``` 171 | ... 172 | [primary-gie] 173 | ... 174 | config-file=config_infer_primary_codetr.txt 175 | ``` 176 | 177 | ## 178 | 179 | ### Testing the model 180 | 181 | ``` 182 | deepstream-app -c deepstream_app_config.txt 183 | ``` 184 | 185 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 186 | 187 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 
188 | -------------------------------------------------------------------------------- /docs/DAMOYOLO.md: -------------------------------------------------------------------------------- 1 | # DAMO-YOLO usage 2 | 3 | * [Convert model](#convert-model) 4 | * [Compile the lib](#compile-the-lib) 5 | * [Edit the config_infer_primary_damoyolo file](#edit-the-config_infer_primary_damoyolo-file) 6 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 7 | * [Testing the model](#testing-the-model) 8 | 9 | ## 10 | 11 | ### Convert model 12 | 13 | #### 1. Download the DAMO-YOLO repo and install the requirements 14 | 15 | ``` 16 | git clone https://github.com/tinyvision/DAMO-YOLO.git 17 | cd DAMO-YOLO 18 | pip3 install -r requirements.txt 19 | pip3 install onnx onnxslim onnxruntime 20 | ``` 21 | 22 | **NOTE**: It is recommended to use Python virtualenv. 23 | 24 | #### 2. Copy the converter 25 | 26 | Copy the `export_damoyolo.py` file from the `DeepStream-Yolo/utils` directory to the `DAMO-YOLO` folder. 27 | 28 | #### 3. Download the model 29 | 30 | Download the `pth` file from [DAMO-YOLO](https://github.com/tinyvision/DAMO-YOLO) releases (example for DAMO-YOLO-S*) 31 | 32 | ``` 33 | wget https://idstcv.oss-cn-zhangjiakou.aliyuncs.com/DAMO-YOLO/release_model/clean_model_0317/damoyolo_tinynasL25_S_477.pth 34 | ``` 35 | 36 | **NOTE**: You can use your custom model. 37 | 38 | #### 4. Convert model 39 | 40 | Generate the ONNX model file (example for DAMO-YOLO-S*) 41 | 42 | ``` 43 | python3 export_damoyolo.py -w damoyolo_tinynasL25_S_477.pth -c configs/damoyolo_tinynasL25_S.py --dynamic 44 | ``` 45 | 46 | **NOTE**: To change the inference size (default: 640) 47 | 48 | ``` 49 | -s SIZE 50 | --size SIZE 51 | -s HEIGHT WIDTH 52 | --size HEIGHT WIDTH 53 | ``` 54 | 55 | Example for 1280 56 | 57 | ``` 58 | -s 1280 59 | ``` 60 | 61 | or 62 | 63 | ``` 64 | -s 1280 1280 65 | ``` 66 | 67 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 68 | 69 | ``` 70 | --simplify 71 | ``` 72 | 73 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 74 | 75 | ``` 76 | --dynamic 77 | ``` 78 | 79 | **NOTE**: To use static batch-size (example for batch-size = 4) 80 | 81 | ``` 82 | --batch 4 83 | ``` 84 | 85 | **NOTE**: If you are using DeepStream 5.1, remove the `--dynamic` arg and use opset 11 or lower. The default opset is 11. 86 | 87 | ``` 88 | --opset 11 89 | ``` 90 | 91 | #### 5. Copy generated files 92 | 93 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 94 | 95 | ## 96 | 97 | ### Compile the lib 98 | 99 | 1. Open the `DeepStream-Yolo` folder and compile the lib 100 | 101 | 2. Set the `CUDA_VER` according to your DeepStream version 102 | 103 | ``` 104 | export CUDA_VER=XY.Z 105 | ``` 106 | 107 | * x86 platform 108 | 109 | ``` 110 | DeepStream 7.1 = 12.6 111 | DeepStream 7.0 / 6.4 = 12.2 112 | DeepStream 6.3 = 12.1 113 | DeepStream 6.2 = 11.8 114 | DeepStream 6.1.1 = 11.7 115 | DeepStream 6.1 = 11.6 116 | DeepStream 6.0.1 / 6.0 = 11.4 117 | DeepStream 5.1 = 11.1 118 | ``` 119 | 120 | * Jetson platform 121 | 122 | ``` 123 | DeepStream 7.1 = 12.6 124 | DeepStream 7.0 / 6.4 = 12.2 125 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 126 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 127 | ``` 128 | 129 | 3. 
Make the lib 130 | 131 | ``` 132 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 133 | ``` 134 | 135 | ## 136 | 137 | ### Edit the config_infer_primary_damoyolo file 138 | 139 | Edit the `config_infer_primary_damoyolo.txt` file according to your model (example for DAMO-YOLO-S* with 80 classes) 140 | 141 | ``` 142 | [property] 143 | ... 144 | onnx-file=damoyolo_tinynasL25_S_477.pth.onnx 145 | ... 146 | num-detected-classes=80 147 | ... 148 | parse-bbox-func-name=NvDsInferParseYolo 149 | ... 150 | ``` 151 | 152 | **NOTE**: The **DAMO-YOLO** does not resize the input with padding. To get better accuracy, use 153 | 154 | ``` 155 | [property] 156 | ... 157 | maintain-aspect-ratio=0 158 | ... 159 | ``` 160 | 161 | ## 162 | 163 | ### Edit the deepstream_app_config file 164 | 165 | ``` 166 | ... 167 | [primary-gie] 168 | ... 169 | config-file=config_infer_primary_damoyolo.txt 170 | ``` 171 | 172 | ## 173 | 174 | ### Testing the model 175 | 176 | ``` 177 | deepstream-app -c deepstream_app_config.txt 178 | ``` 179 | 180 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 181 | 182 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 183 | -------------------------------------------------------------------------------- /docs/DFINE.md: -------------------------------------------------------------------------------- 1 | # D-FINE usage 2 | 3 | * [Convert model](#convert-model) 4 | * [Compile the lib](#compile-the-lib) 5 | * [Edit the config_infer_primary_dfine file](#edit-the-config_infer_primary_dfine-file) 6 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 7 | * [Testing the model](#testing-the-model) 8 | 9 | ## 10 | 11 | ### Convert model 12 | 13 | #### 1. Download the D-FINE repo and install the requirements 14 | 15 | ``` 16 | git clone https://github.com/Peterande/D-FINE.git 17 | cd D-FINE 18 | pip3 install -r requirements.txt 19 | pip3 install onnx onnxslim onnxruntime 20 | ``` 21 | 22 | **NOTE**: It is recommended to use Python virtualenv. 23 | 24 | #### 2. Copy the converter 25 | 26 | Copy the `export_dfine.py` file from the `DeepStream-Yolo/utils` directory to the `D-FINE` folder. 27 | 28 | #### 3. Download the model 29 | 30 | Download the `pth` file from [D-FINE](https://github.com/Peterande/storage/releases/tag/dfinev1.0) releases (example for D-FINE-S) 31 | 32 | ``` 33 | wget https://github.com/Peterande/storage/releases/download/dfinev1.0/dfine_s_coco.pth 34 | ``` 35 | 36 | **NOTE**: You can use your custom model. 37 | 38 | #### 4. 
Convert model 39 | 40 | Generate the ONNX model file (example for D-FINE-S) 41 | 42 | ``` 43 | python3 export_dfine.py -w dfine_s_coco.pth -c configs/dfine/dfine_hgnetv2_s_coco.yml --dynamic 44 | ``` 45 | 46 | **NOTE**: To change the inference size (default: 640) 47 | 48 | ``` 49 | -s SIZE 50 | --size SIZE 51 | -s HEIGHT WIDTH 52 | --size HEIGHT WIDTH 53 | ``` 54 | 55 | Example for 1280 56 | 57 | ``` 58 | -s 1280 59 | ``` 60 | 61 | or 62 | 63 | ``` 64 | -s 1280 1280 65 | ``` 66 | 67 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 68 | 69 | ``` 70 | --simplify 71 | ``` 72 | 73 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 74 | 75 | ``` 76 | --dynamic 77 | ``` 78 | 79 | **NOTE**: To use static batch-size (example for batch-size = 4) 80 | 81 | ``` 82 | --batch 4 83 | ``` 84 | 85 | **NOTE**: If you are using DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 16. 86 | 87 | ``` 88 | --opset 12 89 | ``` 90 | 91 | #### 5. Copy generated files 92 | 93 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 94 | 95 | ## 96 | 97 | ### Compile the lib 98 | 99 | 1. Open the `DeepStream-Yolo` folder and compile the lib 100 | 101 | 2. Set the `CUDA_VER` according to your DeepStream version 102 | 103 | ``` 104 | export CUDA_VER=XY.Z 105 | ``` 106 | 107 | * x86 platform 108 | 109 | ``` 110 | DeepStream 7.1 = 12.6 111 | DeepStream 7.0 / 6.4 = 12.2 112 | DeepStream 6.3 = 12.1 113 | DeepStream 6.2 = 11.8 114 | DeepStream 6.1.1 = 11.7 115 | DeepStream 6.1 = 11.6 116 | DeepStream 6.0.1 / 6.0 = 11.4 117 | DeepStream 5.1 = 11.1 118 | ``` 119 | 120 | * Jetson platform 121 | 122 | ``` 123 | DeepStream 7.1 = 12.6 124 | DeepStream 7.0 / 6.4 = 12.2 125 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 126 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 127 | ``` 128 | 129 | 3. Make the lib 130 | 131 | ``` 132 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 133 | ``` 134 | 135 | ## 136 | 137 | ### Edit the config_infer_primary_dfine file 138 | 139 | Edit the `config_infer_primary_dfine.txt` file according to your model (example for D-FINE-S with 80 classes) 140 | 141 | ``` 142 | [property] 143 | ... 144 | onnx-file=dfine_s_coco.pth.onnx 145 | ... 146 | num-detected-classes=80 147 | ... 148 | parse-bbox-func-name=NvDsInferParseYolo 149 | ... 150 | ``` 151 | 152 | **NOTE**: The **D-FINE** does not resize the input with padding. To get better accuracy, use 153 | 154 | ``` 155 | [property] 156 | ... 157 | maintain-aspect-ratio=0 158 | ... 159 | ``` 160 | 161 | **NOTE**: The **D-FINE** does not require NMS. To get better accuracy, use 162 | 163 | ``` 164 | [property] 165 | ... 166 | cluster-mode=4 167 | ... 168 | ``` 169 | 170 | ## 171 | 172 | ### Edit the deepstream_app_config file 173 | 174 | ``` 175 | ... 176 | [primary-gie] 177 | ... 178 | config-file=config_infer_primary_dfine.txt 179 | ``` 180 | 181 | ## 182 | 183 | ### Testing the model 184 | 185 | ``` 186 | deepstream-app -c deepstream_app_config.txt 187 | ``` 188 | 189 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 190 | 191 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 
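**NOTE**: A recurring question about these config files is where the `net-scale-factor` and `offsets` constants come from. nvinfer pre-processes every pixel as `y = net-scale-factor * (pixel - offset)`, so the values encode the normalization the model was trained with. A quick illustrative check in Python follows; the per-channel stds `[58.395, 57.12, 57.375]` are the common ImageNet values used by several of these frameworks, an assumption rather than something stated in this repo:

```python
# Where the recurring config constants come from (illustrative only).
# nvinfer pre-processing: y = net-scale-factor * (pixel - offset)

# 1/255 scaling (most configs here, including config_infer_primary_dfine.txt):
print(1 / 255)        # 0.003921568..., ~ net-scale-factor=0.0039215697906911373

# ImageNet normalization (config_infer_primary_ppyoloe.txt and the legacy
# YOLOX config): offsets are the per-channel means, and the scale is the
# reciprocal of the averaged per-channel stds.
std_avg = (58.395 + 57.12 + 57.375) / 3   # 57.63
print(1 / std_avg)    # 0.017352073..., ~ net-scale-factor=0.0173520735727919486
print([123.675, 116.28, 103.53])          # offsets=123.675;116.28;103.53
```

If a custom model was trained with a different normalization, these two keys are the ones to adjust.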
192 | -------------------------------------------------------------------------------- /docs/GoldYOLO.md: -------------------------------------------------------------------------------- 1 | # Gold-YOLO usage 2 | 3 | * [Convert model](#convert-model) 4 | * [Compile the lib](#compile-the-lib) 5 | * [Edit the config_infer_primary_goldyolo file](#edit-the-config_infer_primary_goldyolo-file) 6 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 7 | * [Testing the model](#testing-the-model) 8 | 9 | ## 10 | 11 | ### Convert model 12 | 13 | #### 1. Download the Gold-YOLO repo and install the requirements 14 | 15 | ``` 16 | git clone https://github.com/huawei-noah/Efficient-Computing.git 17 | cd Efficient-Computing/Detection/Gold-YOLO 18 | pip3 install -r requirements.txt 19 | pip3 install onnx onnxslim onnxruntime 20 | ``` 21 | 22 | **NOTE**: It is recommended to use Python virtualenv. 23 | 24 | #### 2. Copy the converter 25 | 26 | Copy the `export_goldyolo.py` file from the `DeepStream-Yolo/utils` directory to the `Gold-YOLO` folder. 27 | 28 | #### 3. Download the model 29 | 30 | Download the `pt` file from [Gold-YOLO](https://github.com/huawei-noah/Efficient-Computing/tree/master/Detection/Gold-YOLO) releases 31 | 32 | **NOTE**: You can use your custom model. 33 | 34 | #### 4. Convert model 35 | 36 | Generate the ONNX model file (example for Gold-YOLO-S) 37 | 38 | ``` 39 | python3 export_goldyolo.py -w Gold_s_pre_dist.pt --dynamic 40 | ``` 41 | 42 | **NOTE**: To change the inference size (default: 640) 43 | 44 | ``` 45 | -s SIZE 46 | --size SIZE 47 | -s HEIGHT WIDTH 48 | --size HEIGHT WIDTH 49 | ``` 50 | 51 | Example for 1280 52 | 53 | ``` 54 | -s 1280 55 | ``` 56 | 57 | or 58 | 59 | ``` 60 | -s 1280 1280 61 | ``` 62 | 63 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 64 | 65 | ``` 66 | --simplify 67 | ``` 68 | 69 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 70 | 71 | ``` 72 | --dynamic 73 | ``` 74 | 75 | **NOTE**: To use static batch-size (example for batch-size = 4) 76 | 77 | ``` 78 | --batch 4 79 | ``` 80 | 81 | **NOTE**: If you are using DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 13. 82 | 83 | ``` 84 | --opset 12 85 | ``` 86 | 87 | #### 5. Copy generated files 88 | 89 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 90 | 91 | ## 92 | 93 | ### Compile the lib 94 | 95 | 1. Open the `DeepStream-Yolo` folder and compile the lib 96 | 97 | 2. Set the `CUDA_VER` according to your DeepStream version 98 | 99 | ``` 100 | export CUDA_VER=XY.Z 101 | ``` 102 | 103 | * x86 platform 104 | 105 | ``` 106 | DeepStream 7.1 = 12.6 107 | DeepStream 7.0 / 6.4 = 12.2 108 | DeepStream 6.3 = 12.1 109 | DeepStream 6.2 = 11.8 110 | DeepStream 6.1.1 = 11.7 111 | DeepStream 6.1 = 11.6 112 | DeepStream 6.0.1 / 6.0 = 11.4 113 | DeepStream 5.1 = 11.1 114 | ``` 115 | 116 | * Jetson platform 117 | 118 | ``` 119 | DeepStream 7.1 = 12.6 120 | DeepStream 7.0 / 6.4 = 12.2 121 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 122 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 123 | ``` 124 | 125 | 3. Make the lib 126 | 127 | ``` 128 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 129 | ``` 130 | 131 | ## 132 | 133 | ### Edit the config_infer_primary_goldyolo file 134 | 135 | Edit the `config_infer_primary_goldyolo.txt` file according to your model (example for Gold-YOLO-S with 80 classes) 136 | 137 | ``` 138 | [property] 139 | ... 
140 | onnx-file=Gold_s_pre_dist.pt.onnx 141 | ... 142 | num-detected-classes=80 143 | ... 144 | parse-bbox-func-name=NvDsInferParseYolo 145 | ... 146 | ``` 147 | 148 | **NOTE**: The **Gold-YOLO** resizes the input with center padding. To get better accuracy, use 149 | 150 | ``` 151 | [property] 152 | ... 153 | maintain-aspect-ratio=1 154 | symmetric-padding=1 155 | ... 156 | ``` 157 | 158 | ## 159 | 160 | ### Edit the deepstream_app_config file 161 | 162 | ``` 163 | ... 164 | [primary-gie] 165 | ... 166 | config-file=config_infer_primary_goldyolo.txt 167 | ``` 168 | 169 | ## 170 | 171 | ### Testing the model 172 | 173 | ``` 174 | deepstream-app -c deepstream_app_config.txt 175 | ``` 176 | 177 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 178 | 179 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 180 | -------------------------------------------------------------------------------- /docs/INT8Calibration.md: -------------------------------------------------------------------------------- 1 | # INT8 calibration (PTQ) 2 | 3 | ### 1. Install OpenCV 4 | 5 | ``` 6 | sudo apt-get install libopencv-dev 7 | ``` 8 | 9 | ### 2. Compile/recompile the `nvdsinfer_custom_impl_Yolo` lib with OpenCV support 10 | 11 | 2.1. Set the `CUDA_VER` according to your DeepStream version 12 | 13 | ``` 14 | export CUDA_VER=XY.Z 15 | ``` 16 | 17 | * x86 platform 18 | 19 | ``` 20 | DeepStream 7.1 = 12.6 21 | DeepStream 7.0 / 6.4 = 12.2 22 | DeepStream 6.3 = 12.1 23 | DeepStream 6.2 = 11.8 24 | DeepStream 6.1.1 = 11.7 25 | DeepStream 6.1 = 11.6 26 | DeepStream 6.0.1 / 6.0 = 11.4 27 | DeepStream 5.1 = 11.1 28 | ``` 29 | 30 | * Jetson platform 31 | 32 | ``` 33 | DeepStream 7.1 = 12.6 34 | DeepStream 7.0 / 6.4 = 12.2 35 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 36 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 37 | ``` 38 | 39 | 2.2. Set the `OPENCV` env 40 | 41 | ``` 42 | export OPENCV=1 43 | ``` 44 | 45 | 2.3. Make the lib 46 | 47 | ``` 48 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 49 | ``` 50 | 51 | ### 3. For COCO dataset, download the [val2017](https://drive.google.com/file/d/1gbvfn7mcsGDRZ_luJwtITL-ru2kK99aK/view?usp=sharing), extract, and move to DeepStream-Yolo folder 52 | 53 | * Select 1000 random images from COCO dataset to run calibration 54 | 55 | ``` 56 | mkdir calibration 57 | ``` 58 | 59 | ``` 60 | for jpg in $(ls -1 val2017/*.jpg | sort -R | head -1000); do \ 61 | cp ${jpg} calibration/; \ 62 | done 63 | ``` 64 | 65 | * Create the `calibration.txt` file with all selected images 66 | 67 | ``` 68 | realpath calibration/*jpg > calibration.txt 69 | ``` 70 | 71 | * Set environment variables 72 | 73 | ``` 74 | export INT8_CALIB_IMG_PATH=calibration.txt 75 | export INT8_CALIB_BATCH_SIZE=1 76 | ``` 77 | 78 | * Edit the `config_infer` file 79 | 80 | ``` 81 | ... 82 | model-engine-file=model_b1_gpu0_fp32.engine 83 | #int8-calib-file=calib.table 84 | ... 85 | network-mode=0 86 | ... 87 | ``` 88 | 89 | To 90 | 91 | ``` 92 | ... 93 | model-engine-file=model_b1_gpu0_int8.engine 94 | int8-calib-file=calib.table 95 | ... 96 | network-mode=1 97 | ... 98 | ``` 99 | 100 | * Run 101 | 102 | ``` 103 | deepstream-app -c deepstream_app_config.txt 104 | ``` 105 | 106 | **NOTE**: NVIDIA recommends at least 500 images to get a good accuracy. 
In this example, I recommend using 1000 images to get better accuracy (more images = more accuracy). Higher `INT8_CALIB_BATCH_SIZE` values will result in more accuracy and faster calibration. Set it according to your GPU memory. This process may take a long time. 107 | -------------------------------------------------------------------------------- /docs/RTDETR_Paddle.md: -------------------------------------------------------------------------------- 1 | # RT-DETR Paddle usage 2 | 3 | **NOTE**: https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_paddle version. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_rtdetr file](#edit-the-config_infer_primary_rtdetr-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the PaddleDetection repo and install the requirements 16 | 17 | https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.8/docs/tutorials/INSTALL.md 18 | 19 | ``` 20 | git clone https://github.com/lyuwenyu/RT-DETR.git 21 | cd RT-DETR/rtdetr_paddle 22 | pip3 install -r requirements.txt 23 | pip3 install onnx onnxslim onnxruntime paddle2onnx 24 | ``` 25 | 26 | **NOTE**: It is recommended to use Python virtualenv. 27 | 28 | #### 2. Copy the converter 29 | 30 | Copy the `export_rtdetr_paddle.py` file from the `DeepStream-Yolo/utils` directory to the `RT-DETR/rtdetr_paddle` folder. 31 | 32 | #### 3. Download the model 33 | 34 | Download the `pdparams` file from [RT-DETR Paddle](https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_paddle) releases (example for RT-DETR-R50) 35 | 36 | ``` 37 | wget https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams 38 | ``` 39 | 40 | **NOTE**: You can use your custom model. 41 | 42 | #### 4. Convert model 43 | 44 | Generate the ONNX model file (example for RT-DETR-R50) 45 | 46 | ``` 47 | python3 export_rtdetr_paddle.py -w rtdetr_r50vd_6x_coco.pdparams -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml --dynamic 48 | ``` 49 | 50 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 51 | 52 | ``` 53 | --simplify 54 | ``` 55 | 56 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 57 | 58 | ``` 59 | --dynamic 60 | ``` 61 | 62 | **NOTE**: To use static batch-size (example for batch-size = 4) 63 | 64 | ``` 65 | --batch 4 66 | ``` 67 | 68 | **NOTE**: If you are using DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 16. 69 | 70 | ``` 71 | --opset 12 72 | ``` 73 | 74 | #### 5. Copy generated files 75 | 76 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 77 | 78 | ## 79 | 80 | ### Compile the lib 81 | 82 | 1. Open the `DeepStream-Yolo` folder and compile the lib 83 | 84 | 2. Set the `CUDA_VER` according to your DeepStream version 85 | 86 | ``` 87 | export CUDA_VER=XY.Z 88 | ``` 89 | 90 | * x86 platform 91 | 92 | ``` 93 | DeepStream 7.1 = 12.6 94 | DeepStream 7.0 / 6.4 = 12.2 95 | DeepStream 6.3 = 12.1 96 | DeepStream 6.2 = 11.8 97 | DeepStream 6.1.1 = 11.7 98 | DeepStream 6.1 = 11.6 99 | DeepStream 6.0.1 / 6.0 = 11.4 100 | DeepStream 5.1 = 11.1 101 | ``` 102 | 103 | * Jetson platform 104 | 105 | ``` 106 | DeepStream 7.1 = 12.6 107 | DeepStream 7.0 / 6.4 = 12.2 108 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 109 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 110 | ``` 111 | 112 | 3. 
Make the lib 113 | 114 | ``` 115 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 116 | ``` 117 | 118 | ## 119 | 120 | ### Edit the config_infer_primary_rtdetr file 121 | 122 | Edit the `config_infer_primary_rtdetr.txt` file according to your model (example for RT-DETR-R50 with 80 classes) 123 | 124 | ``` 125 | [property] 126 | ... 127 | onnx-file=rtdetr_r50vd_6x_coco.pdparams.onnx 128 | ... 129 | num-detected-classes=80 130 | ... 131 | parse-bbox-func-name=NvDsInferParseYolo 132 | ... 133 | ``` 134 | 135 | **NOTE**: The **RT-DETR** does not resize the input with padding. To get better accuracy, use 136 | 137 | ``` 138 | [property] 139 | ... 140 | maintain-aspect-ratio=0 141 | ... 142 | ``` 143 | 144 | **NOTE**: The **RT-DETR** does not require NMS. To get better accuracy, use 145 | 146 | ``` 147 | [property] 148 | ... 149 | cluster-mode=4 150 | ... 151 | ``` 152 | 153 | ## 154 | 155 | ### Edit the deepstream_app_config file 156 | 157 | ``` 158 | ... 159 | [primary-gie] 160 | ... 161 | config-file=config_infer_primary_rtdetr.txt 162 | ``` 163 | 164 | ## 165 | 166 | ### Testing the model 167 | 168 | ``` 169 | deepstream-app -c deepstream_app_config.txt 170 | ``` 171 | 172 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 173 | 174 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 175 | -------------------------------------------------------------------------------- /docs/RTDETR_PyTorch.md: -------------------------------------------------------------------------------- 1 | # RT-DETR PyTorch usage 2 | 3 | **NOTE**: https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_pytorch version. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_rtdetr file](#edit-the-config_infer_primary_rtdetr-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the RT-DETR repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/lyuwenyu/RT-DETR.git 19 | cd RT-DETR/rtdetr_pytorch 20 | pip3 install -r requirements.txt 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy the converter 27 | 28 | Copy the `export_rtdetr_pytorch.py` file from the `DeepStream-Yolo/utils` directory to the `RT-DETR/rtdetr_pytorch` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pth` file from [RT-DETR PyTorch](https://github.com/lyuwenyu/storage/releases/tag/v0.1) releases (example for RT-DETR-R50) 33 | 34 | ``` 35 | wget https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. 
Convert model 41 | 42 | Generate the ONNX model file (example for RT-DETR-R50) 43 | 44 | ``` 45 | python3 export_rtdetr_pytorch.py -w rtdetr_r50vd_6x_coco_from_paddle.pth -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 16. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_rtdetr file 140 | 141 | Edit the `config_infer_primary_rtdetr.txt` file according to your model (example for RT-DETR-R50 with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=rtdetr_r50vd_6x_coco_from_paddle.pth.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 152 | ``` 153 | 154 | **NOTE**: The **RT-DETR** do not resize the input with padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=0 160 | ... 161 | ``` 162 | 163 | **NOTE**: The **RT-DETR** do not require NMS. To get better accuracy, use 164 | 165 | ``` 166 | [property] 167 | ... 168 | cluster-mode=4 169 | ... 170 | ``` 171 | 172 | ## 173 | 174 | ### Edit the deepstream_app_config file 175 | 176 | ``` 177 | ... 178 | [primary-gie] 179 | ... 180 | config-file=config_infer_primary_rtdetr.txt 181 | ``` 182 | 183 | ## 184 | 185 | ### Testing the model 186 | 187 | ``` 188 | deepstream-app -c deepstream_app_config.txt 189 | ``` 190 | 191 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 192 | 193 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 
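**NOTE**: Optionally, you can sanity-check the exported ONNX model with onnxruntime before building the TensorRT engine. This is a minimal sketch: it assumes the file name from the example above and the `onnxruntime` package installed in step 1 (numpy is pulled in with it), and it only verifies that the graph loads and runs

```
import numpy as np
import onnxruntime as ort

# load the exported model on CPU and build dummy feeds for every input
session = ort.InferenceSession("rtdetr_r50vd_6x_coco_from_paddle.pth.onnx", providers=["CPUExecutionProvider"])
feeds = {}
for inp in session.get_inputs():
    # dynamic exports mark the batch dimension as symbolic; use 1 for the test
    shape = [1 if not isinstance(d, int) or d < 0 else d for d in inp.shape]
    dtype = np.int64 if "int64" in inp.type else np.float32
    feeds[inp.name] = np.zeros(shape, dtype=dtype)

# run a dummy inference and print the output names and shapes
for meta, out in zip(session.get_outputs(), session.run(None, feeds)):
    print(meta.name, out.shape)
```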
194 | -------------------------------------------------------------------------------- /docs/RTDETR_Ultralytics.md: -------------------------------------------------------------------------------- 1 | # RT-DETR Ultralytics usage 2 | 3 | **NOTE**: Ultralytics (https://docs.ultralytics.com/models/rtdetr) version. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_rtdetr file](#edit-the-config_infer_primary_rtdetr-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the Ultralytics repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/ultralytics/ultralytics.git 19 | cd ultralytics 20 | pip3 install -e . 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy conversor 27 | 28 | Copy the `export_rtdetr_ultralytics.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [Ultralytics](https://github.com/ultralytics/assets/releases/) releases (example for RT-DETR-L) 33 | 34 | ``` 35 | wget https://github.com/ultralytics/assets/releases/download/v8.2.0/rtdetr-l.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. Convert model 41 | 42 | Generate the ONNX model file (example for RT-DETR-L) 43 | 44 | ``` 45 | python3 export_rtdetr_ultralytics.py -w rtdetr-l.pt --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 16. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. 
Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_rtdetr file 140 | 141 | Edit the `config_infer_primary_rtdetr.txt` file according to your model (example for RT-DETR-L with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=rtdetr-l.pt.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 152 | ``` 153 | 154 | **NOTE**: The **RT-DETR Ultralytics** do not resize the input with padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=0 160 | ... 161 | ``` 162 | 163 | **NOTE**: The **RT-DETR Ultralytics** do not require NMS. To get better accuracy, use 164 | 165 | ``` 166 | [property] 167 | ... 168 | cluster-mode=4 169 | ... 170 | ``` 171 | 172 | ## 173 | 174 | ### Edit the deepstream_app_config file 175 | 176 | ``` 177 | ... 178 | [primary-gie] 179 | ... 180 | config-file=config_infer_primary_rtdetr.txt 181 | ``` 182 | 183 | ## 184 | 185 | ### Testing the model 186 | 187 | ``` 188 | deepstream-app -c deepstream_app_config.txt 189 | ``` 190 | 191 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 192 | 193 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 194 | -------------------------------------------------------------------------------- /docs/YOLO11.md: -------------------------------------------------------------------------------- 1 | # YOLO11 usage 2 | 3 | **NOTE**: The yaml file is not required. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_yolo11 file](#edit-the-config_infer_primary_yolo11-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the YOLO11 repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/ultralytics/ultralytics.git 19 | cd ultralytics 20 | pip3 install -e . 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy conversor 27 | 28 | Copy the `export_yolo11.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [YOLO11](https://github.com/ultralytics/assets/releases/) releases (example for YOLO11s) 33 | 34 | ``` 35 | wget https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. 
Convert model 41 | 42 | Generate the ONNX model file (example for YOLO11s) 43 | 44 | ``` 45 | python3 export_yolo11.py -w yolo11s.pt --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 17. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_yolo11 file 140 | 141 | Edit the `config_infer_primary_yolo11.txt` file according to your model (example for YOLO11s with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=yolo11s.pt.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 152 | ``` 153 | 154 | **NOTE**: The **YOLO11** resizes the input with center padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=1 160 | symmetric-padding=1 161 | ... 162 | ``` 163 | 164 | ## 165 | 166 | ### Edit the deepstream_app_config file 167 | 168 | ``` 169 | ... 170 | [primary-gie] 171 | ... 172 | config-file=config_infer_primary_yolo11.txt 173 | ``` 174 | 175 | ## 176 | 177 | ### Testing the model 178 | 179 | ``` 180 | deepstream-app -c deepstream_app_config.txt 181 | ``` 182 | 183 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 184 | 185 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 186 | -------------------------------------------------------------------------------- /docs/YOLOR.md: -------------------------------------------------------------------------------- 1 | # YOLOR usage 2 | 3 | **NOTE**: Select the correct branch of the YOLOR repo before the conversion. 4 | 5 | **NOTE**: The cfg file is required for the main branch. 
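**NOTE**: Example for selecting the branch after cloning the repo (assuming the branch names `main` and `paper`, matching the model versions referenced below)

```
git checkout main
```

or

```
git checkout paper
```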
6 | 7 | * [Convert model](#convert-model) 8 | * [Compile the lib](#compile-the-lib) 9 | * [Edit the config_infer_primary_yolor file](#edit-the-config_infer_primary_yolor-file) 10 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 11 | * [Testing the model](#testing-the-model) 12 | 13 | ## 14 | 15 | ### Convert model 16 | 17 | #### 1. Download the YOLOR repo and install the requirements 18 | 19 | ``` 20 | git clone https://github.com/WongKinYiu/yolor.git 21 | cd yolor 22 | pip3 install -r requirements.txt 23 | pip3 install onnx onnxslim onnxruntime 24 | ``` 25 | 26 | **NOTE**: It is recommended to use Python virtualenv. 27 | 28 | #### 2. Copy conversor 29 | 30 | Copy the `export_yolor.py` file from `DeepStream-Yolo/utils` directory to the `yolor` folder. 31 | 32 | #### 3. Download the model 33 | 34 | Download the `pt` file from [YOLOR](https://github.com/WongKinYiu/yolor) repo. 35 | 36 | **NOTE**: You can use your custom model. 37 | 38 | #### 4. Convert model 39 | 40 | Generate the ONNX model file 41 | 42 | - Main branch 43 | 44 | Example for YOLOR-CSP 45 | 46 | ``` 47 | python3 export_yolor.py -w yolor_csp.pt -c cfg/yolor_csp.cfg --dynamic 48 | ``` 49 | 50 | - Paper branch 51 | 52 | Example for YOLOR-P6 53 | 54 | ``` 55 | python3 export_yolor.py -w yolor-p6.pt --dynamic 56 | ``` 57 | 58 | **NOTE**: To convert a P6 model 59 | 60 | ``` 61 | --p6 62 | ``` 63 | 64 | **NOTE**: To change the inference size (defaut: 640 / 1280 for `--p6` models) 65 | 66 | ``` 67 | -s SIZE 68 | --size SIZE 69 | -s HEIGHT WIDTH 70 | --size HEIGHT WIDTH 71 | ``` 72 | 73 | Example for 1280 74 | 75 | ``` 76 | -s 1280 77 | ``` 78 | 79 | or 80 | 81 | ``` 82 | -s 1280 1280 83 | ``` 84 | 85 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 86 | 87 | ``` 88 | --simplify 89 | ``` 90 | 91 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 92 | 93 | ``` 94 | --dynamic 95 | ``` 96 | 97 | **NOTE**: To use static batch-size (example for batch-size = 4) 98 | 99 | ``` 100 | --batch 4 101 | ``` 102 | 103 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 12. 104 | 105 | ``` 106 | --opset 12 107 | ``` 108 | 109 | #### 5. Copy generated files 110 | 111 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder 112 | 113 | ## 114 | 115 | ### Compile the lib 116 | 117 | 1. Open the `DeepStream-Yolo` folder and compile the lib 118 | 119 | 2. Set the `CUDA_VER` according to your DeepStream version 120 | 121 | ``` 122 | export CUDA_VER=XY.Z 123 | ``` 124 | 125 | * x86 platform 126 | 127 | ``` 128 | DeepStream 7.1 = 12.6 129 | DeepStream 7.0 / 6.4 = 12.2 130 | DeepStream 6.3 = 12.1 131 | DeepStream 6.2 = 11.8 132 | DeepStream 6.1.1 = 11.7 133 | DeepStream 6.1 = 11.6 134 | DeepStream 6.0.1 / 6.0 = 11.4 135 | DeepStream 5.1 = 11.1 136 | ``` 137 | 138 | * Jetson platform 139 | 140 | ``` 141 | DeepStream 7.1 = 12.6 142 | DeepStream 7.0 / 6.4 = 12.2 143 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 144 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 145 | ``` 146 | 147 | 3. Make the lib 148 | 149 | ``` 150 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 151 | ``` 152 | 153 | ## 154 | 155 | ### Edit the config_infer_primary_yolor file 156 | 157 | Edit the `config_infer_primary_yolor.txt` file according to your model (example for YOLOR-CSP with 80 classes) 158 | 159 | ``` 160 | [property] 161 | ... 162 | onnx-file=yolor_csp.pt.onnx 163 | ... 
164 | num-detected-classes=80 165 | ... 166 | parse-bbox-func-name=NvDsInferParseYolo 167 | ... 168 | ``` 169 | 170 | **NOTE**: The **YOLOR** resizes the input with center padding. To get better accuracy, use 171 | 172 | ``` 173 | [property] 174 | ... 175 | maintain-aspect-ratio=1 176 | symmetric-padding=1 177 | ... 178 | ``` 179 | 180 | ## 181 | 182 | ### Edit the deepstream_app_config file 183 | 184 | ``` 185 | ... 186 | [primary-gie] 187 | ... 188 | config-file=config_infer_primary_yolor.txt 189 | ``` 190 | 191 | ## 192 | 193 | ### Testing the model 194 | 195 | ``` 196 | deepstream-app -c deepstream_app_config.txt 197 | ``` 198 | 199 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 200 | 201 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 202 | -------------------------------------------------------------------------------- /docs/YOLOv10.md: -------------------------------------------------------------------------------- 1 | # YOLOv10 usage 2 | 3 | **NOTE**: The yaml file is not required. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_yoloV10 file](#edit-the-config_infer_primary_yolov10-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the YOLOv10 repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/ultralytics/ultralytics.git 19 | cd ultralytics 20 | pip3 install -e . 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy conversor 27 | 28 | Copy the `export_yoloV10.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [YOLOv10](https://github.com/THU-MIG/yolov10/releases/tag/v1.1) releases (example for YOLOv10s) 33 | 34 | ``` 35 | wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10s.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. Convert model 41 | 42 | Generate the ONNX model file (example for YOLOv10s) 43 | 44 | ``` 45 | python3 export_yoloV10.py -w yolov10s.pt --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 17. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. 
Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_yoloV10 file 140 | 141 | Edit the `config_infer_primary_yoloV10.txt` file according to your model (example for YOLOv10s with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=yolov10s.pt.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 152 | ``` 153 | 154 | **NOTE**: The **YOLOv10** resizes the input with center padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=1 160 | symmetric-padding=1 161 | ... 162 | ``` 163 | 164 | **NOTE**: The **YOLOv10** do not require NMS. To get better accuracy, use 165 | 166 | ``` 167 | [property] 168 | ... 169 | cluster-mode=4 170 | ... 171 | ``` 172 | 173 | ## 174 | 175 | ### Edit the deepstream_app_config file 176 | 177 | ``` 178 | ... 179 | [primary-gie] 180 | ... 181 | config-file=config_infer_primary_yoloV10.txt 182 | ``` 183 | 184 | ## 185 | 186 | ### Testing the model 187 | 188 | ``` 189 | deepstream-app -c deepstream_app_config.txt 190 | ``` 191 | 192 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 193 | 194 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 195 | -------------------------------------------------------------------------------- /docs/YOLOv5.md: -------------------------------------------------------------------------------- 1 | # YOLOv5 usage 2 | 3 | **NOTE**: You can use the master branch of the YOLOv5 repo to convert all model versions. 4 | 5 | **NOTE**: The yaml file is not required. 6 | 7 | * [Convert model](#convert-model) 8 | * [Compile the lib](#compile-the-lib) 9 | * [Edit the config_infer_primary_yoloV5 file](#edit-the-config_infer_primary_yolov5-file) 10 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 11 | * [Testing the model](#testing-the-model) 12 | 13 | ## 14 | 15 | ### Convert model 16 | 17 | #### 1. Download the YOLOv5 repo and install the requirements 18 | 19 | ``` 20 | git clone https://github.com/ultralytics/yolov5.git 21 | cd yolov5 22 | pip3 install -r requirements.txt 23 | pip3 install onnx onnxslim onnxruntime 24 | ``` 25 | 26 | **NOTE**: It is recommended to use Python virtualenv. 27 | 28 | #### 2. Copy conversor 29 | 30 | Copy the `export_yoloV5.py` file from `DeepStream-Yolo/utils` directory to the `yolov5` folder. 31 | 32 | #### 3. 
Download the model 33 | 34 | Download the `pt` file from [YOLOv5](https://github.com/ultralytics/yolov5/releases/) releases (example for YOLOv5s 7.0) 35 | 36 | ``` 37 | wget https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt 38 | ``` 39 | 40 | **NOTE**: You can use your custom model. 41 | 42 | #### 4. Convert model 43 | 44 | Generate the ONNX model file (example for YOLOv5s) 45 | 46 | ``` 47 | python3 export_yoloV5.py -w yolov5s.pt --dynamic 48 | ``` 49 | 50 | **NOTE**: To convert a P6 model 51 | 52 | ``` 53 | --p6 54 | ``` 55 | 56 | **NOTE**: To change the inference size (defaut: 640 / 1280 for `--p6` models) 57 | 58 | ``` 59 | -s SIZE 60 | --size SIZE 61 | -s HEIGHT WIDTH 62 | --size HEIGHT WIDTH 63 | ``` 64 | 65 | Example for 1280 66 | 67 | ``` 68 | -s 1280 69 | ``` 70 | 71 | or 72 | 73 | ``` 74 | -s 1280 1280 75 | ``` 76 | 77 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 78 | 79 | ``` 80 | --simplify 81 | ``` 82 | 83 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 84 | 85 | ``` 86 | --dynamic 87 | ``` 88 | 89 | **NOTE**: To use static batch-size (example for batch-size = 4) 90 | 91 | ``` 92 | --batch 4 93 | ``` 94 | 95 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 17. 96 | 97 | ``` 98 | --opset 12 99 | ``` 100 | 101 | #### 5. Copy generated files 102 | 103 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 104 | 105 | ## 106 | 107 | ### Compile the lib 108 | 109 | 1. Open the `DeepStream-Yolo` folder and compile the lib 110 | 111 | 2. Set the `CUDA_VER` according to your DeepStream version 112 | 113 | ``` 114 | export CUDA_VER=XY.Z 115 | ``` 116 | 117 | * x86 platform 118 | 119 | ``` 120 | DeepStream 7.1 = 12.6 121 | DeepStream 7.0 / 6.4 = 12.2 122 | DeepStream 6.3 = 12.1 123 | DeepStream 6.2 = 11.8 124 | DeepStream 6.1.1 = 11.7 125 | DeepStream 6.1 = 11.6 126 | DeepStream 6.0.1 / 6.0 = 11.4 127 | DeepStream 5.1 = 11.1 128 | ``` 129 | 130 | * Jetson platform 131 | 132 | ``` 133 | DeepStream 7.1 = 12.6 134 | DeepStream 7.0 / 6.4 = 12.2 135 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 136 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 137 | ``` 138 | 139 | 3. Make the lib 140 | 141 | ``` 142 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 143 | ``` 144 | 145 | ## 146 | 147 | ### Edit the config_infer_primary_yoloV5 file 148 | 149 | Edit the `config_infer_primary_yoloV5.txt` file according to your model (example for YOLOv5s with 80 classes) 150 | 151 | ``` 152 | [property] 153 | ... 154 | onnx-file=yolov5s.pt.onnx 155 | ... 156 | num-detected-classes=80 157 | ... 158 | parse-bbox-func-name=NvDsInferParseYolo 159 | ... 160 | ``` 161 | 162 | **NOTE**: The **YOLOv5** resizes the input with center padding. To get better accuracy, use 163 | 164 | ``` 165 | [property] 166 | ... 167 | maintain-aspect-ratio=1 168 | symmetric-padding=1 169 | ... 170 | ``` 171 | 172 | ## 173 | 174 | ### Edit the deepstream_app_config file 175 | 176 | ``` 177 | ... 178 | [primary-gie] 179 | ... 180 | config-file=config_infer_primary_yoloV5.txt 181 | ``` 182 | 183 | ## 184 | 185 | ### Testing the model 186 | 187 | ``` 188 | deepstream-app -c deepstream_app_config.txt 189 | ``` 190 | 191 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 
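**NOTE**: To skip the rebuild on later runs, you can point `model-engine-file` in the config file to the engine generated on the first run. The file name below is only an example; the generated name depends on your batch-size, GPU id and precision settings

```
[property]
...
model-engine-file=model_b1_gpu0_fp32.engine
...
```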
192 | 193 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 194 | -------------------------------------------------------------------------------- /docs/YOLOv5u.md: -------------------------------------------------------------------------------- 1 | # YOLOv5u usage 2 | 3 | **NOTE**: The yaml file is not required. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_yoloV5u file](#edit-the-config_infer_primary_yolov5u-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the YOLOv5u repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/ultralytics/ultralytics.git 19 | cd ultralytics 20 | pip3 install -e . 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy conversor 27 | 28 | Copy the `export_yoloV5u.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [YOLOv5u](https://github.com/ultralytics/assets/releases/) releases (example for YOLOv5su) 33 | 34 | ``` 35 | wget https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov5su.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. Convert model 41 | 42 | Generate the ONNX model file (example for YOLOv5su) 43 | 44 | ``` 45 | python3 export_yoloV5u.py -w yolov5su.pt --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 17. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. 
Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_yoloV5u file 140 | 141 | Edit the `config_infer_primary_yoloV5u.txt` file according to your model (example for YOLOv5su with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=yolov5su.pt.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 152 | ``` 153 | 154 | **NOTE**: The **YOLOv5u** resizes the input with center padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=1 160 | symmetric-padding=1 161 | ... 162 | ``` 163 | 164 | ## 165 | 166 | ### Edit the deepstream_app_config file 167 | 168 | ``` 169 | ... 170 | [primary-gie] 171 | ... 172 | config-file=config_infer_primary_yoloV5u.txt 173 | ``` 174 | 175 | ## 176 | 177 | ### Testing the model 178 | 179 | ``` 180 | deepstream-app -c deepstream_app_config.txt 181 | ``` 182 | 183 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 184 | 185 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 186 | -------------------------------------------------------------------------------- /docs/YOLOv6.md: -------------------------------------------------------------------------------- 1 | # YOLOv6 usage 2 | 3 | **NOTE**: You need to change the branch of the YOLOv6 repo according to the version of the model you want to convert. 4 | 5 | **NOTE**: The yaml file is not required. 6 | 7 | * [Convert model](#convert-model) 8 | * [Compile the lib](#compile-the-lib) 9 | * [Edit the config_infer_primary_yoloV6 file](#edit-the-config_infer_primary_yolov6-file) 10 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 11 | * [Testing the model](#testing-the-model) 12 | 13 | ## 14 | 15 | ### Convert model 16 | 17 | #### 1. Download the YOLOv6 repo and install the requirements 18 | 19 | ``` 20 | git clone https://github.com/meituan/YOLOv6.git 21 | cd YOLOv6 22 | pip3 install -r requirements.txt 23 | pip3 install onnx onnxslim onnxruntime 24 | ``` 25 | 26 | **NOTE**: It is recommended to use Python virtualenv. 27 | 28 | #### 2. Copy conversor 29 | 30 | Copy the `export_yoloV6.py` file from `DeepStream-Yolo/utils` directory to the `YOLOv6` folder. 31 | 32 | #### 3. Download the model 33 | 34 | Download the `pt` file from [YOLOv6](https://github.com/meituan/YOLOv6/releases/) releases (example for YOLOv6-S 4.0) 35 | 36 | ``` 37 | wget https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6s.pt 38 | ``` 39 | 40 | **NOTE**: You can use your custom model. 41 | 42 | #### 4. 
Convert model 43 | 44 | Generate the ONNX model file (example for YOLOv6-S 4.0) 45 | 46 | ``` 47 | python3 export_yoloV6.py -w yolov6s.pt --dynamic 48 | ``` 49 | 50 | **NOTE**: To convert a P6 model 51 | 52 | ``` 53 | --p6 54 | ``` 55 | 56 | **NOTE**: To change the inference size (defaut: 640 / 1280 for `--p6` models) 57 | 58 | ``` 59 | -s SIZE 60 | --size SIZE 61 | -s HEIGHT WIDTH 62 | --size HEIGHT WIDTH 63 | ``` 64 | 65 | Example for 1280 66 | 67 | ``` 68 | -s 1280 69 | ``` 70 | 71 | or 72 | 73 | ``` 74 | -s 1280 1280 75 | ``` 76 | 77 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 78 | 79 | ``` 80 | --simplify 81 | ``` 82 | 83 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 84 | 85 | ``` 86 | --dynamic 87 | ``` 88 | 89 | **NOTE**: To use static batch-size (example for batch-size = 4) 90 | 91 | ``` 92 | --batch 4 93 | ``` 94 | 95 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 13. 96 | 97 | ``` 98 | --opset 12 99 | ``` 100 | 101 | #### 5. Copy generated file 102 | 103 | Copy the generated ONNX model file to the `DeepStream-Yolo` folder. 104 | 105 | ## 106 | 107 | ### Compile the lib 108 | 109 | 1. Open the `DeepStream-Yolo` folder and compile the lib 110 | 111 | 2. Set the `CUDA_VER` according to your DeepStream version 112 | 113 | ``` 114 | export CUDA_VER=XY.Z 115 | ``` 116 | 117 | * x86 platform 118 | 119 | ``` 120 | DeepStream 7.1 = 12.6 121 | DeepStream 7.0 / 6.4 = 12.2 122 | DeepStream 6.3 = 12.1 123 | DeepStream 6.2 = 11.8 124 | DeepStream 6.1.1 = 11.7 125 | DeepStream 6.1 = 11.6 126 | DeepStream 6.0.1 / 6.0 = 11.4 127 | DeepStream 5.1 = 11.1 128 | ``` 129 | 130 | * Jetson platform 131 | 132 | ``` 133 | DeepStream 7.1 = 12.6 134 | DeepStream 7.0 / 6.4 = 12.2 135 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 136 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 137 | ``` 138 | 139 | 3. Make the lib 140 | 141 | ``` 142 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 143 | ``` 144 | 145 | ## 146 | 147 | ### Edit the config_infer_primary_yoloV6 file 148 | 149 | Edit the `config_infer_primary_yoloV6.txt` file according to your model (example for YOLOv6-S 4.0 with 80 classes) 150 | 151 | ``` 152 | [property] 153 | ... 154 | onnx-file=yolov6s.pt.onnx 155 | ... 156 | num-detected-classes=80 157 | ... 158 | parse-bbox-func-name=NvDsInferParseYolo 159 | ... 160 | ``` 161 | 162 | **NOTE**: The **YOLOv6** resizes the input with center padding. To get better accuracy, use 163 | 164 | ``` 165 | [property] 166 | ... 167 | maintain-aspect-ratio=1 168 | symmetric-padding=1 169 | ... 170 | ``` 171 | 172 | ## 173 | 174 | ### Edit the deepstream_app_config file 175 | 176 | ``` 177 | ... 178 | [primary-gie] 179 | ... 180 | config-file=config_infer_primary_yoloV6.txt 181 | ``` 182 | 183 | ## 184 | 185 | ### Testing the model 186 | 187 | ``` 188 | deepstream-app -c deepstream_app_config.txt 189 | ``` 190 | 191 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 192 | 193 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 194 | -------------------------------------------------------------------------------- /docs/YOLOv7.md: -------------------------------------------------------------------------------- 1 | # YOLOv7 usage 2 | 3 | **NOTE**: The yaml file is not required. 
4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_yoloV7 file](#edit-the-config_infer_primary_yolov7-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the YOLOv7 repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/WongKinYiu/yolov7.git 19 | cd yolov7 20 | pip3 install -r requirements.txt 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use a Python virtualenv. 25 | 26 | #### 2. Copy the converter 27 | 28 | Copy the `export_yoloV7.py` file from the `DeepStream-Yolo/utils` directory to the `yolov7` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [YOLOv7](https://github.com/WongKinYiu/yolov7/releases/) releases (example for YOLOv7) 33 | 34 | ``` 35 | wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. Reparameterize your model (for custom models) 41 | 42 | Custom YOLOv7 models cannot be converted directly to an engine file, so you must reparameterize your model first using the code [here](https://github.com/WongKinYiu/yolov7/blob/main/tools/reparameterization.ipynb). Run the reparameterization on your custom checkpoints inside the YOLOv7 repository, and save the reparameterized checkpoints for the conversion in the next step. 43 | 44 | #### 5. Convert model 45 | 46 | Generate the ONNX model file (example for YOLOv7) 47 | 48 | ``` 49 | python3 export_yoloV7.py -w yolov7.pt --dynamic 50 | ``` 51 | 52 | **NOTE**: To convert a P6 model 53 | 54 | ``` 55 | --p6 56 | ``` 57 | 58 | **NOTE**: To change the inference size (default: 640 / 1280 for `--p6` models) 59 | 60 | ``` 61 | -s SIZE 62 | --size SIZE 63 | -s HEIGHT WIDTH 64 | --size HEIGHT WIDTH 65 | ``` 66 | 67 | Example for 1280 68 | 69 | ``` 70 | -s 1280 71 | ``` 72 | 73 | or 74 | 75 | ``` 76 | -s 1280 1280 77 | ``` 78 | 79 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 80 | 81 | ``` 82 | --simplify 83 | ``` 84 | 85 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 86 | 87 | ``` 88 | --dynamic 89 | ``` 90 | 91 | **NOTE**: To use static batch-size (example for batch-size = 4) 92 | 93 | ``` 94 | --batch 4 95 | ``` 96 | 97 | **NOTE**: If you are using DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 12. 98 | 99 | ``` 100 | --opset 12 101 | ``` 102 | 103 | #### 6. Copy generated files 104 | 105 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 106 | 107 | ## 108 | 109 | ### Compile the lib 110 | 111 | 1. Open the `DeepStream-Yolo` folder and compile the lib 112 | 113 | 2. Set the `CUDA_VER` according to your DeepStream version 114 | 115 | ``` 116 | export CUDA_VER=XY.Z 117 | ``` 118 | 119 | * x86 platform 120 | 121 | ``` 122 | DeepStream 7.1 = 12.6 123 | DeepStream 7.0 / 6.4 = 12.2 124 | DeepStream 6.3 = 12.1 125 | DeepStream 6.2 = 11.8 126 | DeepStream 6.1.1 = 11.7 127 | DeepStream 6.1 = 11.6 128 | DeepStream 6.0.1 / 6.0 = 11.4 129 | DeepStream 5.1 = 11.1 130 | ``` 131 | 132 | * Jetson platform 133 | 134 | ``` 135 | DeepStream 7.1 = 12.6 136 | DeepStream 7.0 / 6.4 = 12.2 137 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 138 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 139 | ``` 140 | 141 | 3. 
Make the lib 142 | 143 | ``` 144 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 145 | ``` 146 | 147 | ## 148 | 149 | ### Edit the config_infer_primary_yoloV7 file 150 | 151 | Edit the `config_infer_primary_yoloV7.txt` file according to your model (example for YOLOv7 with 80 classes) 152 | 153 | ``` 154 | [property] 155 | ... 156 | onnx-file=yolov7.pt.onnx 157 | ... 158 | num-detected-classes=80 159 | ... 160 | parse-bbox-func-name=NvDsInferParseYolo 161 | ... 162 | ``` 163 | 164 | **NOTE**: The **YOLOv7** resizes the input with center padding. To get better accuracy, use 165 | 166 | ``` 167 | [property] 168 | ... 169 | maintain-aspect-ratio=1 170 | symmetric-padding=1 171 | ... 172 | ``` 173 | 174 | ## 175 | 176 | ### Edit the deepstream_app_config file 177 | 178 | ``` 179 | ... 180 | [primary-gie] 181 | ... 182 | config-file=config_infer_primary_yoloV7.txt 183 | ``` 184 | 185 | ## 186 | 187 | ### Testing the model 188 | 189 | ``` 190 | deepstream-app -c deepstream_app_config.txt 191 | ``` 192 | 193 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 194 | 195 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 196 | -------------------------------------------------------------------------------- /docs/YOLOv8.md: -------------------------------------------------------------------------------- 1 | # YOLOv8 usage 2 | 3 | **NOTE**: The yaml file is not required. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_yoloV8 file](#edit-the-config_infer_primary_yolov8-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the YOLOv8 repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/ultralytics/ultralytics.git 19 | cd ultralytics 20 | pip3 install -e . 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy conversor 27 | 28 | Copy the `export_yoloV8.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [YOLOv8](https://github.com/ultralytics/assets/releases/) releases (example for YOLOv8s) 33 | 34 | ``` 35 | wget https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. 
Convert model 41 | 42 | Generate the ONNX model file (example for YOLOv8s) 43 | 44 | ``` 45 | python3 export_yoloV8.py -w yolov8s.pt --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 17. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_yoloV8 file 140 | 141 | Edit the `config_infer_primary_yoloV8.txt` file according to your model (example for YOLOv8s with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=yolov8s.pt.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 152 | ``` 153 | 154 | **NOTE**: The **YOLOv8** resizes the input with center padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=1 160 | symmetric-padding=1 161 | ... 162 | ``` 163 | 164 | ## 165 | 166 | ### Edit the deepstream_app_config file 167 | 168 | ``` 169 | ... 170 | [primary-gie] 171 | ... 172 | config-file=config_infer_primary_yoloV8.txt 173 | ``` 174 | 175 | ## 176 | 177 | ### Testing the model 178 | 179 | ``` 180 | deepstream-app -c deepstream_app_config.txt 181 | ``` 182 | 183 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 184 | 185 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 186 | -------------------------------------------------------------------------------- /docs/YOLOv9.md: -------------------------------------------------------------------------------- 1 | # YOLOv9 usage 2 | 3 | **NOTE**: The yaml file is not required. 
4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_yoloV9 file](#edit-the-config_infer_primary_yolov9-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the YOLOv9 repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/WongKinYiu/yolov9.git 19 | cd yolov9 20 | pip3 install -r requirements.txt 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy conversor 27 | 28 | Copy the `export_yoloV9.py` file from `DeepStream-Yolo/utils` directory to the `yolov9` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [YOLOv9](https://github.com/WongKinYiu/yolov9/releases/) releases (example for YOLOv9-S) 33 | 34 | ``` 35 | wget https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-s-converted.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. Convert model 41 | 42 | Generate the ONNX model file (example for YOLOv9-S) 43 | 44 | ``` 45 | python3 export_yoloV9.py -w yolov9-s-converted.pt --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 17. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_yoloV9 file 140 | 141 | Edit the `config_infer_primary_yoloV9.txt` file according to your model (example for YOLOv9-S with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=yolov9-s-converted.pt.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 
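# Optional (an alternative, not a requirement): the CUDA-accelerated bbox
# parser from the same lib can be used instead of NvDsInferParseYolo:
#parse-bbox-func-name=NvDsInferParseYoloCuda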
152 | ``` 153 | 154 | **NOTE**: The **YOLOv9** resizes the input with center padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=1 160 | symmetric-padding=1 161 | ... 162 | ``` 163 | 164 | ## 165 | 166 | ### Edit the deepstream_app_config file 167 | 168 | ``` 169 | ... 170 | [primary-gie] 171 | ... 172 | config-file=config_infer_primary_yoloV9.txt 173 | ``` 174 | 175 | ## 176 | 177 | ### Testing the model 178 | 179 | ``` 180 | deepstream-app -c deepstream_app_config.txt 181 | ``` 182 | 183 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 184 | 185 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 186 | -------------------------------------------------------------------------------- /docs/multipleGIEs_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marcoslucianops/DeepStream-Yolo/01c1e504d9c15267fb58f8e88e243eeb31aa99d2/docs/multipleGIEs_tree.png -------------------------------------------------------------------------------- /labels.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a 5 | # copy of this software and associated documentation files (the "Software"), 6 | # to deal in the Software without restriction, including without limitation 7 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | # and/or sell copies of the Software, and to permit persons to whom the 9 | # Software is furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 
13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | # DEALINGS IN THE SOFTWARE. 21 | # 22 | # Edited by Marcos Luciano 23 | # https://www.github.com/marcoslucianops 24 | ################################################################################ 25 | 26 | CUDA_VER?= 27 | ifeq ($(CUDA_VER),) 28 | $(error "CUDA_VER is not set") 29 | endif 30 | 31 | OPENCV?= 32 | ifeq ($(OPENCV),) 33 | OPENCV=0 34 | endif 35 | 36 | GRAPH?= 37 | ifeq ($(GRAPH),) 38 | GRAPH=0 39 | endif 40 | 41 | CC:= g++ 42 | NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc 43 | 44 | CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations 45 | CFLAGS+= -I/opt/nvidia/deepstream/deepstream/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include 46 | 47 | ifeq ($(OPENCV), 1) 48 | COMMON+= -DOPENCV 49 | CFLAGS+= $(shell pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv) 50 | LIBS+= $(shell pkg-config --libs opencv4 2> /dev/null || pkg-config --libs opencv) 51 | endif 52 | 53 | ifeq ($(GRAPH), 1) 54 | COMMON+= -DGRAPH 55 | endif 56 | 57 | CUFLAGS:= -I/opt/nvidia/deepstream/deepstream/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include 58 | 59 | ifeq ($(shell ldconfig -p | grep -q libnvparsers && echo 1 || echo 0), 1) 60 | LIBS+= -lnvparsers 61 | endif 62 | 63 | LIBS+= -lnvinfer_plugin -lnvinfer -lnvonnxparser -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs 64 | LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group 65 | 66 | INCS:= $(wildcard layers/*.h) 67 | INCS+= $(wildcard *.h) 68 | 69 | SRCFILES:= $(filter-out calibrator.cpp, $(wildcard *.cpp)) 70 | 71 | ifeq ($(OPENCV), 1) 72 | SRCFILES+= calibrator.cpp 73 | endif 74 | 75 | SRCFILES+= $(wildcard layers/*.cpp) 76 | SRCFILES+= $(wildcard *.cu) 77 | 78 | TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so 79 | 80 | TARGET_OBJS:= $(SRCFILES:.cpp=.o) 81 | TARGET_OBJS:= $(TARGET_OBJS:.cu=.o) 82 | 83 | all: $(TARGET_LIB) 84 | 85 | %.o: %.cpp $(INCS) Makefile 86 | $(CC) -c $(COMMON) -o $@ $(CFLAGS) $< 87 | 88 | %.o: %.cu $(INCS) Makefile 89 | $(NVCC) -c -o $@ --compiler-options '-fPIC' $(CUFLAGS) $< 90 | 91 | $(TARGET_LIB) : $(TARGET_OBJS) 92 | $(CC) -o $@ $(TARGET_OBJS) $(LFLAGS) 93 | 94 | clean: 95 | rm -rf $(TARGET_LIB) 96 | rm -rf $(TARGET_OBJS) 97 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/calibrator.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "calibrator.h" 7 | 8 | #include 9 | #include 10 | 11 | Int8EntropyCalibrator2::Int8EntropyCalibrator2(const int& batchSize, const int& channels, const int& height, 12 | const int& width, const float& scaleFactor, const float* offsets, const int& inputFormat, 13 | const std::string& imgPath, const std::string& calibTablePath) : batchSize(batchSize), inputC(channels), 14 | inputH(height), inputW(width), scaleFactor(scaleFactor), offsets(offsets), inputFormat(inputFormat), 15 | calibTablePath(calibTablePath), imageIndex(0) 16 | { 17 | inputCount 
= batchSize * channels * height * width; 18 | std::fstream f(imgPath); 19 | if (f.is_open()) { 20 | std::string temp; 21 | while (std::getline(f, temp)) { 22 | imgPaths.push_back(temp); 23 | } 24 | } 25 | batchData = new float[inputCount]; 26 | CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float))); 27 | } 28 | 29 | Int8EntropyCalibrator2::~Int8EntropyCalibrator2() 30 | { 31 | CUDA_CHECK(cudaFree(deviceInput)); 32 | if (batchData) { 33 | delete[] batchData; 34 | } 35 | } 36 | 37 | int 38 | Int8EntropyCalibrator2::getBatchSize() const noexcept 39 | { 40 | return batchSize; 41 | } 42 | 43 | bool 44 | Int8EntropyCalibrator2::getBatch(void** bindings, const char** names, int nbBindings) noexcept 45 | { 46 | if (imageIndex + batchSize > uint(imgPaths.size())) { 47 | return false; 48 | } 49 | 50 | float* ptr = batchData; 51 | for (size_t i = imageIndex; i < imageIndex + batchSize; ++i) { 52 | cv::Mat img = cv::imread(imgPaths[i]); 53 | if (img.empty()) { 54 | std::cerr << "Failed to read image for calibration" << std::endl; 55 | return false; 56 | } 57 | 58 | std::vector<float> inputData = prepareImage(img, inputC, inputH, inputW, scaleFactor, offsets, inputFormat); 59 | 60 | size_t len = inputData.size(); 61 | memcpy(ptr, inputData.data(), len * sizeof(float)); 62 | ptr += inputData.size(); 63 | 64 | std::cout << "Load image: " << imgPaths[i] << std::endl; 65 | std::cout << "Progress: " << (i + 1) * 100. / imgPaths.size() << "%" << std::endl; 66 | } 67 | 68 | imageIndex += batchSize; 69 | 70 | CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice)); 71 | bindings[0] = deviceInput; 72 | 73 | return true; 74 | } 75 | 76 | const void* 77 | Int8EntropyCalibrator2::readCalibrationCache(std::size_t& length) noexcept 78 | { 79 | calibrationCache.clear(); 80 | std::ifstream input(calibTablePath, std::ios::binary); 81 | input >> std::noskipws; 82 | if (readCache && input.good()) { 83 | std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(calibrationCache)); 84 | } 85 | length = calibrationCache.size(); 86 | return length ? calibrationCache.data() : nullptr; 87 | } 88 | 89 | void 90 | Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, std::size_t length) noexcept 91 | { 92 | std::ofstream output(calibTablePath, std::ios::binary); 93 | output.write(reinterpret_cast<const char*>(cache), length); 94 | } 95 | 96 | std::vector<float> 97 | prepareImage(cv::Mat& img, int inputC, int inputH, int inputW, float scaleFactor, const float* offsets, int inputFormat) 98 | { 99 | cv::Mat out; 100 | 101 | if (inputFormat == 0) { 102 | cv::cvtColor(img, out, cv::COLOR_BGR2RGB); 103 | } 104 | else if (inputFormat == 2) { 105 | cv::cvtColor(img, out, cv::COLOR_BGR2GRAY); 106 | } 107 | else { 108 | out = img; 109 | } 110 | 111 | int imageW = img.cols; 112 | int imageH = img.rows; 113 | 114 | if (imageW != inputW || imageH != inputH) { 115 | float resizeFactor = std::max(inputW / (float) imageW, inputH / (float) imageH); 116 | cv::resize(out, out, cv::Size(0, 0), resizeFactor, resizeFactor, cv::INTER_CUBIC); 117 | cv::Rect crop(cv::Point(0.5 * (out.cols - inputW), 0.5 * (out.rows - inputH)), cv::Size(inputW, inputH)); 118 | out = out(crop); 119 | } 120 | 121 | out.convertTo(out, CV_32F, scaleFactor); 122 | 123 | if (inputFormat == 2) { 124 | cv::subtract(out, cv::Scalar(offsets[0] / 255), out); 125 | } 126 | else { 127 | cv::subtract(out, cv::Scalar(offsets[0] / 255, offsets[1] / 255, offsets[2] / 255), out); 128 | } 129 | 130 | std::vector<cv::Mat> inputChannels(inputC); 131 | cv::split(out, inputChannels); 132 | std::vector<float> result(inputH * inputW * inputC); 133 | auto data = result.data(); 134 | int channelLength = inputH * inputW; 135 | for (int i = 0; i < inputC; ++i) { 136 | memcpy(data, inputChannels[i].data, channelLength * sizeof(float)); 137 | data += channelLength; 138 | } 139 | 140 | return result; 141 | } 142 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/calibrator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef CALIBRATOR_H 7 | #define CALIBRATOR_H 8 | 9 | #include <string> 10 | #include <vector> 11 | 12 | #include "NvInfer.h" 13 | #include "opencv2/opencv.hpp" 14 | 15 | #define CUDA_CHECK(status) { \ 16 | if (status != 0) { \ 17 | std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " << \ 18 | __LINE__ << std::endl; \ 19 | abort(); \ 20 | } \ 21 | } 22 | 23 | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 { 24 | public: 25 | Int8EntropyCalibrator2(const int& batchSize, const int& channels, const int& height, const int& width, 26 | const float& scaleFactor, const float* offsets, const int& inputFormat, const std::string& imgPath, 27 | const std::string& calibTablePath); 28 | 29 | virtual ~Int8EntropyCalibrator2(); 30 | 31 | int getBatchSize() const noexcept override; 32 | 33 | bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override; 34 | 35 | const void* readCalibrationCache(std::size_t& length) noexcept override; 36 | 37 | void writeCalibrationCache(const void* cache, size_t length) noexcept override; 38 | 39 | private: 40 | int batchSize; 41 | int inputC; 42 | int inputH; 43 | int inputW; 44 | int letterBox; 45 | float scaleFactor; 46 | const float* offsets; 47 | int inputFormat; 48 | std::string calibTablePath; 49 | size_t imageIndex; 50 | size_t inputCount; 51 | std::vector<std::string> imgPaths; 52 | float* batchData {nullptr}; 53 | void* deviceInput
{nullptr}; 54 | bool readCache {true}; 55 | std::vector<char> calibrationCache; 56 | }; 57 | 58 | std::vector<float> prepareImage(cv::Mat& img, int inputC, int inputH, int inputW, float scaleFactor, 59 | const float* offsets, int inputFormat); 60 | 61 | #endif //CALIBRATOR_H 62 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/activation_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __ACTIVATION_LAYER_H__ 7 | #define __ACTIVATION_LAYER_H__ 8 | 9 | #include <string> 10 | 11 | #include "NvInfer.h" 12 | 13 | nvinfer1::ITensor* activationLayer(int layerIdx, std::string activation, nvinfer1::ITensor* input, 14 | nvinfer1::INetworkDefinition* network, std::string layerName = ""); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "batchnorm_layer.h" 7 | 8 | #include <cmath> 9 | #include <vector> 10 | 11 | nvinfer1::ITensor* 12 | batchnormLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, 13 | std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::ITensor* input, 14 | nvinfer1::INetworkDefinition* network) 15 | { 16 | nvinfer1::ITensor* output; 17 | 18 | assert(block.at("type") == "batchnorm"); 19 | assert(block.find("filters") != block.end()); 20 | 21 | int filters = std::stoi(block.at("filters")); 22 | std::string activation = block.at("activation"); 23 | 24 | float eps = 1.0e-5; 25 | if (block.find("eps") != block.end()) { 26 | eps = std::stof(block.at("eps")); 27 | } 28 | 29 | std::vector<float> bnBiases; 30 | std::vector<float> bnWeights; 31 | std::vector<float> bnRunningMean; 32 | std::vector<float> bnRunningVar; 33 | 34 | for (int i = 0; i < filters; ++i) { 35 | bnBiases.push_back(weights[weightPtr]); 36 | ++weightPtr; 37 | } 38 | for (int i = 0; i < filters; ++i) { 39 | bnWeights.push_back(weights[weightPtr]); 40 | ++weightPtr; 41 | } 42 | for (int i = 0; i < filters; ++i) { 43 | bnRunningMean.push_back(weights[weightPtr]); 44 | ++weightPtr; 45 | } 46 | for (int i = 0; i < filters; ++i) { 47 | bnRunningVar.push_back(sqrt(weights[weightPtr] + eps)); 48 | ++weightPtr; 49 | } 50 | 51 | int size = filters; 52 | nvinfer1::Weights shift {nvinfer1::DataType::kFLOAT, nullptr, size}; 53 | nvinfer1::Weights scale {nvinfer1::DataType::kFLOAT, nullptr, size}; 54 | nvinfer1::Weights power {nvinfer1::DataType::kFLOAT, nullptr, size}; 55 | 56 | float* shiftWt = new float[size]; 57 | for (int i = 0; i < size; ++i) { 58 | shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); 59 | } 60 | shift.values = shiftWt; 61 | 62 | float* scaleWt = new float[size]; 63 | for (int i = 0; i < size; ++i) { 64 | scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; 65 | } 66 | scale.values = scaleWt; 67 | 68 | float* powerWt = new float[size]; 69 | for (int i = 0; i < size; ++i) { 70 | powerWt[i] = 1.0; 71 | } 72 | power.values = powerWt; 73 | 74 | trtWeights.push_back(shift); 75 | trtWeights.push_back(scale); 76 | trtWeights.push_back(power); 77 | 78 | nvinfer1::IScaleLayer* batchnorm = network->addScale(*input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); 79 | assert(batchnorm != nullptr); 80 | std::string batchnormLayerName = "batchnorm_" +
std::to_string(layerIdx); 81 | batchnorm->setName(batchnormLayerName.c_str()); 82 | output = batchnorm->getOutput(0); 83 | 84 | output = activationLayer(layerIdx, activation, output, network); 85 | assert(output != nullptr); 86 | 87 | return output; 88 | } 89 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __BATCHNORM_LAYER_H__ 7 | #define __BATCHNORM_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* batchnormLayer(int layerIdx, std::map& block, std::vector& weights, 17 | std::vector& trtWeights, int& weightPtr, nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "channels_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | channelsLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 12 | nvinfer1::ITensor* implicitTensor, nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "shift_channels" || block.at("type") == "control_channels"); 17 | 18 | if (block.at("type") == "shift_channels") { 19 | nvinfer1::IElementWiseLayer* shift = network->addElementWise(*input, *implicitTensor, 20 | nvinfer1::ElementWiseOperation::kSUM); 21 | assert(shift != nullptr); 22 | std::string shiftLayerName = "shift_channels_" + std::to_string(layerIdx); 23 | shift->setName(shiftLayerName.c_str()); 24 | output = shift->getOutput(0); 25 | } 26 | else if (block.at("type") == "control_channels") { 27 | nvinfer1::IElementWiseLayer* control = network->addElementWise(*input, *implicitTensor, 28 | nvinfer1::ElementWiseOperation::kPROD); 29 | assert(control != nullptr); 30 | std::string controlLayerName = "control_channels_" + std::to_string(layerIdx); 31 | control->setName(controlLayerName.c_str()); 32 | output = control->getOutput(0); 33 | } 34 | 35 | return output; 36 | } 37 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/channels_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __CHANNELS_LAYER_H__ 7 | #define __CHANNELS_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* channelsLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 15 | nvinfer1::ITensor* implicitTensor, nvinfer1::INetworkDefinition* network); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __CONVOLUTIONAL_LAYER_H__ 7 | #define __CONVOLUTIONAL_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 
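/*
 * All the builders in this layers/ directory share one calling convention:
 * each receives the flat darknet weights vector plus a running weightPtr
 * offset by reference, consumes the values it needs and advances weightPtr,
 * so the caller can chain builders in config-file order. A minimal sketch of
 * that pattern, with hypothetical variable names (the real driver logic
 * lives in yolo.cpp):
 *
 *   int weightPtr = 0;
 *   int inputChannels = 3;
 *   nvinfer1::ITensor* out = inputTensor;
 *   for (auto& block : cfgBlocks) {
 *     if (block.at("type") == "convolutional") {
 *       out = convolutionalLayer(layerIdx, block, weights, trtWeights,
 *                                weightPtr, inputChannels, out, network);
 *     }
 *   }
 */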
12 | #include "NvInfer.h" 13 | 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* convolutionalLayer(int layerIdx, std::map& block, 17 | std::vector& weights, std::vector& trtWeights, int& weightPtr, int& inputChannels, 18 | nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = ""); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __DECONVOLUTIONAL_LAYER_H__ 7 | #define __DECONVOLUTIONAL_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* deconvolutionalLayer(int layerIdx, std::map& block, 17 | std::vector& weights, std::vector& trtWeights, int& weightPtr, int& inputChannels, 18 | nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = ""); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "implicit_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | implicitLayer(int layerIdx, std::map& block, std::vector& weights, 12 | std::vector& trtWeights, int& weightPtr, nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "implicit" || block.at("type") == "implicit_add" || block.at("type") == "implicit_mul"); 17 | assert(block.find("filters") != block.end()); 18 | 19 | int filters = std::stoi(block.at("filters")); 20 | 21 | nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, filters}; 22 | 23 | float* val = new float[filters]; 24 | for (int i = 0; i < filters; ++i) { 25 | val[i] = weights[weightPtr]; 26 | ++weightPtr; 27 | } 28 | convWt.values = val; 29 | trtWeights.push_back(convWt); 30 | 31 | nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims{4, {1, filters, 1, 1}}, convWt); 32 | assert(implicit != nullptr); 33 | std::string implicitLayerName = block.at("type") + "_" + std::to_string(layerIdx); 34 | implicit->setName(implicitLayerName.c_str()); 35 | output = implicit->getOutput(0); 36 | 37 | return output; 38 | } 39 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/implicit_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __IMPLICIT_LAYER_H__ 7 | #define __IMPLICIT_LAYER_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "NvInfer.h" 14 | 15 | nvinfer1::ITensor* implicitLayer(int layerIdx, std::map& block, std::vector& weights, 16 | std::vector& trtWeights, int& weightPtr, nvinfer1::INetworkDefinition* network); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * 
https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "pooling_layer.h" 7 | 8 | #include 9 | #include 10 | 11 | nvinfer1::ITensor* 12 | poolingLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 13 | nvinfer1::INetworkDefinition* network) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "max" || block.at("type") == "maxpool" || block.at("type") == "avg" || 18 | block.at("type") == "avgpool"); 19 | 20 | if (block.at("type") == "max" || block.at("type") == "maxpool") { 21 | assert(block.find("size") != block.end()); 22 | assert(block.find("stride") != block.end()); 23 | 24 | int size = std::stoi(block.at("size")); 25 | int stride = std::stoi(block.at("stride")); 26 | 27 | nvinfer1::IPoolingLayer* maxpool = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, 28 | nvinfer1::Dims{2, {size, size}}); 29 | assert(maxpool != nullptr); 30 | std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx); 31 | maxpool->setName(maxpoolLayerName.c_str()); 32 | maxpool->setStrideNd(nvinfer1::Dims{2, {stride, stride}}); 33 | maxpool->setPaddingNd(nvinfer1::Dims{2, {(size - 1) / 2, (size - 1) / 2}}); 34 | if (size == 2 && stride == 1) { 35 | maxpool->setPrePadding(nvinfer1::Dims{2, {0, 0}}); 36 | maxpool->setPostPadding(nvinfer1::Dims{2, {1, 1}}); 37 | } 38 | output = maxpool->getOutput(0); 39 | } 40 | else if (block.at("type") == "avg" || block.at("type") == "avgpool") { 41 | nvinfer1::Dims inputDims = input->getDimensions(); 42 | nvinfer1::IPoolingLayer* avgpool = network->addPoolingNd(*input, nvinfer1::PoolingType::kAVERAGE, 43 | nvinfer1::Dims{2, {inputDims.d[1], inputDims.d[2]}}); 44 | assert(avgpool != nullptr); 45 | std::string avgpoolLayerName = "avgpool_" + std::to_string(layerIdx); 46 | avgpool->setName(avgpoolLayerName.c_str()); 47 | output = avgpool->getOutput(0); 48 | } 49 | else { 50 | std::cerr << "Pooling not supported: " << block.at("type") << std::endl; 51 | assert(0); 52 | } 53 | 54 | return output; 55 | } 56 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/pooling_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __POOLING_LAYER_H__ 7 | #define __POOLING_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* poolingLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 15 | nvinfer1::INetworkDefinition* network); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "reorg_layer.h" 7 | 8 | #include 9 | #include 10 | 11 | nvinfer1::ITensor* 12 | reorgLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 13 | nvinfer1::INetworkDefinition* network) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "reorg" || block.at("type") == "reorg3d"); 18 | 19 | int stride = 1; 20 | if(block.find("stride") != block.end()) { 21 | stride = std::stoi(block.at("stride")); 22 | } 23 | 24 | nvinfer1::Dims inputDims = input->getDimensions(); 25 | 26 | if (block.at("type") == "reorg3d") { 27 | std::string name1 = "slice1"; 28 | std::string name2 = "slice2"; 29 | 
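/*
 * reorg3d is a space-to-depth rearrangement: the four slices built below pick
 * the (even, even), (even, odd), (odd, even) and (odd, odd) spatial phases of
 * the input with the given stride, so a [N, C, H, W] tensor becomes
 * stride * stride phase maps of size [N, C, H/stride, W/stride] that are then
 * concatenated. The plain "reorg" branch further down implements the
 * equivalent YOLOv2 rearrangement with a chain of reshape/transpose shuffles
 * instead of slices.
 */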
std::string name3 = "slice3"; 30 | std::string name4 = "slice4"; 31 | nvinfer1::Dims start1 = {4, {0, 0, 0, 0}}; 32 | nvinfer1::Dims start2 = {4, {0, 0, 0, 1}}; 33 | nvinfer1::Dims start3 = {4, {0, 0, 1, 0}}; 34 | nvinfer1::Dims start4 = {4, {0, 0, 1, 1}}; 35 | nvinfer1::Dims sizeAll = {4, {inputDims.d[0], inputDims.d[1], inputDims.d[2] / stride, inputDims.d[3] / stride}}; 36 | nvinfer1::Dims strideAll = {4, {1, 1, stride, stride}}; 37 | 38 | nvinfer1::ITensor* slice1 = sliceLayer(layerIdx, name1, input, start1, sizeAll, strideAll, network); 39 | assert(slice1 != nullptr); 40 | 41 | nvinfer1::ITensor* slice2 = sliceLayer(layerIdx, name2, input, start2, sizeAll, strideAll, network); 42 | assert(slice2 != nullptr); 43 | 44 | nvinfer1::ITensor* slice3 = sliceLayer(layerIdx, name3, input, start3, sizeAll, strideAll, network); 45 | assert(slice3 != nullptr); 46 | 47 | nvinfer1::ITensor* slice4 = sliceLayer(layerIdx, name4, input, start4, sizeAll, strideAll, network); 48 | assert(slice4 != nullptr); 49 | 50 | std::vector concatInputs; 51 | concatInputs.push_back(slice1); 52 | concatInputs.push_back(slice2); 53 | concatInputs.push_back(slice3); 54 | concatInputs.push_back(slice4); 55 | 56 | nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); 57 | assert(concat != nullptr); 58 | std::string concatLayerName = "concat_" + std::to_string(layerIdx); 59 | concat->setName(concatLayerName.c_str()); 60 | concat->setAxis(0); 61 | output = concat->getOutput(0); 62 | } 63 | else { 64 | nvinfer1::IShuffleLayer* shuffle1 = network->addShuffle(*input); 65 | assert(shuffle1 != nullptr); 66 | std::string shuffle1LayerName = "shuffle1_" + std::to_string(layerIdx); 67 | shuffle1->setName(shuffle1LayerName.c_str()); 68 | nvinfer1::Dims reshapeDims1{6, {inputDims.d[0], inputDims.d[1] / (stride * stride), inputDims.d[2], stride, 69 | inputDims.d[3], stride}}; 70 | shuffle1->setReshapeDimensions(reshapeDims1); 71 | nvinfer1::Permutation permutation1{{0, 1, 2, 4, 3, 5}}; 72 | shuffle1->setSecondTranspose(permutation1); 73 | output = shuffle1->getOutput(0); 74 | 75 | nvinfer1::IShuffleLayer* shuffle2 = network->addShuffle(*output); 76 | assert(shuffle2 != nullptr); 77 | std::string shuffle2LayerName = "shuffle2_" + std::to_string(layerIdx); 78 | shuffle2->setName(shuffle2LayerName.c_str()); 79 | nvinfer1::Dims reshapeDims2{4, {inputDims.d[0], inputDims.d[1] / (stride * stride), inputDims.d[2] * inputDims.d[3], 80 | stride * stride}}; 81 | shuffle2->setReshapeDimensions(reshapeDims2); 82 | nvinfer1::Permutation permutation2{{0, 1, 3, 2}}; 83 | shuffle2->setSecondTranspose(permutation2); 84 | output = shuffle2->getOutput(0); 85 | 86 | nvinfer1::IShuffleLayer* shuffle3 = network->addShuffle(*output); 87 | assert(shuffle3 != nullptr); 88 | std::string shuffle3LayerName = "shuffle3_" + std::to_string(layerIdx); 89 | shuffle3->setName(shuffle3LayerName.c_str()); 90 | nvinfer1::Dims reshapeDims3{4, {inputDims.d[0], inputDims.d[1] / (stride * stride), stride * stride, 91 | inputDims.d[2] * inputDims.d[3]}}; 92 | shuffle3->setReshapeDimensions(reshapeDims3); 93 | nvinfer1::Permutation permutation3{{0, 2, 1, 3}}; 94 | shuffle3->setSecondTranspose(permutation3); 95 | output = shuffle3->getOutput(0); 96 | 97 | nvinfer1::IShuffleLayer* shuffle4 = network->addShuffle(*output); 98 | assert(shuffle4 != nullptr); 99 | std::string shuffle4LayerName = "shuffle4_" + std::to_string(layerIdx); 100 | shuffle4->setName(shuffle4LayerName.c_str()); 101 | nvinfer1::Dims reshapeDims4{4, 
{inputDims.d[0], inputDims.d[1] * stride * stride, inputDims.d[2] / stride, 102 | inputDims.d[3] / stride}}; 103 | shuffle4->setReshapeDimensions(reshapeDims4); 104 | output = shuffle4->getOutput(0); 105 | } 106 | 107 | return output; 108 | } 109 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reorg_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __REORG_LAYER_H__ 7 | #define __REORG_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | #include "slice_layer.h" 15 | 16 | nvinfer1::ITensor* reorgLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 17 | nvinfer1::INetworkDefinition* network); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "route_layer.h" 7 | 8 | nvinfer1::ITensor* 9 | routeLayer(int layerIdx, std::string& layers, std::map& block, 10 | std::vector tensorOutputs, nvinfer1::INetworkDefinition* network) 11 | { 12 | nvinfer1::ITensor* output; 13 | 14 | assert(block.at("type") == "route"); 15 | assert(block.find("layers") != block.end()); 16 | 17 | std::string strLayers = block.at("layers"); 18 | std::vector idxLayers; 19 | size_t lastPos = 0, pos = 0; 20 | while ((pos = strLayers.find(',', lastPos)) != std::string::npos) { 21 | int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos))); 22 | idxLayers.push_back(vL); 23 | lastPos = pos + 1; 24 | } 25 | if (lastPos < strLayers.length()) { 26 | std::string lastV = trim(strLayers.substr(lastPos)); 27 | if (!lastV.empty()) { 28 | idxLayers.push_back(std::stoi(lastV)); 29 | } 30 | } 31 | assert(!idxLayers.empty()); 32 | std::vector concatInputs; 33 | for (uint i = 0; i < idxLayers.size(); ++i) { 34 | if (idxLayers[i] < 0) { 35 | idxLayers[i] = tensorOutputs.size() + idxLayers[i]; 36 | } 37 | assert(idxLayers[i] >= 0 && idxLayers[i] < (int)tensorOutputs.size()); 38 | concatInputs.push_back(tensorOutputs[idxLayers[i]]); 39 | if (i < idxLayers.size() - 1) { 40 | layers += std::to_string(idxLayers[i]) + ", "; 41 | } 42 | } 43 | layers += std::to_string(idxLayers[idxLayers.size() - 1]); 44 | 45 | if (concatInputs.size() == 1) { 46 | output = concatInputs[0]; 47 | } 48 | else { 49 | int axis = 1; 50 | if (block.find("axis") != block.end()) { 51 | axis += std::stoi(block.at("axis")); 52 | } 53 | if (axis < 0) { 54 | axis += concatInputs[0]->getDimensions().nbDims; 55 | } 56 | 57 | nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); 58 | assert(concat != nullptr); 59 | std::string concatLayerName = "route_" + std::to_string(layerIdx); 60 | concat->setName(concatLayerName.c_str()); 61 | concat->setAxis(axis); 62 | output = concat->getOutput(0); 63 | } 64 | 65 | if (block.find("groups") != block.end()) { 66 | nvinfer1::Dims prevTensorDims = output->getDimensions(); 67 | int groups = stoi(block.at("groups")); 68 | int group_id = stoi(block.at("group_id")); 69 | int startSlice = (prevTensorDims.d[1] / groups) * group_id; 70 | int channelSlice = (prevTensorDims.d[1] / groups); 71 | 72 | std::string name = "slice"; 73 | nvinfer1::Dims start = 
{4, {0, startSlice, 0, 0}}; 74 | nvinfer1::Dims size = {4, {prevTensorDims.d[0], channelSlice, prevTensorDims.d[2], prevTensorDims.d[3]}}; 75 | nvinfer1::Dims stride = {4, {1, 1, 1, 1}}; 76 | 77 | output = sliceLayer(layerIdx, name, output, start, size, stride, network); 78 | assert(output != nullptr); 79 | } 80 | 81 | return output; 82 | } 83 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/route_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __ROUTE_LAYER_H__ 7 | #define __ROUTE_LAYER_H__ 8 | 9 | #include "../utils.h" 10 | 11 | #include "slice_layer.h" 12 | 13 | nvinfer1::ITensor* routeLayer(int layerIdx, std::string& layers, std::map& block, 14 | std::vector tensorOutputs, nvinfer1::INetworkDefinition* network); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/sam_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "sam_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | samLayer(int layerIdx, std::string activation, std::map& block, nvinfer1::ITensor* input, 12 | nvinfer1::ITensor* samInput, nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "sam"); 17 | 18 | nvinfer1::IElementWiseLayer* sam = network->addElementWise(*input, *samInput, nvinfer1::ElementWiseOperation::kPROD); 19 | assert(sam != nullptr); 20 | std::string samLayerName = "sam_" + std::to_string(layerIdx); 21 | sam->setName(samLayerName.c_str()); 22 | output = sam->getOutput(0); 23 | 24 | output = activationLayer(layerIdx, activation, output, network); 25 | assert(output != nullptr); 26 | 27 | return output; 28 | } 29 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/sam_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SAM_LAYER_H__ 7 | #define __SAM_LAYER_H__ 8 | 9 | #include 10 | 11 | #include "NvInfer.h" 12 | 13 | #include "activation_layer.h" 14 | 15 | nvinfer1::ITensor* samLayer(int layerIdx, std::string activation, std::map& block, 16 | nvinfer1::ITensor* input, nvinfer1::ITensor* samInput, nvinfer1::INetworkDefinition* network); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "shortcut_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::string shortcutVol, 12 | std::map& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcutInput, 13 | nvinfer1::INetworkDefinition* network) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "shortcut"); 18 | 19 | if (inputVol != shortcutVol) { 20 | std::string name = "slice"; 21 | nvinfer1::Dims start = {4, {0, 0, 0, 0}}; 22 | nvinfer1::Dims 
size = input->getDimensions(); 23 | nvinfer1::Dims stride = {4, {1, 1, 1, 1}}; 24 | 25 | output = sliceLayer(layerIdx, name, shortcutInput, start, size, stride, network); 26 | assert(output != nullptr); 27 | } 28 | else { 29 | output = shortcutInput; 30 | } 31 | 32 | nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output, 33 | nvinfer1::ElementWiseOperation::kSUM); 34 | assert(shortcut != nullptr); 35 | std::string shortcutLayerName = "shortcut_" + std::to_string(layerIdx); 36 | shortcut->setName(shortcutLayerName.c_str()); 37 | output = shortcut->getOutput(0); 38 | 39 | output = activationLayer(layerIdx, activation, output, network); 40 | assert(output != nullptr); 41 | 42 | return output; 43 | } 44 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SHORTCUT_LAYER_H__ 7 | #define __SHORTCUT_LAYER_H__ 8 | 9 | #include 10 | 11 | #include "NvInfer.h" 12 | 13 | #include "slice_layer.h" 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::string shortcutVol, 17 | std::map& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcut, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/slice_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "slice_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | sliceLayer(int layerIdx, std::string& name, nvinfer1::ITensor* input, nvinfer1::Dims start, nvinfer1::Dims size, 12 | nvinfer1::Dims stride, nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | nvinfer1::ISliceLayer* slice; 17 | 18 | nvinfer1::Dims inputDims = input->getDimensions(); 19 | 20 | if (inputDims.d[0] == -1) { 21 | slice = network->addSlice(*input, start, nvinfer1::Dims{}, stride); 22 | assert(slice != nullptr); 23 | 24 | int nbDims = size.nbDims; 25 | 26 | nvinfer1::IShapeLayer* shape = network->addShape(*input); 27 | assert(shape != nullptr); 28 | std::string shapeLayerName = "shape_" + name + "_" + std::to_string(layerIdx); 29 | shape->setName(shapeLayerName.c_str()); 30 | nvinfer1::ITensor* shapeTensor = shape->getOutput(0); 31 | assert(shapeTensor != nullptr); 32 | 33 | #if NV_TENSORRT_MAJOR >= 10 34 | nvinfer1::ICastLayer* castShape = network->addCast(*shapeTensor, nvinfer1::DataType::kINT32); 35 | assert(castShape != nullptr); 36 | std::string castShapeLayerName = "cast_shape_" + name + "_" + std::to_string(layerIdx); 37 | castShape->setName(castShapeLayerName.c_str()); 38 | nvinfer1::ITensor* castShapeTensor = castShape->getOutput(0); 39 | assert(castShapeTensor != nullptr); 40 | shapeTensor = castShapeTensor; 41 | #endif 42 | 43 | nvinfer1::Weights constantWt {nvinfer1::DataType::kINT32, nullptr, nbDims}; 44 | 45 | int* val = new int[nbDims]; 46 | for (int i = 0; i < nbDims; ++i) { 47 | if (inputDims.d[i] == size.d[i]) { 48 | val[i] = 0; 49 | } 50 | else { 51 | val[i] = inputDims.d[i] - size.d[i]; 52 | } 53 | } 54 | constantWt.values = val; 55 | 56 | nvinfer1::IConstantLayer* 
constant = network->addConstant(nvinfer1::Dims{1, {nbDims}}, constantWt); 57 | assert(constant != nullptr); 58 | std::string constantLayerName = "constant_" + name + "_" + std::to_string(layerIdx); 59 | constant->setName(constantLayerName.c_str()); 60 | nvinfer1::ITensor* constantTensor = constant->getOutput(0); 61 | assert(constantTensor != nullptr); 62 | 63 | nvinfer1::IElementWiseLayer* divide = network->addElementWise(*shapeTensor, *constantTensor, 64 | nvinfer1::ElementWiseOperation::kSUB); 65 | assert(divide != nullptr); 66 | std::string divideLayerName = "divide_" + name + "_" + std::to_string(layerIdx); 67 | divide->setName(divideLayerName.c_str()); 68 | nvinfer1::ITensor* divideTensor = divide->getOutput(0); 69 | assert(divideTensor != nullptr); 70 | 71 | slice->setInput(2, *divideTensor); 72 | } 73 | else { 74 | slice = network->addSlice(*input, start, size, stride); 75 | assert(slice != nullptr); 76 | } 77 | 78 | std::string sliceLayerName = name + "_" + std::to_string(layerIdx); 79 | slice->setName(sliceLayerName.c_str()); 80 | output = slice->getOutput(0); 81 | 82 | return output; 83 | } 84 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/slice_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SLICE_LAYER_H__ 7 | #define __SLICE_LAYER_H__ 8 | 9 | #include 10 | 11 | #include "NvInfer.h" 12 | 13 | nvinfer1::ITensor* sliceLayer(int layerIdx, std::string& name, nvinfer1::ITensor* input, nvinfer1::Dims start, 14 | nvinfer1::Dims size, nvinfer1::Dims stride, nvinfer1::INetworkDefinition* network); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "upsample_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | upsampleLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "upsample"); 17 | assert(block.find("stride") != block.end()); 18 | 19 | int stride = std::stoi(block.at("stride")); 20 | 21 | float scale[4] = {1, 1, static_cast(stride), static_cast(stride)}; 22 | 23 | nvinfer1::IResizeLayer* resize = network->addResize(*input); 24 | assert(resize != nullptr); 25 | std::string resizeLayerName = "upsample_" + std::to_string(layerIdx); 26 | resize->setName(resizeLayerName.c_str()); 27 | 28 | #if NV_TENSORRT_MAJOR > 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR > 4) 29 | resize->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); 30 | #else 31 | resize->setResizeMode(nvinfer1::ResizeMode::kNEAREST); 32 | #endif 33 | 34 | resize->setScales(scale, 4); 35 | output = resize->getOutput(0); 36 | 37 | return output; 38 | } 39 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __UPSAMPLE_LAYER_H__ 7 | #define __UPSAMPLE_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include 
"NvInfer.h" 13 | 14 | nvinfer1::ITensor* upsampleLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 15 | nvinfer1::INetworkDefinition* network); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo_cuda.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include 27 | #include 28 | 29 | #include "nvdsinfer_custom_impl.h" 30 | 31 | extern "C" bool 32 | NvDsInferParseYoloCuda(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, 33 | NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList); 34 | 35 | __global__ void decodeTensorYoloCuda(NvDsInferParseObjectInfo *binfo, const float* output, const uint outputSize, 36 | const uint netW, const uint netH, const float* preclusterThreshold) 37 | { 38 | int x_id = blockIdx.x * blockDim.x + threadIdx.x; 39 | 40 | if (x_id >= outputSize) { 41 | return; 42 | } 43 | 44 | float maxProb = output[x_id * 6 + 4]; 45 | int maxIndex = (int) output[x_id * 6 + 5]; 46 | 47 | if (maxProb < preclusterThreshold[maxIndex]) { 48 | binfo[x_id].detectionConfidence = 0.0; 49 | return; 50 | } 51 | 52 | float bx1 = output[x_id * 6 + 0]; 53 | float by1 = output[x_id * 6 + 1]; 54 | float bx2 = output[x_id * 6 + 2]; 55 | float by2 = output[x_id * 6 + 3]; 56 | 57 | bx1 = fminf(float(netW), fmaxf(float(0.0), bx1)); 58 | by1 = fminf(float(netH), fmaxf(float(0.0), by1)); 59 | bx2 = fminf(float(netW), fmaxf(float(0.0), bx2)); 60 | by2 = fminf(float(netH), fmaxf(float(0.0), by2)); 61 | 62 | binfo[x_id].left = bx1; 63 | binfo[x_id].top = by1; 64 | binfo[x_id].width = fminf(float(netW), fmaxf(float(0.0), bx2 - bx1)); 65 | binfo[x_id].height = fminf(float(netH), fmaxf(float(0.0), by2 - by1)); 66 | binfo[x_id].detectionConfidence = maxProb; 67 | binfo[x_id].classId = maxIndex; 68 | } 69 | 70 | static bool NvDsInferParseCustomYoloCuda(std::vector const& outputLayersInfo, 71 | NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, 72 | std::vector& objectList) 73 | { 74 | if (outputLayersInfo.empty()) { 75 | std::cerr << "ERROR: Could not find output layer in bbox 
parsing" << std::endl; 76 | return false; 77 | } 78 | 79 | const NvDsInferLayerInfo& output = outputLayersInfo[0]; 80 | const uint outputSize = output.inferDims.d[0]; 81 | 82 | thrust::device_vector perClassPreclusterThreshold = detectionParams.perClassPreclusterThreshold; 83 | 84 | thrust::device_vector objects(outputSize); 85 | 86 | int threads_per_block = 1024; 87 | int number_of_blocks = ((outputSize) / threads_per_block) + 1; 88 | 89 | decodeTensorYoloCuda<<>>( 90 | thrust::raw_pointer_cast(objects.data()), (float*) (output.buffer), outputSize, networkInfo.width, 91 | networkInfo.height, thrust::raw_pointer_cast(perClassPreclusterThreshold.data())); 92 | 93 | objectList.resize(outputSize); 94 | thrust::copy(objects.begin(), objects.end(), objectList.begin()); 95 | 96 | return true; 97 | } 98 | 99 | extern "C" bool 100 | NvDsInferParseYoloCuda(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, 101 | NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) 102 | { 103 | return NvDsInferParseCustomYoloCuda(outputLayersInfo, networkInfo, detectionParams, objectList); 104 | } 105 | 106 | CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloCuda); 107 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/utils.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "utils.h" 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | static void 33 | leftTrim(std::string& s) 34 | { 35 | s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); 36 | } 37 | 38 | static void 39 | rightTrim(std::string& s) 40 | { 41 | s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); 42 | } 43 | 44 | std::string 45 | trim(std::string s) 46 | { 47 | leftTrim(s); 48 | rightTrim(s); 49 | return s; 50 | } 51 | 52 | float 53 | clamp(const float val, const float minVal, const float maxVal) 54 | { 55 | assert(minVal <= maxVal); 56 | return std::min(maxVal, std::max(minVal, val)); 57 | } 58 | 59 | bool 60 | fileExists(const std::string fileName, bool verbose) 61 | { 62 | if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName))) { 63 | if (verbose) { 64 | std::cout << "\nFile does not exist: " << fileName << std::endl; 65 | } 66 | return false; 67 | } 68 | return true; 69 | } 70 | 71 | std::vector 72 | loadWeights(const std::string weightsFilePath) 73 | { 74 | assert(fileExists(weightsFilePath)); 75 | std::cout << "\nLoading pre-trained weights" << std::endl; 76 | 77 | std::vector weights; 78 | 79 | if (weightsFilePath.find(".weights") != std::string::npos) { 80 | std::ifstream file(weightsFilePath, std::ios_base::binary); 81 | assert(file.good()); 82 | std::string line; 83 | 84 | if (weightsFilePath.find("yolov2") != std::string::npos && 85 | weightsFilePath.find("yolov2-tiny") == std::string::npos) { 86 | // Remove 4 int32 bytes of data from the stream belonging to the header 87 | file.ignore(4 * 4); 88 | } 89 | else { 90 | // Remove 5 int32 bytes of data from the stream belonging to the header 91 | file.ignore(4 * 5); 92 | } 93 | 94 | char floatWeight[4]; 95 | while (!file.eof()) { 96 | file.read(floatWeight, 4); 97 | assert(file.gcount() == 4); 98 | weights.push_back(*reinterpret_cast(floatWeight)); 99 | if (file.peek() == std::istream::traits_type::eof()) { 100 | break; 101 | } 102 | } 103 | } 104 | else { 105 | std::cerr << "\nFile " << weightsFilePath << " is not supported" << std::endl; 106 | assert(0); 107 | } 108 | 109 | std::cout << "Loading " << weightsFilePath << " complete" << std::endl; 110 | std::cout << "Total weights read: " << weights.size() << std::endl; 111 | 112 | return weights; 113 | } 114 | 115 | std::string 116 | dimsToString(const nvinfer1::Dims d) 117 | { 118 | assert(d.nbDims >= 1); 119 | 120 | std::stringstream s; 121 | s << "["; 122 | for (int i = 1; i < d.nbDims - 1; ++i) { 123 | s << d.d[i] << ", "; 124 | } 125 | s << d.d[d.nbDims - 1] << "]"; 126 | 127 | return s.str(); 128 | } 129 | 130 | int 131 | getNumChannels(nvinfer1::ITensor* t) 132 | { 133 | nvinfer1::Dims d = t->getDimensions(); 134 | assert(d.nbDims == 4); 135 | return d.d[1]; 136 | } 137 | 138 | void 139 | printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, 140 | std::string weightPtr) 141 | { 142 | std::cout << std::setw(7) << std::left << layerIndex << std::setw(40) << std::left << layerName; 143 | std::cout << std::setw(19) << std::left << layerInput << std::setw(19) << std::left << layerOutput; 144 | std::cout << weightPtr << std::endl; 145 | } 146 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/utils.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #ifndef __UTILS_H__ 27 | #define __UTILS_H__ 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "NvInfer.h" 37 | 38 | std::string trim(std::string s); 39 | 40 | float clamp(const float val, const float minVal, const float maxVal); 41 | 42 | bool fileExists(const std::string fileName, bool verbose = true); 43 | 44 | std::vector loadWeights(const std::string weightsFilePath); 45 | 46 | std::string dimsToString(const nvinfer1::Dims d); 47 | 48 | int getNumChannels(nvinfer1::ITensor* t); 49 | 50 | void printLayerInfo( 51 | std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, 52 | std::string weightPtr); 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | 8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 9 | 10 | __global__ void gpuYoloLayer(const float* input, float* output, const uint netWidth, const uint netHeight, 11 | const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, 12 | const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask) 13 | { 14 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 15 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 16 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 17 | 18 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) { 19 | return; 20 | } 21 | 22 | const int numGridCells = gridSizeX * gridSizeY; 23 | const int bbindex = y_id * gridSizeX + x_id; 24 | 25 | const float alpha = scaleXY; 26 | const float beta = -0.5 * (scaleXY - 1); 27 | 28 | float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) * alpha + beta + x_id) 29 | * netWidth / gridSizeX; 30 | 31 | float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + 
numOutputClasses) + 1)]) * alpha + beta + y_id) 32 | * netHeight / gridSizeY; 33 | 34 | float w = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[mask[z_id] * 2]; 35 | 36 | float h = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[mask[z_id] * 2 + 1]; 37 | 38 | const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 39 | 40 | float maxProb = 0.0f; 41 | int maxIndex = -1; 42 | 43 | for (uint i = 0; i < numOutputClasses; ++i) { 44 | float prob = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]); 45 | if (prob > maxProb) { 46 | maxProb = prob; 47 | maxIndex = i; 48 | } 49 | } 50 | 51 | int count = numGridCells * z_id + bbindex + lastInputSize; 52 | 53 | output[count * 6 + 0] = xc - w * 0.5; 54 | output[count * 6 + 1] = yc - h * 0.5; 55 | output[count * 6 + 2] = xc + w * 0.5; 56 | output[count * 6 + 3] = yc + h * 0.5; 57 | output[count * 6 + 4] = maxProb * objectness; 58 | output[count * 6 + 5] = (float) maxIndex; 59 | } 60 | 61 | cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize, 62 | const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight, 63 | const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 64 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 65 | 66 | cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize, 67 | const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight, 68 | const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 69 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) 70 | { 71 | dim3 threads_per_block(16, 16, 4); 72 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, 73 | (numBBoxes / threads_per_block.z) + 1); 74 | 75 | for (unsigned int batch = 0; batch < batchSize; ++batch) { 76 | gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>( 77 | reinterpret_cast<const float*> (input) + (batch * inputSize), 78 | reinterpret_cast<float*> (output) + (batch * 6 * outputSize), 79 | netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY, 80 | reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask)); 81 | } 82 | return cudaGetLastError(); 83 | } 84 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include <cstdint> 7 | 8 | __global__ void gpuYoloLayer_nc(const float* input, float* output, const uint netWidth, const uint netHeight, 9 | const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, 10 | const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask) 11 | { 12 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 13 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 14 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 15 | 16 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) { 17 | return; 18 | } 19 | 20 | const int numGridCells = gridSizeX * gridSizeY; 21 | const int bbindex = y_id * gridSizeX + x_id;
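// The "_nc" decode assumes new_coords-style heads (e.g. scaled-YOLOv4), where
// the logistic activation is already applied inside the network, so no
// sigmoid is taken here: xc = (v * alpha + beta + x_id) * netWidth / gridSizeX
// with alpha = scaleXY and beta = -0.5 * (scaleXY - 1), and width/height use
// (2v)^2 * anchor instead of exp(v) * anchor. Worked example, assuming
// scaleXY = 2, v = 0.5, x_id = 3 and netWidth / gridSizeX = 32:
// xc = (0.5 * 2 - 0.5 + 3) * 32 = 112.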
22 | 23 | const float alpha = scaleXY; 24 | const float beta = -0.5 * (scaleXY - 1); 25 | 26 | float xc = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] * alpha + beta + x_id) * netWidth / 27 | gridSizeX; 28 | 29 | float yc = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] * alpha + beta + y_id) * netHeight / 30 | gridSizeY; 31 | 32 | float w = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) * 33 | anchors[mask[z_id] * 2]; 34 | 35 | float h = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2) * 36 | anchors[mask[z_id] * 2 + 1]; 37 | 38 | const float objectness = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]; 39 | 40 | float maxProb = 0.0f; 41 | int maxIndex = -1; 42 | 43 | for (uint i = 0; i < numOutputClasses; ++i) { 44 | float prob = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 45 | if (prob > maxProb) { 46 | maxProb = prob; 47 | maxIndex = i; 48 | } 49 | } 50 | 51 | int count = numGridCells * z_id + bbindex + lastInputSize; 52 | 53 | output[count * 6 + 0] = xc - w * 0.5; 54 | output[count * 6 + 1] = yc - h * 0.5; 55 | output[count * 6 + 2] = xc + w * 0.5; 56 | output[count * 6 + 3] = yc + h * 0.5; 57 | output[count * 6 + 4] = maxProb * objectness; 58 | output[count * 6 + 5] = (float) maxIndex; 59 | } 60 | 61 | cudaError_t cudaYoloLayer_nc(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize, 62 | const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight, 63 | const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 64 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 65 | 66 | cudaError_t cudaYoloLayer_nc(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize, 67 | const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight, 68 | const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 69 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) 70 | { 71 | dim3 threads_per_block(16, 16, 4); 72 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, 73 | (numBBoxes / threads_per_block.z) + 1); 74 | 75 | for (unsigned int batch = 0; batch < batchSize; ++batch) { 76 | gpuYoloLayer_nc<<<number_of_blocks, threads_per_block, 0, stream>>>( 77 | reinterpret_cast<const float*> (input) + (batch * inputSize), 78 | reinterpret_cast<float*> (output) + (batch * 6 * outputSize), 79 | netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY, 80 | reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask)); 81 | } 82 | return cudaGetLastError(); 83 | } 84 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include <cstdint> 7 | 8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 9 | 10 | __device__ void softmaxGPU(const float* input, const int bbindex, const int numGridCells, uint z_id, 11 | const uint numOutputClasses, float temp, float* output) 12 | { 13 | int i; 14 | float sum = 0; 15 | float largest = -INFINITY; 16 | for (i = 0; i < numOutputClasses; ++i) { 17 |
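// First pass: find the largest logit so the exponentials below become
// __expf((x - largest) / temp), the usual max-subtraction trick that keeps
// the softmax numerically stable.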
int val = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 18 | largest = (val>largest) ? val : largest; 19 | } 20 | for (i = 0; i < numOutputClasses; ++i) { 21 | float e = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] / temp - largest / temp); 22 | sum += e; 23 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] = e; 24 | } 25 | for (i = 0; i < numOutputClasses; ++i) { 26 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] /= sum; 27 | } 28 | } 29 | 30 | __global__ void gpuRegionLayer(const float* input, float* softmax, float* output, const uint netWidth, 31 | const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, 32 | const uint64_t lastInputSize, const float* anchors) 33 | { 34 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 35 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 36 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 37 | 38 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) { 39 | return; 40 | } 41 | 42 | const int numGridCells = gridSizeX * gridSizeY; 43 | const int bbindex = y_id * gridSizeX + x_id; 44 | 45 | float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + x_id) * netWidth / 46 | gridSizeX; 47 | 48 | float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + y_id) * netHeight / 49 | gridSizeY; 50 | 51 | float w = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[z_id * 2] * netWidth / 52 | gridSizeX; 53 | 54 | float h = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[z_id * 2 + 1] * 55 | netHeight / gridSizeY; 56 | 57 | const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 58 | 59 | softmaxGPU(input, bbindex, numGridCells, z_id, numOutputClasses, 1.0, softmax); 60 | 61 | float maxProb = 0.0f; 62 | int maxIndex = -1; 63 | 64 | for (uint i = 0; i < numOutputClasses; ++i) { 65 | float prob = softmax[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 66 | if (prob > maxProb) { 67 | maxProb = prob; 68 | maxIndex = i; 69 | } 70 | } 71 | 72 | int count = numGridCells * z_id + bbindex + lastInputSize; 73 | 74 | output[count * 6 + 0] = xc - w * 0.5; 75 | output[count * 6 + 1] = yc - h * 0.5; 76 | output[count * 6 + 2] = xc + w * 0.5; 77 | output[count * 6 + 3] = yc + h * 0.5; 78 | output[count * 6 + 4] = maxProb * objectness; 79 | output[count * 6 + 5] = (float) maxIndex; 80 | } 81 | 82 | cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, const uint& batchSize, 83 | const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, 84 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 85 | const uint& numBBoxes, const void* anchors, cudaStream_t stream); 86 | 87 | cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, const uint& batchSize, 88 | const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, 89 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 90 | const uint& numBBoxes, const void* anchors, cudaStream_t stream) 91 | { 92 | dim3 threads_per_block(16, 16, 4); 93 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 
1, (gridSizeY / threads_per_block.y) + 1, 94 | (numBBoxes / threads_per_block.z) + 1); 95 | 96 | for (unsigned int batch = 0; batch < batchSize; ++batch) { 97 | gpuRegionLayer<<<number_of_blocks, threads_per_block, 0, stream>>>( 98 | reinterpret_cast<const float*>(input) + (batch * inputSize), 99 | reinterpret_cast<float*>(softmax) + (batch * inputSize), 100 | reinterpret_cast<float*>(output) + (batch * 6 * outputSize), 101 | netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, 102 | reinterpret_cast<const float*>(anchors)); 103 | } 104 | return cudaGetLastError(); 105 | } 106 | -------------------------------------------------------------------------------- /utils/export_damoyolo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | from damo.config.base import parse_config 7 | from damo.utils.model_utils import replace_module 8 | from damo.base_models.core.ops import RepConv, SiLU 9 | from damo.detectors.detector import build_local_model 10 | 11 | 12 | class DeepStreamOutput(nn.Module): 13 | def __init__(self): 14 | super().__init__() 15 | 16 | def forward(self, x): 17 | boxes = x[1] 18 | scores, labels = torch.max(x[0], dim=-1, keepdim=True) 19 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 20 | 21 | 22 | def suppress_warnings(): 23 | import warnings 24 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 25 | warnings.filterwarnings('ignore', category=UserWarning) 26 | warnings.filterwarnings('ignore', category=DeprecationWarning) 27 | warnings.filterwarnings('ignore', category=FutureWarning) 28 | warnings.filterwarnings('ignore', category=ResourceWarning) 29 | 30 | 31 | def damoyolo_export(weights, config_file, device): 32 | config = parse_config(config_file) 33 | config.model.head.export_with_post = True 34 | model = build_local_model(config, device) 35 | ckpt = torch.load(weights, map_location=device) 36 | model.eval() 37 | if 'model' in ckpt: 38 | ckpt = ckpt['model'] 39 | model.load_state_dict(ckpt, strict=True) 40 | model = replace_module(model, nn.SiLU, SiLU) 41 | for layer in model.modules(): 42 | if isinstance(layer, RepConv): 43 | layer.switch_to_deploy() 44 | model.head.nms = False 45 | return config, model 46 | 47 | 48 | def main(args): 49 | suppress_warnings() 50 | 51 | print(f'\nStarting: {args.weights}') 52 | 53 | print('Opening DAMO-YOLO model') 54 | 55 | device = torch.device('cpu') 56 | cfg, model = damoyolo_export(args.weights, args.config, device) 57 | 58 | if len(cfg.dataset['class_names']) > 0: 59 | print('Creating labels.txt file') 60 | with open('labels.txt', 'w', encoding='utf-8') as f: 61 | for name in cfg.dataset['class_names']: 62 | f.write(f'{name}\n') 63 | 64 | model = nn.Sequential(model, DeepStreamOutput()) 65 | 66 | img_size = args.size * 2 if len(args.size) == 1 else args.size 67 | 68 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 69 | onnx_output_file = f'{args.weights}.onnx' 70 | 71 | dynamic_axes = { 72 | 'input': { 73 | 0: 'batch' 74 | }, 75 | 'output': { 76 | 0: 'batch' 77 | } 78 | } 79 | 80 | print('Exporting the model to ONNX') 81 | torch.onnx.export( 82 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 83 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 84 | ) 85 | 86 | if args.simplify: 87 | print('Simplifying the ONNX model') 88 | import onnxslim 89 | model_onnx = onnx.load(onnx_output_file) 90 | model_onnx =
onnxslim.slim(model_onnx) 91 | onnx.save(model_onnx, onnx_output_file) 92 | 93 | print(f'Done: {onnx_output_file}\n') 94 | 95 | 96 | def parse_args(): 97 | import argparse 98 | parser = argparse.ArgumentParser(description='DeepStream DAMO-YOLO conversion') 99 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') 100 | parser.add_argument('-c', '--config', required=True, help='Input config (.py) file path (required)') 101 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 102 | parser.add_argument('--opset', type=int, default=11, help='ONNX opset version') 103 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 104 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 105 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 106 | args = parser.parse_args() 107 | if not os.path.isfile(args.weights): 108 | raise SystemExit('Invalid weights file') 109 | if not os.path.isfile(args.config): 110 | raise SystemExit('Invalid config file') 111 | if args.dynamic and args.batch > 1: 112 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 113 | return args 114 | 115 | 116 | if __name__ == '__main__': 117 | args = parse_args() 118 | main(args) 119 | -------------------------------------------------------------------------------- /utils/export_dfine.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from src.core import YAMLConfig 8 | 9 | 10 | class DeepStreamOutput(nn.Module): 11 | def __init__(self, img_size, use_focal_loss): 12 | super().__init__() 13 | self.img_size = img_size 14 | self.use_focal_loss = use_focal_loss 15 | 16 | def forward(self, x): 17 | boxes = x['pred_boxes'] 18 | convert_matrix = torch.tensor( 19 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 20 | ) 21 | boxes @= convert_matrix 22 | boxes *= torch.as_tensor([[*self.img_size]]).flip(1).tile([1, 2]).unsqueeze(1) 23 | scores = F.sigmoid(x['pred_logits']) if self.use_focal_loss else F.softmax(x['pred_logits'])[:, :, :-1] 24 | scores, labels = torch.max(scores, dim=-1, keepdim=True) 25 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 26 | 27 | 28 | def dfine_export(weights, cfg_file, device): 29 | cfg = YAMLConfig(cfg_file, resume=weights) 30 | if 'HGNetv2' in cfg.yaml_cfg: 31 | cfg.yaml_cfg['HGNetv2']['pretrained'] = False 32 | checkpoint = torch.load(weights, map_location=device) 33 | if 'ema' in checkpoint: 34 | state = checkpoint['ema']['module'] 35 | else: 36 | state = checkpoint['model'] 37 | cfg.model.load_state_dict(state) 38 | return cfg.model.deploy(), cfg.postprocessor.use_focal_loss 39 | 40 | 41 | def suppress_warnings(): 42 | import warnings 43 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 44 | warnings.filterwarnings('ignore', category=UserWarning) 45 | warnings.filterwarnings('ignore', category=DeprecationWarning) 46 | warnings.filterwarnings('ignore', category=FutureWarning) 47 | warnings.filterwarnings('ignore', category=ResourceWarning) 48 | 49 | 50 | def main(args): 51 | suppress_warnings() 52 | 53 | print(f'\nStarting: {args.weights}') 54 | 55 | print('Opening D-FINE model') 56 | 57 | device = torch.device('cpu') 58 | model, 
use_focal_loss = dfine_export(args.weights, args.config, device) 59 | 60 | img_size = args.size * 2 if len(args.size) == 1 else args.size 61 | 62 | model = nn.Sequential(model, DeepStreamOutput(img_size, use_focal_loss)) 63 | 64 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 65 | onnx_output_file = f'{args.weights}.onnx' 66 | 67 | dynamic_axes = { 68 | 'input': { 69 | 0: 'batch' 70 | }, 71 | 'output': { 72 | 0: 'batch' 73 | } 74 | } 75 | 76 | print('Exporting the model to ONNX') 77 | torch.onnx.export( 78 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 79 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 80 | ) 81 | 82 | if args.simplify: 83 | print('Simplifying the ONNX model') 84 | import onnxslim 85 | model_onnx = onnx.load(onnx_output_file) 86 | model_onnx = onnxslim.slim(model_onnx) 87 | onnx.save(model_onnx, onnx_output_file) 88 | 89 | print(f'Done: {onnx_output_file}\n') 90 | 91 | 92 | def parse_args(): 93 | import argparse 94 | parser = argparse.ArgumentParser(description='DeepStream D-FINE conversion') 95 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') 96 | parser.add_argument('-c', '--config', required=True, help='Input YAML (.yml) file path (required)') 97 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 98 | parser.add_argument('--opset', type=int, default=16, help='ONNX opset version') 99 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 100 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 101 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 102 | args = parser.parse_args() 103 | if not os.path.isfile(args.weights): 104 | raise SystemExit('Invalid weights file') 105 | if not os.path.isfile(args.config): 106 | raise SystemExit('Invalid config file') 107 | if args.dynamic and args.batch > 1: 108 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 109 | return args 110 | 111 | 112 | if __name__ == '__main__': 113 | args = parse_args() 114 | main(args) 115 | -------------------------------------------------------------------------------- /utils/export_goldyolo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | import yolov6.utils.general as _m 7 | from yolov6.layers.common import SiLU 8 | from gold_yolo.switch_tool import switch_to_deploy 9 | from yolov6.utils.checkpoint import load_checkpoint 10 | 11 | 12 | def _dist2bbox(distance, anchor_points, box_format='xyxy'): 13 | lt, rb = torch.split(distance, 2, -1) 14 | x1y1 = anchor_points - lt 15 | x2y2 = anchor_points + rb 16 | bbox = torch.cat([x1y1, x2y2], -1) 17 | return bbox 18 | 19 | _m.dist2bbox.__code__ = _dist2bbox.__code__ 20 | 21 | 22 | class DeepStreamOutput(nn.Module): 23 | def __init__(self): 24 | super().__init__() 25 | 26 | def forward(self, x): 27 | boxes = x[:, :, :4] 28 | objectness = x[:, :, 4:5] 29 | scores, labels = torch.max(x[:, :, 5:], dim=-1, keepdim=True) 30 | scores *= objectness 31 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 32 | 33 | 34 | def gold_yolo_export(weights, device, inplace=True, fuse=True): 35 | model = load_checkpoint(weights, map_location=device, inplace=inplace, 
fuse=fuse) 36 | model = switch_to_deploy(model) 37 | for layer in model.modules(): 38 | t = type(layer) 39 | if t.__name__ == 'RepVGGBlock': 40 | layer.switch_to_deploy() 41 | model.eval() 42 | for k, m in model.named_modules(): 43 | if m.__class__.__name__ == 'Conv': 44 | if isinstance(m.act, nn.SiLU): 45 | m.act = SiLU() 46 | elif m.__class__.__name__ == 'Detect': 47 | m.inplace = False 48 | return model 49 | 50 | 51 | def suppress_warnings(): 52 | import warnings 53 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 54 | warnings.filterwarnings('ignore', category=UserWarning) 55 | warnings.filterwarnings('ignore', category=DeprecationWarning) 56 | warnings.filterwarnings('ignore', category=FutureWarning) 57 | warnings.filterwarnings('ignore', category=ResourceWarning) 58 | 59 | 60 | def main(args): 61 | suppress_warnings() 62 | 63 | print(f'\nStarting: {args.weights}') 64 | 65 | print('Opening Gold-YOLO model') 66 | 67 | device = torch.device('cpu') 68 | model = gold_yolo_export(args.weights, device) 69 | 70 | model = nn.Sequential(model, DeepStreamOutput()) 71 | 72 | img_size = args.size * 2 if len(args.size) == 1 else args.size 73 | 74 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 75 | onnx_output_file = f'{args.weights}.onnx' 76 | 77 | dynamic_axes = { 78 | 'input': { 79 | 0: 'batch' 80 | }, 81 | 'output': { 82 | 0: 'batch' 83 | } 84 | } 85 | 86 | print('Exporting the model to ONNX') 87 | torch.onnx.export( 88 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 89 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 90 | ) 91 | 92 | if args.simplify: 93 | print('Simplifying the ONNX model') 94 | import onnxslim 95 | model_onnx = onnx.load(onnx_output_file) 96 | model_onnx = onnxslim.slim(model_onnx) 97 | onnx.save(model_onnx, onnx_output_file) 98 | 99 | print(f'Done: {onnx_output_file}\n') 100 | 101 | 102 | def parse_args(): 103 | import argparse 104 | parser = argparse.ArgumentParser(description='DeepStream Gold-YOLO conversion') 105 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') 106 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 107 | parser.add_argument('--opset', type=int, default=13, help='ONNX opset version') 108 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 109 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 110 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 111 | args = parser.parse_args() 112 | if not os.path.isfile(args.weights): 113 | raise SystemExit('Invalid weights file') 114 | if args.dynamic and args.batch > 1: 115 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 116 | return args 117 | 118 | 119 | if __name__ == '__main__': 120 | args = parse_args() 121 | main(args) 122 | -------------------------------------------------------------------------------- /utils/export_ppyoloe.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import paddle 4 | import paddle.nn as nn 5 | 6 | from ppdet.engine import Trainer 7 | from ppdet.utils.cli import ArgsParser 8 | from ppdet.slim import build_slim_model 9 | from ppdet.data.source.category import get_categories 10 | from ppdet.utils.check import 
check_version, check_config 11 | from ppdet.core.workspace import load_config, merge_config 12 | 13 | 14 | class DeepStreamOutput(nn.Layer): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | def forward(self, x): 19 | boxes = x['bbox'] 20 | x['bbox_num'] = x['bbox_num'].transpose([0, 2, 1]) 21 | scores = paddle.max(x['bbox_num'], axis=-1, keepdim=True) 22 | labels = paddle.argmax(x['bbox_num'], axis=-1, keepdim=True) 23 | return paddle.concat((boxes, scores, paddle.cast(labels, dtype=boxes.dtype)), axis=-1) 24 | 25 | 26 | class DeepStreamInput(nn.Layer): 27 | def __init__(self): 28 | super().__init__() 29 | 30 | def forward(self, x): 31 | y = {} 32 | y['image'] = x['image'] 33 | y['scale_factor'] = paddle.to_tensor([1.0, 1.0], dtype=x['image'].dtype) 34 | return y 35 | 36 | 37 | def ppyoloe_export(FLAGS): 38 | cfg = load_config(FLAGS.config) 39 | FLAGS.opt['weights'] = FLAGS.weights 40 | FLAGS.opt['exclude_nms'] = True 41 | merge_config(FLAGS.opt) 42 | if FLAGS.slim_config: 43 | cfg = build_slim_model(cfg, FLAGS.slim_config, mode='test') 44 | merge_config(FLAGS.opt) 45 | check_config(cfg) 46 | check_version() 47 | trainer = Trainer(cfg, mode='test') 48 | trainer.load_weights(cfg.weights) 49 | trainer.model.eval() 50 | if not os.path.exists('.tmp'): 51 | os.makedirs('.tmp') 52 | static_model, _ = trainer._get_infer_cfg_and_input_spec('.tmp') 53 | os.system('rm -r .tmp') 54 | return trainer.cfg, static_model 55 | 56 | 57 | def suppress_warnings(): 58 | import warnings 59 | warnings.filterwarnings('ignore') 60 | 61 | 62 | def main(FLAGS): 63 | suppress_warnings() 64 | 65 | print(f'\nStarting: {FLAGS.weights}') 66 | 67 | print('Opening PPYOLOE model') 68 | 69 | paddle.set_device('cpu') 70 | cfg, model = ppyoloe_export(FLAGS) 71 | 72 | anno_file = cfg['TestDataset'].get_anno() 73 | if os.path.isfile(anno_file): 74 | _, catid2name = get_categories(cfg['metric'], anno_file, 'detection_arch') 75 | print('Creating labels.txt file') 76 | with open('labels.txt', 'w', encoding='utf-8') as f: 77 | for name in catid2name.values(): 78 | f.write(f'{name}\n') 79 | 80 | model = nn.Sequential(DeepStreamInput(), model, DeepStreamOutput()) 81 | 82 | img_size = [cfg.eval_height, cfg.eval_width] 83 | 84 | onnx_input_im = {} 85 | onnx_input_im['image'] = paddle.static.InputSpec(shape=[FLAGS.batch, 3, *img_size], dtype='float32') 86 | onnx_output_file = f'{FLAGS.weights}.onnx' 87 | 88 | print('Exporting the model to ONNX') 89 | paddle.onnx.export(model, FLAGS.weights, input_spec=[onnx_input_im], opset_version=FLAGS.opset) 90 | 91 | if FLAGS.simplify: 92 | print('Simplifying the ONNX model') 93 | import onnxslim 94 | model_onnx = onnx.load(onnx_output_file) 95 | model_onnx = onnxslim.slim(model_onnx) 96 | onnx.save(model_onnx, onnx_output_file) 97 | 98 | print(f'Done: {onnx_output_file}\n') 99 | 100 | 101 | def parse_args(): 102 | parser = ArgsParser() 103 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pdparams) file path (required)') 104 | parser.add_argument('--slim_config', default=None, type=str, help='Slim configuration file of slim method') 105 | parser.add_argument('--opset', type=int, default=11, help='ONNX opset version') 106 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 107 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 108 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 109 | args = parser.parse_args() 110 | if not os.path.isfile(args.weights): 111 | raise 
SystemExit('Invalid weights file') 112 | if args.dynamic and args.batch > 1: 113 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 114 | elif args.dynamic: 115 | args.batch = None 116 | return args 117 | 118 | 119 | if __name__ == '__main__': 120 | FLAGS = parse_args() 121 | main(FLAGS) 122 | -------------------------------------------------------------------------------- /utils/export_rtdetr_paddle.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import paddle 4 | import paddle.nn as nn 5 | import paddle.nn.functional as F 6 | 7 | from ppdet.engine import Trainer 8 | from ppdet.utils.cli import ArgsParser 9 | from ppdet.utils.check import check_version, check_config 10 | from ppdet.core.workspace import load_config, merge_config 11 | 12 | 13 | class DeepStreamOutput(nn.Layer): 14 | def __init__(self, img_size, use_focal_loss): 15 | super().__init__() 16 | self.img_size = img_size 17 | self.use_focal_loss = use_focal_loss 18 | 19 | def forward(self, x): 20 | boxes = x['bbox'] 21 | convert_matrix = paddle.to_tensor( 22 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype 23 | ) 24 | boxes @= convert_matrix 25 | boxes *= paddle.to_tensor([[*self.img_size]]).flip(1).tile([1, 2]).unsqueeze(1) 26 | bbox_num = F.sigmoid(x['bbox_num']) if self.use_focal_loss else F.softmax(x['bbox_num'])[:, :, :-1] 27 | scores = paddle.max(bbox_num, axis=-1, keepdim=True) 28 | labels = paddle.argmax(bbox_num, axis=-1, keepdim=True) 29 | return paddle.concat((boxes, scores, paddle.cast(labels, dtype=boxes.dtype)), axis=-1) 30 | 31 | 32 | def rtdetr_paddle_export(FLAGS): 33 | cfg = load_config(FLAGS.config) 34 | FLAGS.opt['weights'] = FLAGS.weights 35 | FLAGS.opt['exclude_nms'] = True 36 | FLAGS.opt['exclude_post_process'] = True 37 | merge_config(FLAGS.opt) 38 | merge_config(FLAGS.opt) 39 | check_config(cfg) 40 | check_version() 41 | trainer = Trainer(cfg, mode='test') 42 | trainer.load_weights(cfg.weights) 43 | trainer.model.eval() 44 | if not os.path.exists('.tmp'): 45 | os.makedirs('.tmp') 46 | static_model, _ = trainer._get_infer_cfg_and_input_spec('.tmp') 47 | os.system('rm -r .tmp') 48 | return trainer.cfg, static_model 49 | 50 | 51 | def suppress_warnings(): 52 | import warnings 53 | warnings.filterwarnings('ignore') 54 | 55 | 56 | def main(FLAGS): 57 | suppress_warnings() 58 | 59 | print(f'\nStarting: {FLAGS.weights}') 60 | 61 | print('Opening RT-DETR Paddle model') 62 | 63 | paddle.set_device('cpu') 64 | cfg, model = rtdetr_paddle_export(FLAGS) 65 | 66 | img_size = [cfg.eval_size[1], cfg.eval_size[0]] 67 | 68 | model = nn.Sequential(model, DeepStreamOutput(img_size, cfg.use_focal_loss)) 69 | 70 | onnx_input_im = {} 71 | onnx_input_im['image'] = paddle.static.InputSpec(shape=[FLAGS.batch, 3, *img_size], dtype='float32') 72 | onnx_output_file = f'{FLAGS.weights}.onnx' 73 | 74 | print('Exporting the model to ONNX\n') 75 | paddle.onnx.export(model, FLAGS.weights, input_spec=[onnx_input_im], opset_version=FLAGS.opset) 76 | 77 | if FLAGS.simplify: 78 | print('Simplifying the ONNX model') 79 | import onnxslim 80 | model_onnx = onnx.load(onnx_output_file) 81 | model_onnx = onnxslim.slim(model_onnx) 82 | onnx.save(model_onnx, onnx_output_file) 83 | 84 | print(f'Done: {onnx_output_file}\n') 85 | 86 | 87 | def parse_args(): 88 | parser = ArgsParser() 89 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pdparams) file path (required)') 90 | 
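# Note: rtdetr_paddle_export above never reads FLAGS.slim_config, so unlike in
# export_ppyoloe.py this flag is accepted but has no effect in this script.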
parser.add_argument('--slim_config', default=None, type=str, help='Slim configuration file of slim method') 91 | parser.add_argument('--opset', type=int, default=16, help='ONNX opset version') 92 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 93 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 94 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 95 | args = parser.parse_args() 96 | if not os.path.isfile(args.weights): 97 | raise SystemExit('Invalid weights file') 98 | if args.dynamic and args.batch > 1: 99 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 100 | elif args.dynamic: 101 | args.batch = None 102 | return args 103 | 104 | 105 | if __name__ == '__main__': 106 | FLAGS = parse_args() 107 | main(FLAGS) 108 | -------------------------------------------------------------------------------- /utils/export_rtdetr_pytorch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from src.core import YAMLConfig 8 | 9 | 10 | class DeepStreamOutput(nn.Module): 11 | def __init__(self, img_size, use_focal_loss): 12 | super().__init__() 13 | self.img_size = img_size 14 | self.use_focal_loss = use_focal_loss 15 | 16 | def forward(self, x): 17 | boxes = x['pred_boxes'] 18 | convert_matrix = torch.tensor( 19 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 20 | ) 21 | boxes @= convert_matrix 22 | boxes *= torch.as_tensor([[*self.img_size]]).flip(1).tile([1, 2]).unsqueeze(1) 23 | scores = F.sigmoid(x['pred_logits']) if self.use_focal_loss else F.softmax(x['pred_logits'])[:, :, :-1] 24 | scores, labels = torch.max(scores, dim=-1, keepdim=True) 25 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 26 | 27 | 28 | def rtdetr_pytorch_export(weights, cfg_file, device): 29 | cfg = YAMLConfig(cfg_file, resume=weights) 30 | checkpoint = torch.load(weights, map_location=device) 31 | if 'ema' in checkpoint: 32 | state = checkpoint['ema']['module'] 33 | else: 34 | state = checkpoint['model'] 35 | cfg.model.load_state_dict(state) 36 | return cfg.model.deploy(), cfg.postprocessor.use_focal_loss 37 | 38 | 39 | def suppress_warnings(): 40 | import warnings 41 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 42 | warnings.filterwarnings('ignore', category=UserWarning) 43 | warnings.filterwarnings('ignore', category=DeprecationWarning) 44 | warnings.filterwarnings('ignore', category=FutureWarning) 45 | warnings.filterwarnings('ignore', category=ResourceWarning) 46 | 47 | 48 | def main(args): 49 | suppress_warnings() 50 | 51 | print(f'\nStarting: {args.weights}') 52 | 53 | print('Opening RT-DETR PyTorch model') 54 | 55 | device = torch.device('cpu') 56 | model, use_focal_loss = rtdetr_pytorch_export(args.weights, args.config, device) 57 | 58 | img_size = args.size * 2 if len(args.size) == 1 else args.size 59 | 60 | model = nn.Sequential(model, DeepStreamOutput(img_size, use_focal_loss)) 61 | 62 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 63 | onnx_output_file = f'{args.weights}.onnx' 64 | 65 | dynamic_axes = { 66 | 'input': { 67 | 0: 'batch' 68 | }, 69 | 'output': { 70 | 0: 'batch' 71 | } 72 | } 73 | 74 | print('Exporting the model to ONNX') 75 | torch.onnx.export( 76 | model, onnx_input_im, onnx_output_file, 
verbose=False, opset_version=args.opset, do_constant_folding=True, 77 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 78 | ) 79 | 80 | if args.simplify: 81 | print('Simplifying the ONNX model') 82 | import onnxslim 83 | model_onnx = onnx.load(onnx_output_file) 84 | model_onnx = onnxslim.slim(model_onnx) 85 | onnx.save(model_onnx, onnx_output_file) 86 | 87 | print(f'Done: {onnx_output_file}\n') 88 | 89 | 90 | def parse_args(): 91 | import argparse 92 | parser = argparse.ArgumentParser(description='DeepStream RT-DETR PyTorch conversion') 93 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') 94 | parser.add_argument('-c', '--config', required=True, help='Input YAML (.yml) file path (required)') 95 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 96 | parser.add_argument('--opset', type=int, default=16, help='ONNX opset version') 97 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 98 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 99 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 100 | args = parser.parse_args() 101 | if not os.path.isfile(args.weights): 102 | raise SystemExit('Invalid weights file') 103 | if not os.path.isfile(args.config): 104 | raise SystemExit('Invalid config file') 105 | if args.dynamic and args.batch > 1: 106 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 107 | return args 108 | 109 | 110 | if __name__ == '__main__': 111 | args = parse_args() 112 | main(args) 113 | -------------------------------------------------------------------------------- /utils/export_rtdetr_ultralytics.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | from copy import deepcopy 5 | 6 | from ultralytics import RTDETR 7 | 8 | 9 | class DeepStreamOutput(nn.Module): 10 | def __init__(self, img_size): 11 | super().__init__() 12 | self.img_size = img_size 13 | 14 | def forward(self, x): 15 | boxes = x[:, :, :4] 16 | convert_matrix = torch.tensor( 17 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 18 | ) 19 | boxes @= convert_matrix 20 | boxes *= torch.as_tensor([[*self.img_size]]).flip(1).tile([1, 2]).unsqueeze(1) 21 | scores, labels = torch.max(x[:, :, 4:], dim=-1, keepdim=True) 22 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 23 | 24 | 25 | def rtdetr_ultralytics_export(weights, device): 26 | model = RTDETR(weights) 27 | model = deepcopy(model.model).to(device) 28 | for p in model.parameters(): 29 | p.requires_grad = False 30 | model.eval() 31 | model.float() 32 | model = model.fuse() 33 | for k, m in model.named_modules(): 34 | if m.__class__.__name__ in ('Detect', 'RTDETRDecoder'): 35 | m.dynamic = False 36 | m.export = True 37 | m.format = 'onnx' 38 | elif m.__class__.__name__ == 'C2f': 39 | m.forward = m.forward_split 40 | return model 41 | 42 | 43 | def suppress_warnings(): 44 | import warnings 45 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 46 | warnings.filterwarnings('ignore', category=UserWarning) 47 | warnings.filterwarnings('ignore', category=DeprecationWarning) 48 | warnings.filterwarnings('ignore', category=FutureWarning) 49 | warnings.filterwarnings('ignore', 
category=ResourceWarning) 50 | 51 | 52 | def main(args): 53 | suppress_warnings() 54 | 55 | print(f'\nStarting: {args.weights}') 56 | 57 | print('Opening RT-DETR Ultralytics model') 58 | 59 | device = torch.device('cpu') 60 | model = rtdetr_ultralytics_export(args.weights, device) 61 | 62 | if len(model.names.keys()) > 0: 63 | print('Creating labels.txt file') 64 | with open('labels.txt', 'w', encoding='utf-8') as f: 65 | for name in model.names.values(): 66 | f.write(f'{name}\n') 67 | 68 | img_size = args.size * 2 if len(args.size) == 1 else args.size 69 | 70 | model = nn.Sequential(model, DeepStreamOutput(img_size)) 71 | 72 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 73 | onnx_output_file = f'{args.weights}.onnx' 74 | 75 | dynamic_axes = { 76 | 'input': { 77 | 0: 'batch' 78 | }, 79 | 'output': { 80 | 0: 'batch' 81 | } 82 | } 83 | 84 | print('Exporting the model to ONNX') 85 | torch.onnx.export( 86 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 87 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 88 | ) 89 | 90 | if args.simplify: 91 | print('Simplifying is not available for this model') 92 | 93 | print(f'Done: {onnx_output_file}\n') 94 | 95 | 96 | def parse_args(): 97 | import argparse 98 | parser = argparse.ArgumentParser(description='DeepStream RT-DETR Ultralytics conversion') 99 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') 100 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 101 | parser.add_argument('--opset', type=int, default=17, help='ONNX opset version') 102 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 103 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 104 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 105 | args = parser.parse_args() 106 | if not os.path.isfile(args.weights): 107 | raise SystemExit('Invalid weights file') 108 | if args.dynamic and args.batch > 1: 109 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 110 | return args 111 | 112 | 113 | if __name__ == '__main__': 114 | args = parse_args() 115 | main(args) 116 | -------------------------------------------------------------------------------- /utils/export_yoloV5.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | from models.experimental import attempt_load 7 | 8 | 9 | class DeepStreamOutput(nn.Module): 10 | def __init__(self): 11 | super().__init__() 12 | 13 | def forward(self, x): 14 | x = x[0] 15 | boxes = x[:, :, :4] 16 | convert_matrix = torch.tensor( 17 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 18 | ) 19 | boxes @= convert_matrix 20 | objectness = x[:, :, 4:5] 21 | scores, labels = torch.max(x[:, :, 5:], dim=-1, keepdim=True) 22 | scores *= objectness 23 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 24 | 25 | 26 | def yolov5_export(weights, device, inplace=True, fuse=True): 27 | model = attempt_load(weights, device=device, inplace=inplace, fuse=fuse) 28 | model.eval() 29 | for k, m in model.named_modules(): 30 | if m.__class__.__name__ == 'Detect': 31 | m.inplace = False 32 | m.dynamic = False 33 | 
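# Assumption for recent YOLOv5 releases: with export=True the Detect head returns
# only the concatenated decoded predictions, which DeepStreamOutput unpacks via x = x[0].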
m.export = True 34 | return model 35 | 36 | 37 | def suppress_warnings(): 38 | import warnings 39 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 40 | warnings.filterwarnings('ignore', category=UserWarning) 41 | warnings.filterwarnings('ignore', category=DeprecationWarning) 42 | warnings.filterwarnings('ignore', category=FutureWarning) 43 | warnings.filterwarnings('ignore', category=ResourceWarning) 44 | 45 | 46 | def main(args): 47 | suppress_warnings() 48 | 49 | print(f'\nStarting: {args.weights}') 50 | 51 | print('Opening YOLOv5 model') 52 | 53 | device = torch.device('cpu') 54 | model = yolov5_export(args.weights, device) 55 | 56 | if len(model.names.keys()) > 0: 57 | print('Creating labels.txt file') 58 | with open('labels.txt', 'w', encoding='utf-8') as f: 59 | for name in model.names.values(): 60 | f.write(f'{name}\n') 61 | 62 | model = nn.Sequential(model, DeepStreamOutput()) 63 | 64 | img_size = args.size * 2 if len(args.size) == 1 else args.size 65 | 66 | if img_size == [640, 640] and args.p6: 67 | img_size = [1280] * 2 68 | 69 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 70 | onnx_output_file = f'{args.weights}.onnx' 71 | 72 | dynamic_axes = { 73 | 'input': { 74 | 0: 'batch' 75 | }, 76 | 'output': { 77 | 0: 'batch' 78 | } 79 | } 80 | 81 | print('Exporting the model to ONNX') 82 | torch.onnx.export( 83 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 84 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 85 | ) 86 | 87 | if args.simplify: 88 | print('Simplifying the ONNX model') 89 | import onnxslim 90 | model_onnx = onnx.load(onnx_output_file) 91 | model_onnx = onnxslim.slim(model_onnx) 92 | onnx.save(model_onnx, onnx_output_file) 93 | 94 | print(f'Done: {onnx_output_file}\n') 95 | 96 | 97 | def parse_args(): 98 | import argparse 99 | parser = argparse.ArgumentParser(description='DeepStream YOLOv5 conversion') 100 | parser.add_argument('-w', '--weights', required=True, type=str, help='Input weights (.pt) file path (required)') 101 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 102 | parser.add_argument('--p6', action='store_true', help='P6 model') 103 | parser.add_argument('--opset', type=int, default=17, help='ONNX opset version') 104 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 105 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 106 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 107 | args = parser.parse_args() 108 | if not os.path.isfile(args.weights): 109 | raise SystemExit('Invalid weights file') 110 | if args.dynamic and args.batch > 1: 111 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 112 | return args 113 | 114 | 115 | if __name__ == '__main__': 116 | args = parse_args() 117 | main(args) 118 | -------------------------------------------------------------------------------- /utils/export_yoloV6.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | from yolov6.models.effidehead import Detect 7 | from yolov6.layers.common import RepVGGBlock, SiLU 8 | from yolov6.utils.checkpoint import load_checkpoint 9 | 10 | try: 11 | from yolov6.layers.common import ConvModule 12 | except ImportError: 13 | 
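# Fallback assumed for older YOLOv6 releases, where the fused conv block is
# exposed as Conv instead of ConvModule.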
from yolov6.layers.common import Conv as ConvModule 14 | 15 | 16 | class DeepStreamOutput(nn.Module): 17 | def __init__(self): 18 | super().__init__() 19 | 20 | def forward(self, x): 21 | boxes = x[:, :, :4] 22 | convert_matrix = torch.tensor( 23 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 24 | ) 25 | boxes @= convert_matrix 26 | objectness = x[:, :, 4:5] 27 | scores, labels = torch.max(x[:, :, 5:], dim=-1, keepdim=True) 28 | scores *= objectness 29 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 30 | 31 | 32 | def yolov6_export(weights, device): 33 | model = load_checkpoint(weights, map_location=device, inplace=True, fuse=True) 34 | for layer in model.modules(): 35 | if isinstance(layer, RepVGGBlock): 36 | layer.switch_to_deploy() 37 | elif isinstance(layer, nn.Upsample) and not hasattr(layer, 'recompute_scale_factor'): 38 | layer.recompute_scale_factor = None 39 | model.eval() 40 | for k, m in model.named_modules(): 41 | if isinstance(m, ConvModule): 42 | if hasattr(m, 'act') and isinstance(m.act, nn.SiLU): 43 | m.act = SiLU() 44 | elif isinstance(m, Detect): 45 | m.inplace = False 46 | return model 47 | 48 | 49 | def suppress_warnings(): 50 | import warnings 51 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 52 | warnings.filterwarnings('ignore', category=UserWarning) 53 | warnings.filterwarnings('ignore', category=DeprecationWarning) 54 | warnings.filterwarnings('ignore', category=FutureWarning) 55 | warnings.filterwarnings('ignore', category=ResourceWarning) 56 | 57 | 58 | def main(args): 59 | suppress_warnings() 60 | 61 | print(f'\nStarting: {args.weights}') 62 | 63 | print('Opening YOLOv6 model') 64 | 65 | device = torch.device('cpu') 66 | model = yolov6_export(args.weights, device) 67 | 68 | model = nn.Sequential(model, DeepStreamOutput()) 69 | 70 | img_size = args.size * 2 if len(args.size) == 1 else args.size 71 | 72 | if img_size == [640, 640] and args.p6: 73 | img_size = [1280] * 2 74 | 75 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 76 | onnx_output_file = f'{args.weights}.onnx' 77 | 78 | dynamic_axes = { 79 | 'input': { 80 | 0: 'batch' 81 | }, 82 | 'output': { 83 | 0: 'batch' 84 | } 85 | } 86 | 87 | print('Exporting the model to ONNX') 88 | torch.onnx.export( 89 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 90 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 91 | ) 92 | 93 | if args.simplify: 94 | print('Simplifying the ONNX model') 95 | import onnxslim 96 | model_onnx = onnx.load(onnx_output_file) 97 | model_onnx = onnxslim.slim(model_onnx) 98 | onnx.save(model_onnx, onnx_output_file) 99 | 100 | print(f'Done: {onnx_output_file}\n') 101 | 102 | 103 | def parse_args(): 104 | import argparse 105 | parser = argparse.ArgumentParser(description='DeepStream YOLOv6 conversion') 106 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') 107 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 108 | parser.add_argument('--p6', action='store_true', help='P6 model') 109 | parser.add_argument('--opset', type=int, default=13, help='ONNX opset version') 110 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 111 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 112 | 
parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 113 | args = parser.parse_args() 114 | if not os.path.isfile(args.weights): 115 | raise SystemExit('Invalid weights file') 116 | if args.dynamic and args.batch > 1: 117 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 118 | return args 119 | 120 | 121 | if __name__ == '__main__': 122 | args = parse_args() 123 | main(args) 124 | -------------------------------------------------------------------------------- /utils/export_yoloV7.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | import models 7 | from models.experimental import attempt_load 8 | from utils.torch_utils import select_device 9 | from utils.activations import Hardswish, SiLU 10 | 11 | 12 | class DeepStreamOutput(nn.Module): 13 | def __init__(self): 14 | super().__init__() 15 | 16 | def forward(self, x): 17 | boxes = x[:, :, :4] 18 | convert_matrix = torch.tensor( 19 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 20 | ) 21 | boxes @= convert_matrix 22 | objectness = x[:, :, 4:5] 23 | scores, labels = torch.max(x[:, :, 5:], dim=-1, keepdim=True) 24 | scores *= objectness 25 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 26 | 27 | 28 | def yolov7_export(weights, device): 29 | model = attempt_load(weights, map_location=device) 30 | for k, m in model.named_modules(): 31 | m._non_persistent_buffers_set = set() 32 | if isinstance(m, models.common.Conv): 33 | if isinstance(m.act, nn.Hardswish): 34 | m.act = Hardswish() 35 | elif isinstance(m.act, nn.SiLU): 36 | m.act = SiLU() 37 | model.model[-1].export = False 38 | model.model[-1].concat = True 39 | model.eval() 40 | return model 41 | 42 | 43 | def suppress_warnings(): 44 | import warnings 45 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 46 | warnings.filterwarnings('ignore', category=UserWarning) 47 | warnings.filterwarnings('ignore', category=DeprecationWarning) 48 | warnings.filterwarnings('ignore', category=FutureWarning) 49 | warnings.filterwarnings('ignore', category=ResourceWarning) 50 | 51 | 52 | def main(args): 53 | suppress_warnings() 54 | 55 | print(f'\nStarting: {args.weights}') 56 | 57 | print('Opening YOLOv7 model') 58 | 59 | device = select_device('cpu') 60 | model = yolov7_export(args.weights, device) 61 | 62 | if hasattr(model, 'names') and len(model.names) > 0: 63 | print('Creating labels.txt file') 64 | with open('labels.txt', 'w', encoding='utf-8') as f: 65 | for name in model.names: 66 | f.write(f'{name}\n') 67 | 68 | model = nn.Sequential(model, DeepStreamOutput()) 69 | 70 | img_size = args.size * 2 if len(args.size) == 1 else args.size 71 | 72 | if img_size == [640, 640] and args.p6: 73 | img_size = [1280] * 2 74 | 75 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 76 | onnx_output_file = f'{args.weights}.onnx' 77 | 78 | dynamic_axes = { 79 | 'input': { 80 | 0: 'batch' 81 | }, 82 | 'output': { 83 | 0: 'batch' 84 | } 85 | } 86 | 87 | print('Exporting the model to ONNX') 88 | torch.onnx.export( 89 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 90 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 91 | ) 92 | 93 | if args.simplify: 94 | print('Simplifying the ONNX model') 95 | import onnxslim 96 | 
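# onnxslim is an optional dependency (pip install onnxslim); importing it lazily
# here keeps --simplify opt-in.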
model_onnx = onnx.load(onnx_output_file) 97 | model_onnx = onnxslim.slim(model_onnx) 98 | onnx.save(model_onnx, onnx_output_file) 99 | 100 | print(f'Done: {onnx_output_file}\n') 101 | 102 | 103 | def parse_args(): 104 | import argparse 105 | parser = argparse.ArgumentParser(description='DeepStream YOLOv7 conversion') 106 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') 107 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 108 | parser.add_argument('--p6', action='store_true', help='P6 model') 109 | parser.add_argument('--opset', type=int, default=12, help='ONNX opset version') 110 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 111 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 112 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 113 | args = parser.parse_args() 114 | if not os.path.isfile(args.weights): 115 | raise SystemExit('Invalid weights file') 116 | if args.dynamic and args.batch > 1: 117 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 118 | return args 119 | 120 | 121 | if __name__ == '__main__': 122 | args = parse_args() 123 | main(args) 124 | -------------------------------------------------------------------------------- /utils/export_yoloV7_u6.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | from utils.torch_utils import select_device 7 | from models.experimental import attempt_load 8 | from models.yolo import Detect, V6Detect, IV6Detect 9 | 10 | 11 | class DeepStreamOutput(nn.Module): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | def forward(self, x): 16 | x = x.transpose(1, 2) 17 | boxes = x[:, :, :4] 18 | convert_matrix = torch.tensor( 19 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 20 | ) 21 | boxes @= convert_matrix 22 | scores, labels = torch.max(x[:, :, 4:], dim=-1, keepdim=True) 23 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 24 | 25 | 26 | def yolov7_u6_export(weights, device): 27 | model = attempt_load(weights, device=device, inplace=True, fuse=True) 28 | model.eval() 29 | for k, m in model.named_modules(): 30 | if isinstance(m, (Detect, V6Detect, IV6Detect)): 31 | m.inplace = False 32 | m.dynamic = False 33 | m.export = True 34 | return model 35 | 36 | 37 | def suppress_warnings(): 38 | import warnings 39 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 40 | warnings.filterwarnings('ignore', category=UserWarning) 41 | warnings.filterwarnings('ignore', category=DeprecationWarning) 42 | warnings.filterwarnings('ignore', category=FutureWarning) 43 | warnings.filterwarnings('ignore', category=ResourceWarning) 44 | 45 | 46 | def main(args): 47 | suppress_warnings() 48 | 49 | print(f'\nStarting: {args.weights}') 50 | 51 | print('Opening YOLOv7_u6 model') 52 | 53 | device = select_device('cpu') 54 | model = yolov7_u6_export(args.weights, device) 55 | 56 | if len(model.names.keys()) > 0: 57 | print('Creating labels.txt file') 58 | with open('labels.txt', 'w', encoding='utf-8') as f: 59 | for name in model.names.values(): 60 | f.write(f'{name}\n') 61 | 62 | model = nn.Sequential(model, DeepStreamOutput()) 63 | 64 | img_size = args.size * 2 if len(args.size) == 1 else args.size 65 
| 66 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 67 | onnx_output_file = f'{args.weights}.onnx' 68 | 69 | dynamic_axes = { 70 | 'input': { 71 | 0: 'batch' 72 | }, 73 | 'output': { 74 | 0: 'batch' 75 | } 76 | } 77 | 78 | print('Exporting the model to ONNX') 79 | torch.onnx.export( 80 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 81 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 82 | ) 83 | 84 | if args.simplify: 85 | print('Simplifying the ONNX model') 86 | import onnxslim 87 | model_onnx = onnx.load(onnx_output_file) 88 | model_onnx = onnxslim.slim(model_onnx) 89 | onnx.save(model_onnx, onnx_output_file) 90 | 91 | print(f'Done: {onnx_output_file}\n') 92 | 93 | 94 | def parse_args(): 95 | import argparse 96 | parser = argparse.ArgumentParser(description='DeepStream YOLOv7-u6 conversion') 97 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') 98 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 99 | parser.add_argument('--opset', type=int, default=12, help='ONNX opset version') 100 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 101 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 102 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 103 | args = parser.parse_args() 104 | if not os.path.isfile(args.weights): 105 | raise SystemExit('Invalid weights file') 106 | if args.dynamic and args.batch > 1: 107 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 108 | return args 109 | 110 | 111 | if __name__ == '__main__': 112 | args = parse_args() 113 | main(args) 114 | -------------------------------------------------------------------------------- /utils/export_yolonas.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | from super_gradients.training import models 7 | 8 | 9 | class DeepStreamOutput(nn.Module): 10 | def __init__(self): 11 | super().__init__() 12 | 13 | def forward(self, x): 14 | boxes = x[0] 15 | scores, labels = torch.max(x[1], dim=-1, keepdim=True) 16 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 17 | 18 | 19 | def suppress_warnings(): 20 | import warnings 21 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 22 | warnings.filterwarnings('ignore', category=UserWarning) 23 | warnings.filterwarnings('ignore', category=DeprecationWarning) 24 | warnings.filterwarnings('ignore', category=FutureWarning) 25 | warnings.filterwarnings('ignore', category=ResourceWarning) 26 | 27 | 28 | def yolonas_export(model_name, weights, num_classes, size): 29 | img_size = size * 2 if len(size) == 1 else size 30 | model = models.get(model_name, num_classes=num_classes, checkpoint_path=weights) 31 | model.eval() 32 | model.prep_model_for_conversion(input_size=[1, 3, *img_size]) 33 | return model 34 | 35 | 36 | def main(args): 37 | suppress_warnings() 38 | 39 | print(f'\nStarting: {args.weights}') 40 | 41 | print('Opening YOLO-NAS model') 42 | 43 | device = torch.device('cpu') 44 | model = yolonas_export(args.model, args.weights, args.classes, args.size) 45 | 46 | model = nn.Sequential(model, DeepStreamOutput()) 47 | 48 | img_size = args.size * 2 if len(args.size) 
== 1 else args.size 49 | 50 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 51 | onnx_output_file = f'{args.weights}.onnx' 52 | 53 | dynamic_axes = { 54 | 'input': { 55 | 0: 'batch' 56 | }, 57 | 'output': { 58 | 0: 'batch' 59 | } 60 | } 61 | 62 | print('Exporting the model to ONNX') 63 | torch.onnx.export( 64 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 65 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 66 | ) 67 | 68 | if args.simplify: 69 | print('Simplifying the ONNX model') 70 | import onnxslim 71 | model_onnx = onnx.load(onnx_output_file) 72 | model_onnx = onnxslim.slim(model_onnx) 73 | onnx.save(model_onnx, onnx_output_file) 74 | 75 | print(f'Done: {onnx_output_file}\n') 76 | 77 | 78 | def parse_args(): 79 | import argparse 80 | parser = argparse.ArgumentParser(description='DeepStream YOLO-NAS conversion') 81 | parser.add_argument('-m', '--model', required=True, help='Model name (required)') 82 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') 83 | parser.add_argument('-n', '--classes', type=int, default=80, help='Number of trained classes (default 80)') 84 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 85 | parser.add_argument('--opset', type=int, default=14, help='ONNX opset version') 86 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 87 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 88 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 89 | args = parser.parse_args() 90 | if args.model == '': 91 | raise SystemExit('Invalid model name') 92 | if not os.path.isfile(args.weights): 93 | raise SystemExit('Invalid weights file') 94 | if args.dynamic and args.batch > 1: 95 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 96 | return args 97 | 98 | 99 | if __name__ == '__main__': 100 | args = parse_args() 101 | main(args) 102 | -------------------------------------------------------------------------------- /utils/export_yolox.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | from yolox.exp import get_exp 7 | from yolox.utils import replace_module 8 | from yolox.models.network_blocks import SiLU 9 | 10 | 11 | class DeepStreamOutput(nn.Module): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | def forward(self, x): 16 | boxes = x[:, :, :4] 17 | convert_matrix = torch.tensor( 18 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 19 | ) 20 | boxes @= convert_matrix 21 | objectness = x[:, :, 4:5] 22 | scores, labels = torch.max(x[:, :, 5:], dim=-1, keepdim=True) 23 | scores *= objectness 24 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 25 | 26 | 27 | def yolox_export(weights, exp_file): 28 | exp = get_exp(exp_file) 29 | model = exp.get_model() 30 | ckpt = torch.load(weights, map_location='cpu') 31 | model.eval() 32 | if 'model' in ckpt: 33 | ckpt = ckpt['model'] 34 | model.load_state_dict(ckpt) 35 | model = replace_module(model, nn.SiLU, SiLU) 36 | model.head.decode_in_inference = True 37 | return model, exp 38 | 39 | 40 | def suppress_warnings(): 41 | import warnings 42 | 
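# torch.onnx.export traces the model, and tracing data-dependent control flow
# emits TracerWarnings; suppressing them keeps the export log readable.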
warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 43 | warnings.filterwarnings('ignore', category=UserWarning) 44 | warnings.filterwarnings('ignore', category=DeprecationWarning) 45 | warnings.filterwarnings('ignore', category=FutureWarning) 46 | warnings.filterwarnings('ignore', category=ResourceWarning) 47 | 48 | 49 | def main(args): 50 | suppress_warnings() 51 | 52 | print(f'\nStarting: {args.weights}') 53 | 54 | print('Opening YOLOX model') 55 | 56 | device = torch.device('cpu') 57 | model, exp = yolox_export(args.weights, args.exp) 58 | 59 | model = nn.Sequential(model, DeepStreamOutput()) 60 | 61 | img_size = [exp.input_size[1], exp.input_size[0]] 62 | 63 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 64 | onnx_output_file = f'{args.weights}.onnx' 65 | 66 | dynamic_axes = { 67 | 'input': { 68 | 0: 'batch' 69 | }, 70 | 'output': { 71 | 0: 'batch' 72 | } 73 | } 74 | 75 | print('Exporting the model to ONNX') 76 | torch.onnx.export( 77 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 78 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 79 | ) 80 | 81 | if args.simplify: 82 | print('Simplifying the ONNX model') 83 | import onnxslim 84 | model_onnx = onnx.load(onnx_output_file) 85 | model_onnx = onnxslim.slim(model_onnx) 86 | onnx.save(model_onnx, onnx_output_file) 87 | 88 | print(f'Done: {onnx_output_file}\n') 89 | 90 | 91 | def parse_args(): 92 | import argparse 93 | parser = argparse.ArgumentParser(description='DeepStream YOLOX conversion') 94 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') 95 | parser.add_argument('-c', '--exp', required=True, help='Input exp (.py) file path (required)') 96 | parser.add_argument('--opset', type=int, default=11, help='ONNX opset version') 97 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 98 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 99 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 100 | args = parser.parse_args() 101 | if not os.path.isfile(args.weights): 102 | raise SystemExit('Invalid weights file') 103 | if args.dynamic and args.batch > 1: 104 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 105 | return args 106 | 107 | 108 | if __name__ == '__main__': 109 | args = parse_args() 110 | main(args) 111 | --------------------------------------------------------------------------------
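A closing note on the shared output contract: every DeepStreamOutput wrapper in the export scripts above packs its model's predictions into a single [batch, num_candidates, 6] tensor of (left, top, right, bottom, score, class_id), which is the layout the NvDsInferParseYolo custom parser consumes. A minimal sketch of that contract, assuming a hypothetical decoded head output of shape [batch, N, 4 + num_classes] with xyxy pixel boxes:

import torch
import torch.nn as nn


class DeepStreamOutputSketch(nn.Module):
    # Hypothetical wrapper illustrating the common [batch, N, 6] layout.

    def forward(self, x):
        # x: [batch, N, 4 + num_classes]; boxes assumed already decoded to xyxy pixels
        boxes = x[:, :, :4]
        # best class confidence and index per candidate box
        scores, labels = torch.max(x[:, :, 4:], dim=-1, keepdim=True)
        # pack as (x1, y1, x2, y2, score, class_id)
        return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1)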