├── .github └── FUNDING.yml ├── LICENSE.md ├── README.md ├── config_infer_primary.txt ├── config_infer_primary_codetr.txt ├── config_infer_primary_damoyolo.txt ├── config_infer_primary_dfine.txt ├── config_infer_primary_goldyolo.txt ├── config_infer_primary_ppyoloe.txt ├── config_infer_primary_ppyoloe_plus.txt ├── config_infer_primary_rtdetr.txt ├── config_infer_primary_rtmdet.txt ├── config_infer_primary_yolo11.txt ├── config_infer_primary_yoloV10.txt ├── config_infer_primary_yoloV2.txt ├── config_infer_primary_yoloV5.txt ├── config_infer_primary_yoloV5u.txt ├── config_infer_primary_yoloV6.txt ├── config_infer_primary_yoloV7.txt ├── config_infer_primary_yoloV8.txt ├── config_infer_primary_yoloV9.txt ├── config_infer_primary_yolonas.txt ├── config_infer_primary_yolonas_custom.txt ├── config_infer_primary_yolor.txt ├── config_infer_primary_yolox.txt ├── config_infer_primary_yolox_legacy.txt ├── deepstream_app_config.txt ├── docs ├── CODETR.md ├── DAMOYOLO.md ├── DFINE.md ├── GoldYOLO.md ├── INT8Calibration.md ├── PPYOLOE.md ├── RTDETR_Paddle.md ├── RTDETR_PyTorch.md ├── RTDETR_Ultralytics.md ├── RTMDet.md ├── YOLO11.md ├── YOLONAS.md ├── YOLOR.md ├── YOLOX.md ├── YOLOv10.md ├── YOLOv5.md ├── YOLOv5u.md ├── YOLOv6.md ├── YOLOv7.md ├── YOLOv8.md ├── YOLOv9.md ├── benchmarks.md ├── customModels.md ├── dGPUInstalation.md ├── multipleGIEs.md └── multipleGIEs_tree.png ├── labels.txt ├── nvdsinfer_custom_impl_Yolo ├── Makefile ├── calibrator.cpp ├── calibrator.h ├── layers │ ├── activation_layer.cpp │ ├── activation_layer.h │ ├── batchnorm_layer.cpp │ ├── batchnorm_layer.h │ ├── channels_layer.cpp │ ├── channels_layer.h │ ├── convolutional_layer.cpp │ ├── convolutional_layer.h │ ├── deconvolutional_layer.cpp │ ├── deconvolutional_layer.h │ ├── implicit_layer.cpp │ ├── implicit_layer.h │ ├── pooling_layer.cpp │ ├── pooling_layer.h │ ├── reorg_layer.cpp │ ├── reorg_layer.h │ ├── route_layer.cpp │ ├── route_layer.h │ ├── sam_layer.cpp │ ├── sam_layer.h │ ├── shortcut_layer.cpp │ ├── shortcut_layer.h │ ├── slice_layer.cpp │ ├── slice_layer.h │ ├── upsample_layer.cpp │ └── upsample_layer.h ├── nvdsinfer_yolo_engine.cpp ├── nvdsparsebbox_Yolo.cpp ├── nvdsparsebbox_Yolo_cuda.cu ├── utils.cpp ├── utils.h ├── yolo.cpp ├── yolo.h ├── yoloForward.cu ├── yoloForward_nc.cu ├── yoloForward_v2.cu ├── yoloPlugins.cpp └── yoloPlugins.h └── utils ├── export_codetr.py ├── export_damoyolo.py ├── export_dfine.py ├── export_goldyolo.py ├── export_ppyoloe.py ├── export_rtdetr_paddle.py ├── export_rtdetr_pytorch.py ├── export_rtdetr_ultralytics.py ├── export_rtmdet.py ├── export_yolo11.py ├── export_yoloV10.py ├── export_yoloV5.py ├── export_yoloV5u.py ├── export_yoloV6.py ├── export_yoloV7.py ├── export_yoloV7_u6.py ├── export_yoloV8.py ├── export_yoloV9.py ├── export_yolonas.py ├── export_yolor.py └── export_yolox.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | #github: [marcoslucianops] 2 | custom: ['https://www.buymeacoffee.com/marcoslucianops'] 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-2023, Marcos Luciano Piropo Santos. 4 | Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /config_infer_primary.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | custom-network-config=yolov4.cfg 6 | model-file=yolov4.weights 7 | model-engine-file=model_b1_gpu0_fp32.engine 8 | #int8-calib-file=calib.table 9 | labelfile-path=labels.txt 10 | batch-size=1 11 | network-mode=0 12 | num-detected-classes=80 13 | interval=0 14 | gie-unique-id=1 15 | process-mode=1 16 | network-type=0 17 | cluster-mode=2 18 | maintain-aspect-ratio=0 19 | symmetric-padding=1 20 | force-implicit-batch-dim=0 21 | #workspace-size=2000 22 | parse-bbox-func-name=NvDsInferParseYolo 23 | #parse-bbox-func-name=NvDsInferParseYoloCuda 24 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 25 | engine-create-func-name=NvDsInferYoloCudaEngineGet 26 | 27 | [class-attrs-all] 28 | nms-iou-threshold=0.45 29 | pre-cluster-threshold=0.25 30 | topk=300 31 | -------------------------------------------------------------------------------- /config_infer_primary_codetr.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=co_dino_5scale_r50_1x_coco-7481f903.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=0 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_damoyolo.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=1 4 | model-color-format=0 5 | onnx-file=damoyolo_tinynasL25_S.onnx 6 | 
model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=0 18 | #workspace-size=2000 19 | parse-bbox-func-name=NvDsInferParseYolo 20 | #parse-bbox-func-name=NvDsInferParseYoloCuda 21 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 22 | engine-create-func-name=NvDsInferYoloCudaEngineGet 23 | 24 | [class-attrs-all] 25 | nms-iou-threshold=0.45 26 | pre-cluster-threshold=0.25 27 | topk=300 28 | -------------------------------------------------------------------------------- /config_infer_primary_dfine.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=dfine_s_coco.pth.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=4 17 | maintain-aspect-ratio=0 18 | #workspace-size=2000 19 | parse-bbox-func-name=NvDsInferParseYolo 20 | #parse-bbox-func-name=NvDsInferParseYoloCuda 21 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 22 | engine-create-func-name=NvDsInferYoloCudaEngineGet 23 | 24 | [class-attrs-all] 25 | pre-cluster-threshold=0.25 26 | topk=300 27 | -------------------------------------------------------------------------------- /config_infer_primary_goldyolo.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=Gold_s_pre_dist.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_ppyoloe.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0173520735727919486 4 | offsets=123.675;116.28;103.53 5 | model-color-format=0 6 | onnx-file=ppyoloe_crn_s_400e_coco.onnx 7 | model-engine-file=model_b1_gpu0_fp32.engine 8 | #int8-calib-file=calib.table 9 | labelfile-path=labels.txt 10 | batch-size=1 11 | network-mode=0 12 | num-detected-classes=80 13 | interval=0 14 | gie-unique-id=1 15 | process-mode=1 16 | network-type=0 17 | cluster-mode=2 18 | maintain-aspect-ratio=0 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | 
engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_ppyoloe_plus.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=ppyoloe_plus_crn_s_80e_coco.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=0 18 | #workspace-size=2000 19 | parse-bbox-func-name=NvDsInferParseYolo 20 | #parse-bbox-func-name=NvDsInferParseYoloCuda 21 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 22 | engine-create-func-name=NvDsInferYoloCudaEngineGet 23 | 24 | [class-attrs-all] 25 | nms-iou-threshold=0.45 26 | pre-cluster-threshold=0.25 27 | topk=300 28 | -------------------------------------------------------------------------------- /config_infer_primary_rtdetr.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=rtdetr_r50vd_6x_coco_from_paddle.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=4 17 | maintain-aspect-ratio=0 18 | #workspace-size=2000 19 | parse-bbox-func-name=NvDsInferParseYolo 20 | #parse-bbox-func-name=NvDsInferParseYoloCuda 21 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 22 | engine-create-func-name=NvDsInferYoloCudaEngineGet 23 | 24 | [class-attrs-all] 25 | pre-cluster-threshold=0.25 26 | topk=300 27 | -------------------------------------------------------------------------------- /config_infer_primary_rtmdet.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0173520735727919486 4 | offsets=103.53;116.28;123.675 5 | model-color-format=1 6 | onnx-file=rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.onnx 7 | model-engine-file=model_b1_gpu0_fp32.engine 8 | #int8-calib-file=calib.table 9 | labelfile-path=labels.txt 10 | batch-size=1 11 | network-mode=0 12 | num-detected-classes=80 13 | interval=0 14 | gie-unique-id=1 15 | process-mode=1 16 | network-type=0 17 | cluster-mode=2 18 | maintain-aspect-ratio=1 19 | symmetric-padding=1 20 | #workspace-size=2000 21 | parse-bbox-func-name=NvDsInferParseYolo 22 | #parse-bbox-func-name=NvDsInferParseYoloCuda 23 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 24 | engine-create-func-name=NvDsInferYoloCudaEngineGet 25 | 26 | [class-attrs-all] 27 | nms-iou-threshold=0.45 28 | pre-cluster-threshold=0.25 29 | topk=300 30 | -------------------------------------------------------------------------------- /config_infer_primary_yolo11.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | 
onnx-file=yolo11s.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV10.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov10s.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=4 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV2.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | custom-network-config=yolov2.cfg 6 | model-file=yolov2.weights 7 | model-engine-file=model_b1_gpu0_fp32.engine 8 | #int8-calib-file=calib.table 9 | labelfile-path=labels.txt 10 | batch-size=1 11 | network-mode=0 12 | num-detected-classes=80 13 | interval=0 14 | gie-unique-id=1 15 | process-mode=1 16 | network-type=0 17 | cluster-mode=2 18 | maintain-aspect-ratio=0 19 | force-implicit-batch-dim=0 20 | #workspace-size=2000 21 | parse-bbox-func-name=NvDsInferParseYolo 22 | #parse-bbox-func-name=NvDsInferParseYoloCuda 23 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 24 | engine-create-func-name=NvDsInferYoloCudaEngineGet 25 | 26 | [class-attrs-all] 27 | nms-iou-threshold=0.45 28 | pre-cluster-threshold=0.25 29 | topk=300 30 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV5.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov5s.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | 
#parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV5u.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov5su.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV6.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov6s.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV7.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov7.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV8.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | 
net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov8s.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV9.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolov9-c.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yolonas.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolo_nas_s_coco.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=0 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yolonas_custom.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=1 4 | model-color-format=0 5 | onnx-file=yolo_nas_s_coco.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=0 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | 
#parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yolor.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=yolor_csp.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yolox.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=1 4 | model-color-format=1 5 | onnx-file=yolox_s.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=0 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /config_infer_primary_yolox_legacy.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0173520735727919486 4 | offsets=123.675;116.28;103.53 5 | model-color-format=0 6 | onnx-file=yolox_s.onnx 7 | model-engine-file=model_b1_gpu0_fp32.engine 8 | #int8-calib-file=calib.table 9 | labelfile-path=labels.txt 10 | batch-size=1 11 | network-mode=0 12 | num-detected-classes=80 13 | interval=0 14 | gie-unique-id=1 15 | process-mode=1 16 | network-type=0 17 | cluster-mode=2 18 | maintain-aspect-ratio=1 19 | symmetric-padding=0 20 | #workspace-size=2000 21 | parse-bbox-func-name=NvDsInferParseYolo 22 | #parse-bbox-func-name=NvDsInferParseYoloCuda 23 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 24 | engine-create-func-name=NvDsInferYoloCudaEngineGet 25 | 26 | [class-attrs-all] 27 | nms-iou-threshold=0.45 28 | pre-cluster-threshold=0.25 29 | topk=300 30 | -------------------------------------------------------------------------------- /deepstream_app_config.txt: -------------------------------------------------------------------------------- 1 | [application] 
2 | enable-perf-measurement=1 3 | perf-measurement-interval-sec=5 4 | 5 | [tiled-display] 6 | enable=1 7 | rows=1 8 | columns=1 9 | width=1280 10 | height=720 11 | gpu-id=0 12 | nvbuf-memory-type=0 13 | 14 | [source0] 15 | enable=1 16 | type=3 17 | uri=file:///opt/nvidia/deepstream/deepstream/samples/streams/sample_1080p_h264.mp4 18 | num-sources=1 19 | gpu-id=0 20 | cudadec-memtype=0 21 | 22 | [sink0] 23 | enable=1 24 | type=2 25 | sync=0 26 | gpu-id=0 27 | nvbuf-memory-type=0 28 | 29 | [osd] 30 | enable=1 31 | gpu-id=0 32 | border-width=5 33 | text-size=15 34 | text-color=1;1;1;1; 35 | text-bg-color=0.3;0.3;0.3;1 36 | font=Serif 37 | show-clock=0 38 | clock-x-offset=800 39 | clock-y-offset=820 40 | clock-text-size=12 41 | clock-color=1;0;0;0 42 | nvbuf-memory-type=0 43 | 44 | [streammux] 45 | gpu-id=0 46 | live-source=0 47 | batch-size=1 48 | batched-push-timeout=40000 49 | width=1920 50 | height=1080 51 | enable-padding=0 52 | nvbuf-memory-type=0 53 | 54 | [primary-gie] 55 | enable=1 56 | gpu-id=0 57 | gie-unique-id=1 58 | nvbuf-memory-type=0 59 | config-file=config_infer_primary.txt 60 | 61 | [tests] 62 | file-loop=0 63 | -------------------------------------------------------------------------------- /docs/CODETR.md: -------------------------------------------------------------------------------- 1 | # CO-DETR (MMDetection) usage 2 | 3 | * [Convert model](#convert-model) 4 | * [Compile the lib](#compile-the-lib) 5 | * [Edit the config_infer_primary_codetr file](#edit-the-config_infer_primary_codetr-file) 6 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 7 | * [Testing the model](#testing-the-model) 8 | 9 | ## 10 | 11 | ### Convert model 12 | 13 | #### 1. Download the CO-DETR (MMDetection) repo and install the requirements 14 | 15 | ``` 16 | git clone https://github.com/open-mmlab/mmdetection.git 17 | cd mmdetection 18 | pip3 install openmim 19 | mim install mmengine 20 | mim install mmdeploy 21 | mim install "mmcv>=2.0.0rc4,<2.2.0" 22 | pip3 install -v -e . 23 | pip3 install onnx onnxslim onnxruntime 24 | ``` 25 | 26 | **NOTE**: It is recommended to use Python virtualenv. 27 | 28 | #### 2. Copy the converter 29 | 30 | Copy the `export_codetr.py` file from the `DeepStream-Yolo/utils` directory to the `mmdetection` folder. 31 | 32 | #### 3. Download the model 33 | 34 | Download the `pth` file from [CO-DETR (MMDetection)](https://github.com/open-mmlab/mmdetection/tree/main/projects/CO-DETR) releases (example for Co-DINO R50 DETR) 35 | 36 | ``` 37 | wget https://download.openmmlab.com/mmdetection/v3.0/codetr/co_dino_5scale_r50_1x_coco-7481f903.pth 38 | ``` 39 | 40 | **NOTE**: You can use your custom model. 41 | 42 | #### 4. 
Convert model 43 | 44 | Generate the ONNX model file (example for Co-DINO R50 DETR) 45 | 46 | ``` 47 | python3 export_codetr.py -w co_dino_5scale_r50_1x_coco-7481f903.pth -c projects/CO-DETR/configs/codino/co_dino_5scale_r50_8xb2_1x_coco.py --dynamic 48 | ``` 49 | 50 | **NOTE**: To change the inference size (defaut: 640) 51 | 52 | ``` 53 | -s SIZE 54 | --size SIZE 55 | -s HEIGHT WIDTH 56 | --size HEIGHT WIDTH 57 | ``` 58 | 59 | Example for 1280 60 | 61 | ``` 62 | -s 1280 63 | ``` 64 | 65 | or 66 | 67 | ``` 68 | -s 1280 1280 69 | ``` 70 | 71 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 72 | 73 | ``` 74 | --simplify 75 | ``` 76 | 77 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 78 | 79 | ``` 80 | --dynamic 81 | ``` 82 | 83 | **NOTE**: To use static batch-size (example for batch-size = 4) 84 | 85 | ``` 86 | --batch 4 87 | ``` 88 | 89 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 11. 90 | 91 | ``` 92 | --opset 12 93 | ``` 94 | 95 | #### 5. Copy generated files 96 | 97 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 98 | 99 | ## 100 | 101 | ### Compile the lib 102 | 103 | 1. Open the `DeepStream-Yolo` folder and compile the lib 104 | 105 | 2. Set the `CUDA_VER` according to your DeepStream version 106 | 107 | ``` 108 | export CUDA_VER=XY.Z 109 | ``` 110 | 111 | * x86 platform 112 | 113 | ``` 114 | DeepStream 7.1 = 12.6 115 | DeepStream 7.0 / 6.4 = 12.2 116 | DeepStream 6.3 = 12.1 117 | DeepStream 6.2 = 11.8 118 | DeepStream 6.1.1 = 11.7 119 | DeepStream 6.1 = 11.6 120 | DeepStream 6.0.1 / 6.0 = 11.4 121 | DeepStream 5.1 = 11.1 122 | ``` 123 | 124 | * Jetson platform 125 | 126 | ``` 127 | DeepStream 7.1 = 12.6 128 | DeepStream 7.0 / 6.4 = 12.2 129 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 130 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 131 | ``` 132 | 133 | 3. Make the lib 134 | 135 | ``` 136 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 137 | ``` 138 | 139 | ## 140 | 141 | ### Edit the config_infer_primary_codetr file 142 | 143 | Edit the `config_infer_primary_codetr.txt` file according to your model (example for Co-DINO R50 DETR with 80 classes) 144 | 145 | ``` 146 | [property] 147 | ... 148 | onnx-file=co_dino_5scale_r50_1x_coco-7481f903.pth.onnx 149 | ... 150 | num-detected-classes=80 151 | ... 152 | parse-bbox-func-name=NvDsInferParseYolo 153 | ... 154 | ``` 155 | 156 | **NOTE**: The **CO-DETR (MMDetection)** resizes the input with left/top padding. To get better accuracy, use 157 | 158 | ``` 159 | [property] 160 | ... 161 | maintain-aspect-ratio=1 162 | symmetric-padding=0 163 | ... 164 | ``` 165 | 166 | ## 167 | 168 | ### Edit the deepstream_app_config file 169 | 170 | ``` 171 | ... 172 | [primary-gie] 173 | ... 174 | config-file=config_infer_primary_codetr.txt 175 | ``` 176 | 177 | ## 178 | 179 | ### Testing the model 180 | 181 | ``` 182 | deepstream-app -c deepstream_app_config.txt 183 | ``` 184 | 185 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 186 | 187 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 
188 | -------------------------------------------------------------------------------- /docs/DAMOYOLO.md: -------------------------------------------------------------------------------- 1 | # DAMO-YOLO usage 2 | 3 | * [Convert model](#convert-model) 4 | * [Compile the lib](#compile-the-lib) 5 | * [Edit the config_infer_primary_damoyolo file](#edit-the-config_infer_primary_damoyolo-file) 6 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 7 | * [Testing the model](#testing-the-model) 8 | 9 | ## 10 | 11 | ### Convert model 12 | 13 | #### 1. Download the DAMO-YOLO repo and install the requirements 14 | 15 | ``` 16 | git clone https://github.com/tinyvision/DAMO-YOLO.git 17 | cd DAMO-YOLO 18 | pip3 install -r requirements.txt 19 | pip3 install onnx onnxslim onnxruntime 20 | ``` 21 | 22 | **NOTE**: It is recommended to use Python virtualenv. 23 | 24 | #### 2. Copy the converter 25 | 26 | Copy the `export_damoyolo.py` file from the `DeepStream-Yolo/utils` directory to the `DAMO-YOLO` folder. 27 | 28 | #### 3. Download the model 29 | 30 | Download the `pth` file from [DAMO-YOLO](https://github.com/tinyvision/DAMO-YOLO) releases (example for DAMO-YOLO-S*) 31 | 32 | ``` 33 | wget https://idstcv.oss-cn-zhangjiakou.aliyuncs.com/DAMO-YOLO/release_model/clean_model_0317/damoyolo_tinynasL25_S_477.pth 34 | ``` 35 | 36 | **NOTE**: You can use your custom model. 37 | 38 | #### 4. Convert model 39 | 40 | Generate the ONNX model file (example for DAMO-YOLO-S*) 41 | 42 | ``` 43 | python3 export_damoyolo.py -w damoyolo_tinynasL25_S_477.pth -c configs/damoyolo_tinynasL25_S.py --dynamic 44 | ``` 45 | 46 | **NOTE**: To change the inference size (default: 640) 47 | 48 | ``` 49 | -s SIZE 50 | --size SIZE 51 | -s HEIGHT WIDTH 52 | --size HEIGHT WIDTH 53 | ``` 54 | 55 | Example for 1280 56 | 57 | ``` 58 | -s 1280 59 | ``` 60 | 61 | or 62 | 63 | ``` 64 | -s 1280 1280 65 | ``` 66 | 67 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 68 | 69 | ``` 70 | --simplify 71 | ``` 72 | 73 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 74 | 75 | ``` 76 | --dynamic 77 | ``` 78 | 79 | **NOTE**: To use static batch-size (example for batch-size = 4) 80 | 81 | ``` 82 | --batch 4 83 | ``` 84 | 85 | **NOTE**: If you are using DeepStream 5.1, remove the `--dynamic` arg and use opset 11 or lower. The default opset is 11. 86 | 87 | ``` 88 | --opset 11 89 | ``` 90 | 91 | #### 5. Copy generated files 92 | 93 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 94 | 95 | ## 96 | 97 | ### Compile the lib 98 | 99 | 1. Open the `DeepStream-Yolo` folder and compile the lib 100 | 101 | 2. Set the `CUDA_VER` according to your DeepStream version 102 | 103 | ``` 104 | export CUDA_VER=XY.Z 105 | ``` 106 | 107 | * x86 platform 108 | 109 | ``` 110 | DeepStream 7.1 = 12.6 111 | DeepStream 7.0 / 6.4 = 12.2 112 | DeepStream 6.3 = 12.1 113 | DeepStream 6.2 = 11.8 114 | DeepStream 6.1.1 = 11.7 115 | DeepStream 6.1 = 11.6 116 | DeepStream 6.0.1 / 6.0 = 11.4 117 | DeepStream 5.1 = 11.1 118 | ``` 119 | 120 | * Jetson platform 121 | 122 | ``` 123 | DeepStream 7.1 = 12.6 124 | DeepStream 7.0 / 6.4 = 12.2 125 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 126 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 127 | ``` 128 | 129 | 3. 
Make the lib 130 | 131 | ``` 132 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 133 | ``` 134 | 135 | ## 136 | 137 | ### Edit the config_infer_primary_damoyolo file 138 | 139 | Edit the `config_infer_primary_damoyolo.txt` file according to your model (example for DAMO-YOLO-S* with 80 classes) 140 | 141 | ``` 142 | [property] 143 | ... 144 | onnx-file=damoyolo_tinynasL25_S_477.pth.onnx 145 | ... 146 | num-detected-classes=80 147 | ... 148 | parse-bbox-func-name=NvDsInferParseYolo 149 | ... 150 | ``` 151 | 152 | **NOTE**: The **DAMO-YOLO** does not resize the input with padding. To get better accuracy, use 153 | 154 | ``` 155 | [property] 156 | ... 157 | maintain-aspect-ratio=0 158 | ... 159 | ``` 160 | 161 | ## 162 | 163 | ### Edit the deepstream_app_config file 164 | 165 | ``` 166 | ... 167 | [primary-gie] 168 | ... 169 | config-file=config_infer_primary_damoyolo.txt 170 | ``` 171 | 172 | ## 173 | 174 | ### Testing the model 175 | 176 | ``` 177 | deepstream-app -c deepstream_app_config.txt 178 | ``` 179 | 180 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 181 | 182 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 183 | -------------------------------------------------------------------------------- /docs/DFINE.md: -------------------------------------------------------------------------------- 1 | # D-FINE usage 2 | 3 | * [Convert model](#convert-model) 4 | * [Compile the lib](#compile-the-lib) 5 | * [Edit the config_infer_primary_dfine file](#edit-the-config_infer_primary_dfine-file) 6 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 7 | * [Testing the model](#testing-the-model) 8 | 9 | ## 10 | 11 | ### Convert model 12 | 13 | #### 1. Download the D-FINE repo and install the requirements 14 | 15 | ``` 16 | git clone https://github.com/Peterande/D-FINE.git 17 | cd D-FINE 18 | pip3 install -r requirements.txt 19 | pip3 install onnx onnxslim onnxruntime 20 | ``` 21 | 22 | **NOTE**: It is recommended to use Python virtualenv. 23 | 24 | #### 2. Copy the converter 25 | 26 | Copy the `export_dfine.py` file from the `DeepStream-Yolo/utils` directory to the `D-FINE` folder. 27 | 28 | #### 3. Download the model 29 | 30 | Download the `pth` file from [D-FINE](https://github.com/Peterande/storage/releases/tag/dfinev1.0) releases (example for D-FINE-S) 31 | 32 | ``` 33 | wget https://github.com/Peterande/storage/releases/download/dfinev1.0/dfine_s_coco.pth 34 | ``` 35 | 36 | **NOTE**: You can use your custom model. 37 | 38 | #### 4. 
Convert model 39 | 40 | Generate the ONNX model file (example for D-FINE-S) 41 | 42 | ``` 43 | python3 export_dfine.py -w dfine_s_coco.pth -c configs/dfine/dfine_hgnetv2_s_coco.yml --dynamic 44 | ``` 45 | 46 | **NOTE**: To change the inference size (default: 640) 47 | 48 | ``` 49 | -s SIZE 50 | --size SIZE 51 | -s HEIGHT WIDTH 52 | --size HEIGHT WIDTH 53 | ``` 54 | 55 | Example for 1280 56 | 57 | ``` 58 | -s 1280 59 | ``` 60 | 61 | or 62 | 63 | ``` 64 | -s 1280 1280 65 | ``` 66 | 67 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 68 | 69 | ``` 70 | --simplify 71 | ``` 72 | 73 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 74 | 75 | ``` 76 | --dynamic 77 | ``` 78 | 79 | **NOTE**: To use static batch-size (example for batch-size = 4) 80 | 81 | ``` 82 | --batch 4 83 | ``` 84 | 85 | **NOTE**: If you are using DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 16. 86 | 87 | ``` 88 | --opset 12 89 | ``` 90 | 91 | #### 5. Copy generated files 92 | 93 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 94 | 95 | ## 96 | 97 | ### Compile the lib 98 | 99 | 1. Open the `DeepStream-Yolo` folder and compile the lib 100 | 101 | 2. Set the `CUDA_VER` according to your DeepStream version 102 | 103 | ``` 104 | export CUDA_VER=XY.Z 105 | ``` 106 | 107 | * x86 platform 108 | 109 | ``` 110 | DeepStream 7.1 = 12.6 111 | DeepStream 7.0 / 6.4 = 12.2 112 | DeepStream 6.3 = 12.1 113 | DeepStream 6.2 = 11.8 114 | DeepStream 6.1.1 = 11.7 115 | DeepStream 6.1 = 11.6 116 | DeepStream 6.0.1 / 6.0 = 11.4 117 | DeepStream 5.1 = 11.1 118 | ``` 119 | 120 | * Jetson platform 121 | 122 | ``` 123 | DeepStream 7.1 = 12.6 124 | DeepStream 7.0 / 6.4 = 12.2 125 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 126 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 127 | ``` 128 | 129 | 3. Make the lib 130 | 131 | ``` 132 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 133 | ``` 134 | 135 | ## 136 | 137 | ### Edit the config_infer_primary_dfine file 138 | 139 | Edit the `config_infer_primary_dfine.txt` file according to your model (example for D-FINE-S with 80 classes) 140 | 141 | ``` 142 | [property] 143 | ... 144 | onnx-file=dfine_s_coco.pth.onnx 145 | ... 146 | num-detected-classes=80 147 | ... 148 | parse-bbox-func-name=NvDsInferParseYolo 149 | ... 150 | ``` 151 | 152 | **NOTE**: The **D-FINE** does not resize the input with padding. To get better accuracy, use 153 | 154 | ``` 155 | [property] 156 | ... 157 | maintain-aspect-ratio=0 158 | ... 159 | ``` 160 | 161 | **NOTE**: The **D-FINE** does not require NMS. To get better accuracy, use 162 | 163 | ``` 164 | [property] 165 | ... 166 | cluster-mode=4 167 | ... 168 | ``` 169 | 170 | ## 171 | 172 | ### Edit the deepstream_app_config file 173 | 174 | ``` 175 | ... 176 | [primary-gie] 177 | ... 178 | config-file=config_infer_primary_dfine.txt 179 | ``` 180 | 181 | ## 182 | 183 | ### Testing the model 184 | 185 | ``` 186 | deepstream-app -c deepstream_app_config.txt 187 | ``` 188 | 189 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 190 | 191 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 
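**NOTE**: A recurring question about these config files is where the `net-scale-factor` and `offsets` constants come from. nvinfer pre-processes every pixel as `y = net-scale-factor * (pixel - offset)`, so the values encode the normalization the model was trained with. A quick illustrative check in Python follows; the per-channel stds `[58.395, 57.12, 57.375]` are the common ImageNet values used by several of these frameworks, an assumption rather than something stated in this repo:

```python
# Where the recurring config constants come from (illustrative only).
# nvinfer pre-processing: y = net-scale-factor * (pixel - offset)

# 1/255 scaling (most configs here, including config_infer_primary_dfine.txt):
print(1 / 255)        # 0.003921568..., ~ net-scale-factor=0.0039215697906911373

# ImageNet normalization (config_infer_primary_ppyoloe.txt and the legacy
# YOLOX config): offsets are the per-channel means, and the scale is the
# reciprocal of the averaged per-channel stds.
std_avg = (58.395 + 57.12 + 57.375) / 3   # 57.63
print(1 / std_avg)    # 0.017352073..., ~ net-scale-factor=0.0173520735727919486
print([123.675, 116.28, 103.53])          # offsets=123.675;116.28;103.53
```

If a custom model was trained with a different normalization, these two keys are the ones to adjust.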
192 | -------------------------------------------------------------------------------- /docs/GoldYOLO.md: -------------------------------------------------------------------------------- 1 | # Gold-YOLO usage 2 | 3 | * [Convert model](#convert-model) 4 | * [Compile the lib](#compile-the-lib) 5 | * [Edit the config_infer_primary_goldyolo file](#edit-the-config_infer_primary_goldyolo-file) 6 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 7 | * [Testing the model](#testing-the-model) 8 | 9 | ## 10 | 11 | ### Convert model 12 | 13 | #### 1. Download the Gold-YOLO repo and install the requirements 14 | 15 | ``` 16 | git clone https://github.com/huawei-noah/Efficient-Computing.git 17 | cd Efficient-Computing/Detection/Gold-YOLO 18 | pip3 install -r requirements.txt 19 | pip3 install onnx onnxslim onnxruntime 20 | ``` 21 | 22 | **NOTE**: It is recommended to use Python virtualenv. 23 | 24 | #### 2. Copy the converter 25 | 26 | Copy the `export_goldyolo.py` file from the `DeepStream-Yolo/utils` directory to the `Gold-YOLO` folder. 27 | 28 | #### 3. Download the model 29 | 30 | Download the `pt` file from [Gold-YOLO](https://github.com/huawei-noah/Efficient-Computing/tree/master/Detection/Gold-YOLO) releases 31 | 32 | **NOTE**: You can use your custom model. 33 | 34 | #### 4. Convert model 35 | 36 | Generate the ONNX model file (example for Gold-YOLO-S) 37 | 38 | ``` 39 | python3 export_goldyolo.py -w Gold_s_pre_dist.pt --dynamic 40 | ``` 41 | 42 | **NOTE**: To change the inference size (default: 640) 43 | 44 | ``` 45 | -s SIZE 46 | --size SIZE 47 | -s HEIGHT WIDTH 48 | --size HEIGHT WIDTH 49 | ``` 50 | 51 | Example for 1280 52 | 53 | ``` 54 | -s 1280 55 | ``` 56 | 57 | or 58 | 59 | ``` 60 | -s 1280 1280 61 | ``` 62 | 63 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 64 | 65 | ``` 66 | --simplify 67 | ``` 68 | 69 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 70 | 71 | ``` 72 | --dynamic 73 | ``` 74 | 75 | **NOTE**: To use static batch-size (example for batch-size = 4) 76 | 77 | ``` 78 | --batch 4 79 | ``` 80 | 81 | **NOTE**: If you are using DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 13. 82 | 83 | ``` 84 | --opset 12 85 | ``` 86 | 87 | #### 5. Copy generated files 88 | 89 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 90 | 91 | ## 92 | 93 | ### Compile the lib 94 | 95 | 1. Open the `DeepStream-Yolo` folder and compile the lib 96 | 97 | 2. Set the `CUDA_VER` according to your DeepStream version 98 | 99 | ``` 100 | export CUDA_VER=XY.Z 101 | ``` 102 | 103 | * x86 platform 104 | 105 | ``` 106 | DeepStream 7.1 = 12.6 107 | DeepStream 7.0 / 6.4 = 12.2 108 | DeepStream 6.3 = 12.1 109 | DeepStream 6.2 = 11.8 110 | DeepStream 6.1.1 = 11.7 111 | DeepStream 6.1 = 11.6 112 | DeepStream 6.0.1 / 6.0 = 11.4 113 | DeepStream 5.1 = 11.1 114 | ``` 115 | 116 | * Jetson platform 117 | 118 | ``` 119 | DeepStream 7.1 = 12.6 120 | DeepStream 7.0 / 6.4 = 12.2 121 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 122 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 123 | ``` 124 | 125 | 3. Make the lib 126 | 127 | ``` 128 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 129 | ``` 130 | 131 | ## 132 | 133 | ### Edit the config_infer_primary_goldyolo file 134 | 135 | Edit the `config_infer_primary_goldyolo.txt` file according to your model (example for Gold-YOLO-S with 80 classes) 136 | 137 | ``` 138 | [property] 139 | ... 
140 | onnx-file=Gold_s_pre_dist.pt.onnx 141 | ... 142 | num-detected-classes=80 143 | ... 144 | parse-bbox-func-name=NvDsInferParseYolo 145 | ... 146 | ``` 147 | 148 | **NOTE**: The **Gold-YOLO** resizes the input with center padding. To get better accuracy, use 149 | 150 | ``` 151 | [property] 152 | ... 153 | maintain-aspect-ratio=1 154 | symmetric-padding=1 155 | ... 156 | ``` 157 | 158 | ## 159 | 160 | ### Edit the deepstream_app_config file 161 | 162 | ``` 163 | ... 164 | [primary-gie] 165 | ... 166 | config-file=config_infer_primary_goldyolo.txt 167 | ``` 168 | 169 | ## 170 | 171 | ### Testing the model 172 | 173 | ``` 174 | deepstream-app -c deepstream_app_config.txt 175 | ``` 176 | 177 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 178 | 179 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 180 | -------------------------------------------------------------------------------- /docs/INT8Calibration.md: -------------------------------------------------------------------------------- 1 | # INT8 calibration (PTQ) 2 | 3 | ### 1. Install OpenCV 4 | 5 | ``` 6 | sudo apt-get install libopencv-dev 7 | ``` 8 | 9 | ### 2. Compile/recompile the `nvdsinfer_custom_impl_Yolo` lib with OpenCV support 10 | 11 | 2.1. Set the `CUDA_VER` according to your DeepStream version 12 | 13 | ``` 14 | export CUDA_VER=XY.Z 15 | ``` 16 | 17 | * x86 platform 18 | 19 | ``` 20 | DeepStream 7.1 = 12.6 21 | DeepStream 7.0 / 6.4 = 12.2 22 | DeepStream 6.3 = 12.1 23 | DeepStream 6.2 = 11.8 24 | DeepStream 6.1.1 = 11.7 25 | DeepStream 6.1 = 11.6 26 | DeepStream 6.0.1 / 6.0 = 11.4 27 | DeepStream 5.1 = 11.1 28 | ``` 29 | 30 | * Jetson platform 31 | 32 | ``` 33 | DeepStream 7.1 = 12.6 34 | DeepStream 7.0 / 6.4 = 12.2 35 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 36 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 37 | ``` 38 | 39 | 2.2. Set the `OPENCV` env 40 | 41 | ``` 42 | export OPENCV=1 43 | ``` 44 | 45 | 2.3. Make the lib 46 | 47 | ``` 48 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 49 | ``` 50 | 51 | ### 3. For COCO dataset, download the [val2017](https://drive.google.com/file/d/1gbvfn7mcsGDRZ_luJwtITL-ru2kK99aK/view?usp=sharing), extract, and move to DeepStream-Yolo folder 52 | 53 | * Select 1000 random images from COCO dataset to run calibration 54 | 55 | ``` 56 | mkdir calibration 57 | ``` 58 | 59 | ``` 60 | for jpg in $(ls -1 val2017/*.jpg | sort -R | head -1000); do \ 61 | cp ${jpg} calibration/; \ 62 | done 63 | ``` 64 | 65 | * Create the `calibration.txt` file with all selected images 66 | 67 | ``` 68 | realpath calibration/*jpg > calibration.txt 69 | ``` 70 | 71 | * Set environment variables 72 | 73 | ``` 74 | export INT8_CALIB_IMG_PATH=calibration.txt 75 | export INT8_CALIB_BATCH_SIZE=1 76 | ``` 77 | 78 | * Edit the `config_infer` file 79 | 80 | ``` 81 | ... 82 | model-engine-file=model_b1_gpu0_fp32.engine 83 | #int8-calib-file=calib.table 84 | ... 85 | network-mode=0 86 | ... 87 | ``` 88 | 89 | To 90 | 91 | ``` 92 | ... 93 | model-engine-file=model_b1_gpu0_int8.engine 94 | int8-calib-file=calib.table 95 | ... 96 | network-mode=1 97 | ... 98 | ``` 99 | 100 | * Run 101 | 102 | ``` 103 | deepstream-app -c deepstream_app_config.txt 104 | ``` 105 | 106 | **NOTE**: NVIDIA recommends at least 500 images to get a good accuracy. 
In this example, I recommend using 1000 images to get better accuracy (more images = more accuracy). Higher `INT8_CALIB_BATCH_SIZE` values will result in more accuracy and faster calibration. Set it according to your GPU memory. This process may take a long time. 107 | -------------------------------------------------------------------------------- /docs/RTDETR_Paddle.md: -------------------------------------------------------------------------------- 1 | # RT-DETR Paddle usage 2 | 3 | **NOTE**: https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_paddle version. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_rtdetr file](#edit-the-config_infer_primary_rtdetr-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the PaddleDetection repo and install the requirements 16 | 17 | https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.8/docs/tutorials/INSTALL.md 18 | 19 | ``` 20 | git clone https://github.com/lyuwenyu/RT-DETR.git 21 | cd RT-DETR/rtdetr_paddle 22 | pip3 install -r requirements.txt 23 | pip3 install onnx onnxslim onnxruntime paddle2onnx 24 | ``` 25 | 26 | **NOTE**: It is recommended to use Python virtualenv. 27 | 28 | #### 2. Copy the converter 29 | 30 | Copy the `export_rtdetr_paddle.py` file from the `DeepStream-Yolo/utils` directory to the `RT-DETR/rtdetr_paddle` folder. 31 | 32 | #### 3. Download the model 33 | 34 | Download the `pdparams` file from [RT-DETR Paddle](https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_paddle) releases (example for RT-DETR-R50) 35 | 36 | ``` 37 | wget https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams 38 | ``` 39 | 40 | **NOTE**: You can use your custom model. 41 | 42 | #### 4. Convert model 43 | 44 | Generate the ONNX model file (example for RT-DETR-R50) 45 | 46 | ``` 47 | python3 export_rtdetr_paddle.py -w rtdetr_r50vd_6x_coco.pdparams -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml --dynamic 48 | ``` 49 | 50 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 51 | 52 | ``` 53 | --simplify 54 | ``` 55 | 56 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 57 | 58 | ``` 59 | --dynamic 60 | ``` 61 | 62 | **NOTE**: To use static batch-size (example for batch-size = 4) 63 | 64 | ``` 65 | --batch 4 66 | ``` 67 | 68 | **NOTE**: If you are using DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 16. 69 | 70 | ``` 71 | --opset 12 72 | ``` 73 | 74 | #### 5. Copy generated files 75 | 76 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 77 | 78 | ## 79 | 80 | ### Compile the lib 81 | 82 | 1. Open the `DeepStream-Yolo` folder and compile the lib 83 | 84 | 2. Set the `CUDA_VER` according to your DeepStream version 85 | 86 | ``` 87 | export CUDA_VER=XY.Z 88 | ``` 89 | 90 | * x86 platform 91 | 92 | ``` 93 | DeepStream 7.1 = 12.6 94 | DeepStream 7.0 / 6.4 = 12.2 95 | DeepStream 6.3 = 12.1 96 | DeepStream 6.2 = 11.8 97 | DeepStream 6.1.1 = 11.7 98 | DeepStream 6.1 = 11.6 99 | DeepStream 6.0.1 / 6.0 = 11.4 100 | DeepStream 5.1 = 11.1 101 | ``` 102 | 103 | * Jetson platform 104 | 105 | ``` 106 | DeepStream 7.1 = 12.6 107 | DeepStream 7.0 / 6.4 = 12.2 108 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 109 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 110 | ``` 111 | 112 | 3. 
Make the lib 113 | 114 | ``` 115 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 116 | ``` 117 | 118 | ## 119 | 120 | ### Edit the config_infer_primary_rtdetr file 121 | 122 | Edit the `config_infer_primary_rtdetr.txt` file according to your model (example for RT-DETR-R50 with 80 classes) 123 | 124 | ``` 125 | [property] 126 | ... 127 | onnx-file=rtdetr_r50vd_6x_coco.pdparams.onnx 128 | ... 129 | num-detected-classes=80 130 | ... 131 | parse-bbox-func-name=NvDsInferParseYolo 132 | ... 133 | ``` 134 | 135 | **NOTE**: The **RT-DETR** does not resize the input with padding. To get better accuracy, use 136 | 137 | ``` 138 | [property] 139 | ... 140 | maintain-aspect-ratio=0 141 | ... 142 | ``` 143 | 144 | **NOTE**: The **RT-DETR** does not require NMS. To get better accuracy, use 145 | 146 | ``` 147 | [property] 148 | ... 149 | cluster-mode=4 150 | ... 151 | ``` 152 | 153 | ## 154 | 155 | ### Edit the deepstream_app_config file 156 | 157 | ``` 158 | ... 159 | [primary-gie] 160 | ... 161 | config-file=config_infer_primary_rtdetr.txt 162 | ``` 163 | 164 | ## 165 | 166 | ### Testing the model 167 | 168 | ``` 169 | deepstream-app -c deepstream_app_config.txt 170 | ``` 171 | 172 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 173 | 174 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 175 | -------------------------------------------------------------------------------- /docs/RTDETR_PyTorch.md: -------------------------------------------------------------------------------- 1 | # RT-DETR PyTorch usage 2 | 3 | **NOTE**: https://github.com/lyuwenyu/RT-DETR/tree/main/rtdetr_pytorch version. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_rtdetr file](#edit-the-config_infer_primary_rtdetr-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the RT-DETR repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/lyuwenyu/RT-DETR.git 19 | cd RT-DETR/rtdetr_pytorch 20 | pip3 install -r requirements.txt 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy the converter 27 | 28 | Copy the `export_rtdetr_pytorch.py` file from the `DeepStream-Yolo/utils` directory to the `RT-DETR/rtdetr_pytorch` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pth` file from [RT-DETR PyTorch](https://github.com/lyuwenyu/storage/releases/tag/v0.1) releases (example for RT-DETR-R50) 33 | 34 | ``` 35 | wget https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. 
Convert model 41 | 42 | Generate the ONNX model file (example for RT-DETR-R50) 43 | 44 | ``` 45 | python3 export_rtdetr_pytorch.py -w rtdetr_r50vd_6x_coco_from_paddle.pth -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 16. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_rtdetr file 140 | 141 | Edit the `config_infer_primary_rtdetr.txt` file according to your model (example for RT-DETR-R50 with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=rtdetr_r50vd_6x_coco_from_paddle.pth.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 152 | ``` 153 | 154 | **NOTE**: The **RT-DETR** do not resize the input with padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=0 160 | ... 161 | ``` 162 | 163 | **NOTE**: The **RT-DETR** do not require NMS. To get better accuracy, use 164 | 165 | ``` 166 | [property] 167 | ... 168 | cluster-mode=4 169 | ... 170 | ``` 171 | 172 | ## 173 | 174 | ### Edit the deepstream_app_config file 175 | 176 | ``` 177 | ... 178 | [primary-gie] 179 | ... 180 | config-file=config_infer_primary_rtdetr.txt 181 | ``` 182 | 183 | ## 184 | 185 | ### Testing the model 186 | 187 | ``` 188 | deepstream-app -c deepstream_app_config.txt 189 | ``` 190 | 191 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 192 | 193 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 
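**NOTE**: Optionally, you can sanity-check the exported ONNX model with onnxruntime before building the TensorRT engine. This is a minimal sketch: it assumes the file name from the example above and the `onnxruntime` package installed in step 1 (numpy is pulled in with it), and it only verifies that the graph loads and runs

```
import numpy as np
import onnxruntime as ort

# load the exported model on CPU and build dummy feeds for every input
session = ort.InferenceSession("rtdetr_r50vd_6x_coco_from_paddle.pth.onnx", providers=["CPUExecutionProvider"])
feeds = {}
for inp in session.get_inputs():
    # dynamic exports mark the batch dimension as symbolic; use 1 for the test
    shape = [1 if not isinstance(d, int) or d < 0 else d for d in inp.shape]
    dtype = np.int64 if "int64" in inp.type else np.float32
    feeds[inp.name] = np.zeros(shape, dtype=dtype)

# run a dummy inference and print the output names and shapes
for meta, out in zip(session.get_outputs(), session.run(None, feeds)):
    print(meta.name, out.shape)
```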
194 | -------------------------------------------------------------------------------- /docs/RTDETR_Ultralytics.md: -------------------------------------------------------------------------------- 1 | # RT-DETR Ultralytics usage 2 | 3 | **NOTE**: Ultralytics (https://docs.ultralytics.com/models/rtdetr) version. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_rtdetr file](#edit-the-config_infer_primary_rtdetr-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the Ultralytics repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/ultralytics/ultralytics.git 19 | cd ultralytics 20 | pip3 install -e . 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy conversor 27 | 28 | Copy the `export_rtdetr_ultralytics.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [Ultralytics](https://github.com/ultralytics/assets/releases/) releases (example for RT-DETR-L) 33 | 34 | ``` 35 | wget https://github.com/ultralytics/assets/releases/download/v8.2.0/rtdetr-l.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. Convert model 41 | 42 | Generate the ONNX model file (example for RT-DETR-L) 43 | 44 | ``` 45 | python3 export_rtdetr_ultralytics.py -w rtdetr-l.pt --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 16. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. 
Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_rtdetr file 140 | 141 | Edit the `config_infer_primary_rtdetr.txt` file according to your model (example for RT-DETR-L with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=rtdetr-l.pt.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 152 | ``` 153 | 154 | **NOTE**: The **RT-DETR Ultralytics** do not resize the input with padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=0 160 | ... 161 | ``` 162 | 163 | **NOTE**: The **RT-DETR Ultralytics** do not require NMS. To get better accuracy, use 164 | 165 | ``` 166 | [property] 167 | ... 168 | cluster-mode=4 169 | ... 170 | ``` 171 | 172 | ## 173 | 174 | ### Edit the deepstream_app_config file 175 | 176 | ``` 177 | ... 178 | [primary-gie] 179 | ... 180 | config-file=config_infer_primary_rtdetr.txt 181 | ``` 182 | 183 | ## 184 | 185 | ### Testing the model 186 | 187 | ``` 188 | deepstream-app -c deepstream_app_config.txt 189 | ``` 190 | 191 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 192 | 193 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 194 | -------------------------------------------------------------------------------- /docs/YOLO11.md: -------------------------------------------------------------------------------- 1 | # YOLO11 usage 2 | 3 | **NOTE**: The yaml file is not required. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_yolo11 file](#edit-the-config_infer_primary_yolo11-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the YOLO11 repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/ultralytics/ultralytics.git 19 | cd ultralytics 20 | pip3 install -e . 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy conversor 27 | 28 | Copy the `export_yolo11.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [YOLO11](https://github.com/ultralytics/assets/releases/) releases (example for YOLO11s) 33 | 34 | ``` 35 | wget https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. 
Convert model 41 | 42 | Generate the ONNX model file (example for YOLO11s) 43 | 44 | ``` 45 | python3 export_yolo11.py -w yolo11s.pt --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 17. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_yolo11 file 140 | 141 | Edit the `config_infer_primary_yolo11.txt` file according to your model (example for YOLO11s with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=yolo11s.pt.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 152 | ``` 153 | 154 | **NOTE**: The **YOLO11** resizes the input with center padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=1 160 | symmetric-padding=1 161 | ... 162 | ``` 163 | 164 | ## 165 | 166 | ### Edit the deepstream_app_config file 167 | 168 | ``` 169 | ... 170 | [primary-gie] 171 | ... 172 | config-file=config_infer_primary_yolo11.txt 173 | ``` 174 | 175 | ## 176 | 177 | ### Testing the model 178 | 179 | ``` 180 | deepstream-app -c deepstream_app_config.txt 181 | ``` 182 | 183 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 184 | 185 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 186 | -------------------------------------------------------------------------------- /docs/YOLOR.md: -------------------------------------------------------------------------------- 1 | # YOLOR usage 2 | 3 | **NOTE**: Select the correct branch of the YOLOR repo before the conversion. 4 | 5 | **NOTE**: The cfg file is required for the main branch. 
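**NOTE**: Example for selecting the branch after cloning the repo (assuming the branch names `main` and `paper`, matching the model versions referenced below)

```
git checkout main
```

or

```
git checkout paper
```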
6 | 7 | * [Convert model](#convert-model) 8 | * [Compile the lib](#compile-the-lib) 9 | * [Edit the config_infer_primary_yolor file](#edit-the-config_infer_primary_yolor-file) 10 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 11 | * [Testing the model](#testing-the-model) 12 | 13 | ## 14 | 15 | ### Convert model 16 | 17 | #### 1. Download the YOLOR repo and install the requirements 18 | 19 | ``` 20 | git clone https://github.com/WongKinYiu/yolor.git 21 | cd yolor 22 | pip3 install -r requirements.txt 23 | pip3 install onnx onnxslim onnxruntime 24 | ``` 25 | 26 | **NOTE**: It is recommended to use Python virtualenv. 27 | 28 | #### 2. Copy conversor 29 | 30 | Copy the `export_yolor.py` file from `DeepStream-Yolo/utils` directory to the `yolor` folder. 31 | 32 | #### 3. Download the model 33 | 34 | Download the `pt` file from [YOLOR](https://github.com/WongKinYiu/yolor) repo. 35 | 36 | **NOTE**: You can use your custom model. 37 | 38 | #### 4. Convert model 39 | 40 | Generate the ONNX model file 41 | 42 | - Main branch 43 | 44 | Example for YOLOR-CSP 45 | 46 | ``` 47 | python3 export_yolor.py -w yolor_csp.pt -c cfg/yolor_csp.cfg --dynamic 48 | ``` 49 | 50 | - Paper branch 51 | 52 | Example for YOLOR-P6 53 | 54 | ``` 55 | python3 export_yolor.py -w yolor-p6.pt --dynamic 56 | ``` 57 | 58 | **NOTE**: To convert a P6 model 59 | 60 | ``` 61 | --p6 62 | ``` 63 | 64 | **NOTE**: To change the inference size (defaut: 640 / 1280 for `--p6` models) 65 | 66 | ``` 67 | -s SIZE 68 | --size SIZE 69 | -s HEIGHT WIDTH 70 | --size HEIGHT WIDTH 71 | ``` 72 | 73 | Example for 1280 74 | 75 | ``` 76 | -s 1280 77 | ``` 78 | 79 | or 80 | 81 | ``` 82 | -s 1280 1280 83 | ``` 84 | 85 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 86 | 87 | ``` 88 | --simplify 89 | ``` 90 | 91 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 92 | 93 | ``` 94 | --dynamic 95 | ``` 96 | 97 | **NOTE**: To use static batch-size (example for batch-size = 4) 98 | 99 | ``` 100 | --batch 4 101 | ``` 102 | 103 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 12. 104 | 105 | ``` 106 | --opset 12 107 | ``` 108 | 109 | #### 5. Copy generated files 110 | 111 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder 112 | 113 | ## 114 | 115 | ### Compile the lib 116 | 117 | 1. Open the `DeepStream-Yolo` folder and compile the lib 118 | 119 | 2. Set the `CUDA_VER` according to your DeepStream version 120 | 121 | ``` 122 | export CUDA_VER=XY.Z 123 | ``` 124 | 125 | * x86 platform 126 | 127 | ``` 128 | DeepStream 7.1 = 12.6 129 | DeepStream 7.0 / 6.4 = 12.2 130 | DeepStream 6.3 = 12.1 131 | DeepStream 6.2 = 11.8 132 | DeepStream 6.1.1 = 11.7 133 | DeepStream 6.1 = 11.6 134 | DeepStream 6.0.1 / 6.0 = 11.4 135 | DeepStream 5.1 = 11.1 136 | ``` 137 | 138 | * Jetson platform 139 | 140 | ``` 141 | DeepStream 7.1 = 12.6 142 | DeepStream 7.0 / 6.4 = 12.2 143 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 144 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 145 | ``` 146 | 147 | 3. Make the lib 148 | 149 | ``` 150 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 151 | ``` 152 | 153 | ## 154 | 155 | ### Edit the config_infer_primary_yolor file 156 | 157 | Edit the `config_infer_primary_yolor.txt` file according to your model (example for YOLOR-CSP with 80 classes) 158 | 159 | ``` 160 | [property] 161 | ... 162 | onnx-file=yolor_csp.pt.onnx 163 | ... 
164 | num-detected-classes=80 165 | ... 166 | parse-bbox-func-name=NvDsInferParseYolo 167 | ... 168 | ``` 169 | 170 | **NOTE**: The **YOLOR** resizes the input with center padding. To get better accuracy, use 171 | 172 | ``` 173 | [property] 174 | ... 175 | maintain-aspect-ratio=1 176 | symmetric-padding=1 177 | ... 178 | ``` 179 | 180 | ## 181 | 182 | ### Edit the deepstream_app_config file 183 | 184 | ``` 185 | ... 186 | [primary-gie] 187 | ... 188 | config-file=config_infer_primary_yolor.txt 189 | ``` 190 | 191 | ## 192 | 193 | ### Testing the model 194 | 195 | ``` 196 | deepstream-app -c deepstream_app_config.txt 197 | ``` 198 | 199 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 200 | 201 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 202 | -------------------------------------------------------------------------------- /docs/YOLOv10.md: -------------------------------------------------------------------------------- 1 | # YOLOv10 usage 2 | 3 | **NOTE**: The yaml file is not required. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_yoloV10 file](#edit-the-config_infer_primary_yolov10-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the YOLOv10 repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/ultralytics/ultralytics.git 19 | cd ultralytics 20 | pip3 install -e . 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy conversor 27 | 28 | Copy the `export_yoloV10.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [YOLOv10](https://github.com/THU-MIG/yolov10/releases/tag/v1.1) releases (example for YOLOv10s) 33 | 34 | ``` 35 | wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10s.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. Convert model 41 | 42 | Generate the ONNX model file (example for YOLOv10s) 43 | 44 | ``` 45 | python3 export_yoloV10.py -w yolov10s.pt --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 17. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. 
Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_yoloV10 file 140 | 141 | Edit the `config_infer_primary_yoloV10.txt` file according to your model (example for YOLOv10s with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=yolov10s.pt.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 152 | ``` 153 | 154 | **NOTE**: The **YOLOv10** resizes the input with center padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=1 160 | symmetric-padding=1 161 | ... 162 | ``` 163 | 164 | **NOTE**: The **YOLOv10** do not require NMS. To get better accuracy, use 165 | 166 | ``` 167 | [property] 168 | ... 169 | cluster-mode=4 170 | ... 171 | ``` 172 | 173 | ## 174 | 175 | ### Edit the deepstream_app_config file 176 | 177 | ``` 178 | ... 179 | [primary-gie] 180 | ... 181 | config-file=config_infer_primary_yoloV10.txt 182 | ``` 183 | 184 | ## 185 | 186 | ### Testing the model 187 | 188 | ``` 189 | deepstream-app -c deepstream_app_config.txt 190 | ``` 191 | 192 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 193 | 194 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 195 | -------------------------------------------------------------------------------- /docs/YOLOv5.md: -------------------------------------------------------------------------------- 1 | # YOLOv5 usage 2 | 3 | **NOTE**: You can use the master branch of the YOLOv5 repo to convert all model versions. 4 | 5 | **NOTE**: The yaml file is not required. 6 | 7 | * [Convert model](#convert-model) 8 | * [Compile the lib](#compile-the-lib) 9 | * [Edit the config_infer_primary_yoloV5 file](#edit-the-config_infer_primary_yolov5-file) 10 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 11 | * [Testing the model](#testing-the-model) 12 | 13 | ## 14 | 15 | ### Convert model 16 | 17 | #### 1. Download the YOLOv5 repo and install the requirements 18 | 19 | ``` 20 | git clone https://github.com/ultralytics/yolov5.git 21 | cd yolov5 22 | pip3 install -r requirements.txt 23 | pip3 install onnx onnxslim onnxruntime 24 | ``` 25 | 26 | **NOTE**: It is recommended to use Python virtualenv. 27 | 28 | #### 2. Copy conversor 29 | 30 | Copy the `export_yoloV5.py` file from `DeepStream-Yolo/utils` directory to the `yolov5` folder. 31 | 32 | #### 3. 
Download the model 33 | 34 | Download the `pt` file from [YOLOv5](https://github.com/ultralytics/yolov5/releases/) releases (example for YOLOv5s 7.0) 35 | 36 | ``` 37 | wget https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt 38 | ``` 39 | 40 | **NOTE**: You can use your custom model. 41 | 42 | #### 4. Convert model 43 | 44 | Generate the ONNX model file (example for YOLOv5s) 45 | 46 | ``` 47 | python3 export_yoloV5.py -w yolov5s.pt --dynamic 48 | ``` 49 | 50 | **NOTE**: To convert a P6 model 51 | 52 | ``` 53 | --p6 54 | ``` 55 | 56 | **NOTE**: To change the inference size (defaut: 640 / 1280 for `--p6` models) 57 | 58 | ``` 59 | -s SIZE 60 | --size SIZE 61 | -s HEIGHT WIDTH 62 | --size HEIGHT WIDTH 63 | ``` 64 | 65 | Example for 1280 66 | 67 | ``` 68 | -s 1280 69 | ``` 70 | 71 | or 72 | 73 | ``` 74 | -s 1280 1280 75 | ``` 76 | 77 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 78 | 79 | ``` 80 | --simplify 81 | ``` 82 | 83 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 84 | 85 | ``` 86 | --dynamic 87 | ``` 88 | 89 | **NOTE**: To use static batch-size (example for batch-size = 4) 90 | 91 | ``` 92 | --batch 4 93 | ``` 94 | 95 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 17. 96 | 97 | ``` 98 | --opset 12 99 | ``` 100 | 101 | #### 5. Copy generated files 102 | 103 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 104 | 105 | ## 106 | 107 | ### Compile the lib 108 | 109 | 1. Open the `DeepStream-Yolo` folder and compile the lib 110 | 111 | 2. Set the `CUDA_VER` according to your DeepStream version 112 | 113 | ``` 114 | export CUDA_VER=XY.Z 115 | ``` 116 | 117 | * x86 platform 118 | 119 | ``` 120 | DeepStream 7.1 = 12.6 121 | DeepStream 7.0 / 6.4 = 12.2 122 | DeepStream 6.3 = 12.1 123 | DeepStream 6.2 = 11.8 124 | DeepStream 6.1.1 = 11.7 125 | DeepStream 6.1 = 11.6 126 | DeepStream 6.0.1 / 6.0 = 11.4 127 | DeepStream 5.1 = 11.1 128 | ``` 129 | 130 | * Jetson platform 131 | 132 | ``` 133 | DeepStream 7.1 = 12.6 134 | DeepStream 7.0 / 6.4 = 12.2 135 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 136 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 137 | ``` 138 | 139 | 3. Make the lib 140 | 141 | ``` 142 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 143 | ``` 144 | 145 | ## 146 | 147 | ### Edit the config_infer_primary_yoloV5 file 148 | 149 | Edit the `config_infer_primary_yoloV5.txt` file according to your model (example for YOLOv5s with 80 classes) 150 | 151 | ``` 152 | [property] 153 | ... 154 | onnx-file=yolov5s.pt.onnx 155 | ... 156 | num-detected-classes=80 157 | ... 158 | parse-bbox-func-name=NvDsInferParseYolo 159 | ... 160 | ``` 161 | 162 | **NOTE**: The **YOLOv5** resizes the input with center padding. To get better accuracy, use 163 | 164 | ``` 165 | [property] 166 | ... 167 | maintain-aspect-ratio=1 168 | symmetric-padding=1 169 | ... 170 | ``` 171 | 172 | ## 173 | 174 | ### Edit the deepstream_app_config file 175 | 176 | ``` 177 | ... 178 | [primary-gie] 179 | ... 180 | config-file=config_infer_primary_yoloV5.txt 181 | ``` 182 | 183 | ## 184 | 185 | ### Testing the model 186 | 187 | ``` 188 | deepstream-app -c deepstream_app_config.txt 189 | ``` 190 | 191 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 
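**NOTE**: To skip the rebuild on later runs, you can point `model-engine-file` in the config file to the engine generated on the first run. The file name below is only an example; the generated name depends on your batch-size, GPU id and precision settings

```
[property]
...
model-engine-file=model_b1_gpu0_fp32.engine
...
```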
192 | 193 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 194 | -------------------------------------------------------------------------------- /docs/YOLOv5u.md: -------------------------------------------------------------------------------- 1 | # YOLOv5u usage 2 | 3 | **NOTE**: The yaml file is not required. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_yoloV5u file](#edit-the-config_infer_primary_yolov5u-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the YOLOv5u repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/ultralytics/ultralytics.git 19 | cd ultralytics 20 | pip3 install -e . 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy conversor 27 | 28 | Copy the `export_yoloV5u.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [YOLOv5u](https://github.com/ultralytics/assets/releases/) releases (example for YOLOv5su) 33 | 34 | ``` 35 | wget https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov5su.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. Convert model 41 | 42 | Generate the ONNX model file (example for YOLOv5su) 43 | 44 | ``` 45 | python3 export_yoloV5u.py -w yolov5su.pt --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 17. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. 
Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_yoloV5u file 140 | 141 | Edit the `config_infer_primary_yoloV5u.txt` file according to your model (example for YOLOv5su with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=yolov5su.pt.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 152 | ``` 153 | 154 | **NOTE**: The **YOLOv5u** resizes the input with center padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=1 160 | symmetric-padding=1 161 | ... 162 | ``` 163 | 164 | ## 165 | 166 | ### Edit the deepstream_app_config file 167 | 168 | ``` 169 | ... 170 | [primary-gie] 171 | ... 172 | config-file=config_infer_primary_yoloV5u.txt 173 | ``` 174 | 175 | ## 176 | 177 | ### Testing the model 178 | 179 | ``` 180 | deepstream-app -c deepstream_app_config.txt 181 | ``` 182 | 183 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 184 | 185 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 186 | -------------------------------------------------------------------------------- /docs/YOLOv6.md: -------------------------------------------------------------------------------- 1 | # YOLOv6 usage 2 | 3 | **NOTE**: You need to change the branch of the YOLOv6 repo according to the version of the model you want to convert. 4 | 5 | **NOTE**: The yaml file is not required. 6 | 7 | * [Convert model](#convert-model) 8 | * [Compile the lib](#compile-the-lib) 9 | * [Edit the config_infer_primary_yoloV6 file](#edit-the-config_infer_primary_yolov6-file) 10 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 11 | * [Testing the model](#testing-the-model) 12 | 13 | ## 14 | 15 | ### Convert model 16 | 17 | #### 1. Download the YOLOv6 repo and install the requirements 18 | 19 | ``` 20 | git clone https://github.com/meituan/YOLOv6.git 21 | cd YOLOv6 22 | pip3 install -r requirements.txt 23 | pip3 install onnx onnxslim onnxruntime 24 | ``` 25 | 26 | **NOTE**: It is recommended to use Python virtualenv. 27 | 28 | #### 2. Copy conversor 29 | 30 | Copy the `export_yoloV6.py` file from `DeepStream-Yolo/utils` directory to the `YOLOv6` folder. 31 | 32 | #### 3. Download the model 33 | 34 | Download the `pt` file from [YOLOv6](https://github.com/meituan/YOLOv6/releases/) releases (example for YOLOv6-S 4.0) 35 | 36 | ``` 37 | wget https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6s.pt 38 | ``` 39 | 40 | **NOTE**: You can use your custom model. 41 | 42 | #### 4. 
Convert model 43 | 44 | Generate the ONNX model file (example for YOLOv6-S 4.0) 45 | 46 | ``` 47 | python3 export_yoloV6.py -w yolov6s.pt --dynamic 48 | ``` 49 | 50 | **NOTE**: To convert a P6 model 51 | 52 | ``` 53 | --p6 54 | ``` 55 | 56 | **NOTE**: To change the inference size (defaut: 640 / 1280 for `--p6` models) 57 | 58 | ``` 59 | -s SIZE 60 | --size SIZE 61 | -s HEIGHT WIDTH 62 | --size HEIGHT WIDTH 63 | ``` 64 | 65 | Example for 1280 66 | 67 | ``` 68 | -s 1280 69 | ``` 70 | 71 | or 72 | 73 | ``` 74 | -s 1280 1280 75 | ``` 76 | 77 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 78 | 79 | ``` 80 | --simplify 81 | ``` 82 | 83 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 84 | 85 | ``` 86 | --dynamic 87 | ``` 88 | 89 | **NOTE**: To use static batch-size (example for batch-size = 4) 90 | 91 | ``` 92 | --batch 4 93 | ``` 94 | 95 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 13. 96 | 97 | ``` 98 | --opset 12 99 | ``` 100 | 101 | #### 5. Copy generated file 102 | 103 | Copy the generated ONNX model file to the `DeepStream-Yolo` folder. 104 | 105 | ## 106 | 107 | ### Compile the lib 108 | 109 | 1. Open the `DeepStream-Yolo` folder and compile the lib 110 | 111 | 2. Set the `CUDA_VER` according to your DeepStream version 112 | 113 | ``` 114 | export CUDA_VER=XY.Z 115 | ``` 116 | 117 | * x86 platform 118 | 119 | ``` 120 | DeepStream 7.1 = 12.6 121 | DeepStream 7.0 / 6.4 = 12.2 122 | DeepStream 6.3 = 12.1 123 | DeepStream 6.2 = 11.8 124 | DeepStream 6.1.1 = 11.7 125 | DeepStream 6.1 = 11.6 126 | DeepStream 6.0.1 / 6.0 = 11.4 127 | DeepStream 5.1 = 11.1 128 | ``` 129 | 130 | * Jetson platform 131 | 132 | ``` 133 | DeepStream 7.1 = 12.6 134 | DeepStream 7.0 / 6.4 = 12.2 135 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 136 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 137 | ``` 138 | 139 | 3. Make the lib 140 | 141 | ``` 142 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 143 | ``` 144 | 145 | ## 146 | 147 | ### Edit the config_infer_primary_yoloV6 file 148 | 149 | Edit the `config_infer_primary_yoloV6.txt` file according to your model (example for YOLOv6-S 4.0 with 80 classes) 150 | 151 | ``` 152 | [property] 153 | ... 154 | onnx-file=yolov6s.pt.onnx 155 | ... 156 | num-detected-classes=80 157 | ... 158 | parse-bbox-func-name=NvDsInferParseYolo 159 | ... 160 | ``` 161 | 162 | **NOTE**: The **YOLOv6** resizes the input with center padding. To get better accuracy, use 163 | 164 | ``` 165 | [property] 166 | ... 167 | maintain-aspect-ratio=1 168 | symmetric-padding=1 169 | ... 170 | ``` 171 | 172 | ## 173 | 174 | ### Edit the deepstream_app_config file 175 | 176 | ``` 177 | ... 178 | [primary-gie] 179 | ... 180 | config-file=config_infer_primary_yoloV6.txt 181 | ``` 182 | 183 | ## 184 | 185 | ### Testing the model 186 | 187 | ``` 188 | deepstream-app -c deepstream_app_config.txt 189 | ``` 190 | 191 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 192 | 193 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 194 | -------------------------------------------------------------------------------- /docs/YOLOv7.md: -------------------------------------------------------------------------------- 1 | # YOLOv7 usage 2 | 3 | **NOTE**: The yaml file is not required. 
4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_yoloV7 file](#edit-the-config_infer_primary_yolov7-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the YOLOv7 repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/WongKinYiu/yolov7.git 19 | cd yolov7 20 | pip3 install -r requirements.txt 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use a Python virtualenv. 25 | 26 | #### 2. Copy the converter 27 | 28 | Copy the `export_yoloV7.py` file from the `DeepStream-Yolo/utils` directory to the `yolov7` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [YOLOv7](https://github.com/WongKinYiu/yolov7/releases/) releases (example for YOLOv7) 33 | 34 | ``` 35 | wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. Reparameterize your model (for custom models) 41 | 42 | Custom YOLOv7 models cannot be converted directly to an engine file, so you must reparameterize your model first using the code [here](https://github.com/WongKinYiu/yolov7/blob/main/tools/reparameterization.ipynb). Run the reparameterization on your custom checkpoints inside the YOLOv7 repository, and save the reparameterized checkpoints for the conversion in the next step. 43 | 44 | #### 5. Convert model 45 | 46 | Generate the ONNX model file (example for YOLOv7) 47 | 48 | ``` 49 | python3 export_yoloV7.py -w yolov7.pt --dynamic 50 | ``` 51 | 52 | **NOTE**: To convert a P6 model 53 | 54 | ``` 55 | --p6 56 | ``` 57 | 58 | **NOTE**: To change the inference size (default: 640 / 1280 for `--p6` models) 59 | 60 | ``` 61 | -s SIZE 62 | --size SIZE 63 | -s HEIGHT WIDTH 64 | --size HEIGHT WIDTH 65 | ``` 66 | 67 | Example for 1280 68 | 69 | ``` 70 | -s 1280 71 | ``` 72 | 73 | or 74 | 75 | ``` 76 | -s 1280 1280 77 | ``` 78 | 79 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 80 | 81 | ``` 82 | --simplify 83 | ``` 84 | 85 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 86 | 87 | ``` 88 | --dynamic 89 | ``` 90 | 91 | **NOTE**: To use static batch-size (example for batch-size = 4) 92 | 93 | ``` 94 | --batch 4 95 | ``` 96 | 97 | **NOTE**: If you are using DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 12. 98 | 99 | ``` 100 | --opset 12 101 | ``` 102 | 103 | #### 6. Copy generated files 104 | 105 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 106 | 107 | ## 108 | 109 | ### Compile the lib 110 | 111 | 1. Open the `DeepStream-Yolo` folder and compile the lib 112 | 113 | 2. Set the `CUDA_VER` according to your DeepStream version 114 | 115 | ``` 116 | export CUDA_VER=XY.Z 117 | ``` 118 | 119 | * x86 platform 120 | 121 | ``` 122 | DeepStream 7.1 = 12.6 123 | DeepStream 7.0 / 6.4 = 12.2 124 | DeepStream 6.3 = 12.1 125 | DeepStream 6.2 = 11.8 126 | DeepStream 6.1.1 = 11.7 127 | DeepStream 6.1 = 11.6 128 | DeepStream 6.0.1 / 6.0 = 11.4 129 | DeepStream 5.1 = 11.1 130 | ``` 131 | 132 | * Jetson platform 133 | 134 | ``` 135 | DeepStream 7.1 = 12.6 136 | DeepStream 7.0 / 6.4 = 12.2 137 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 138 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 139 | ``` 140 | 141 | 3. 
Make the lib 142 | 143 | ``` 144 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 145 | ``` 146 | 147 | ## 148 | 149 | ### Edit the config_infer_primary_yoloV7 file 150 | 151 | Edit the `config_infer_primary_yoloV7.txt` file according to your model (example for YOLOv7 with 80 classes) 152 | 153 | ``` 154 | [property] 155 | ... 156 | onnx-file=yolov7.pt.onnx 157 | ... 158 | num-detected-classes=80 159 | ... 160 | parse-bbox-func-name=NvDsInferParseYolo 161 | ... 162 | ``` 163 | 164 | **NOTE**: The **YOLOv7** resizes the input with center padding. To get better accuracy, use 165 | 166 | ``` 167 | [property] 168 | ... 169 | maintain-aspect-ratio=1 170 | symmetric-padding=1 171 | ... 172 | ``` 173 | 174 | ## 175 | 176 | ### Edit the deepstream_app_config file 177 | 178 | ``` 179 | ... 180 | [primary-gie] 181 | ... 182 | config-file=config_infer_primary_yoloV7.txt 183 | ``` 184 | 185 | ## 186 | 187 | ### Testing the model 188 | 189 | ``` 190 | deepstream-app -c deepstream_app_config.txt 191 | ``` 192 | 193 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 194 | 195 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 196 | -------------------------------------------------------------------------------- /docs/YOLOv8.md: -------------------------------------------------------------------------------- 1 | # YOLOv8 usage 2 | 3 | **NOTE**: The yaml file is not required. 4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_yoloV8 file](#edit-the-config_infer_primary_yolov8-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the YOLOv8 repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/ultralytics/ultralytics.git 19 | cd ultralytics 20 | pip3 install -e . 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy conversor 27 | 28 | Copy the `export_yoloV8.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [YOLOv8](https://github.com/ultralytics/assets/releases/) releases (example for YOLOv8s) 33 | 34 | ``` 35 | wget https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. 
Convert model 41 | 42 | Generate the ONNX model file (example for YOLOv8s) 43 | 44 | ``` 45 | python3 export_yoloV8.py -w yolov8s.pt --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 17. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_yoloV8 file 140 | 141 | Edit the `config_infer_primary_yoloV8.txt` file according to your model (example for YOLOv8s with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=yolov8s.pt.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 152 | ``` 153 | 154 | **NOTE**: The **YOLOv8** resizes the input with center padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=1 160 | symmetric-padding=1 161 | ... 162 | ``` 163 | 164 | ## 165 | 166 | ### Edit the deepstream_app_config file 167 | 168 | ``` 169 | ... 170 | [primary-gie] 171 | ... 172 | config-file=config_infer_primary_yoloV8.txt 173 | ``` 174 | 175 | ## 176 | 177 | ### Testing the model 178 | 179 | ``` 180 | deepstream-app -c deepstream_app_config.txt 181 | ``` 182 | 183 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 184 | 185 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 186 | -------------------------------------------------------------------------------- /docs/YOLOv9.md: -------------------------------------------------------------------------------- 1 | # YOLOv9 usage 2 | 3 | **NOTE**: The yaml file is not required. 
4 | 5 | * [Convert model](#convert-model) 6 | * [Compile the lib](#compile-the-lib) 7 | * [Edit the config_infer_primary_yoloV9 file](#edit-the-config_infer_primary_yolov9-file) 8 | * [Edit the deepstream_app_config file](#edit-the-deepstream_app_config-file) 9 | * [Testing the model](#testing-the-model) 10 | 11 | ## 12 | 13 | ### Convert model 14 | 15 | #### 1. Download the YOLOv9 repo and install the requirements 16 | 17 | ``` 18 | git clone https://github.com/WongKinYiu/yolov9.git 19 | cd yolov9 20 | pip3 install -r requirements.txt 21 | pip3 install onnx onnxslim onnxruntime 22 | ``` 23 | 24 | **NOTE**: It is recommended to use Python virtualenv. 25 | 26 | #### 2. Copy conversor 27 | 28 | Copy the `export_yoloV9.py` file from `DeepStream-Yolo/utils` directory to the `yolov9` folder. 29 | 30 | #### 3. Download the model 31 | 32 | Download the `pt` file from [YOLOv9](https://github.com/WongKinYiu/yolov9/releases/) releases (example for YOLOv9-S) 33 | 34 | ``` 35 | wget https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-s-converted.pt 36 | ``` 37 | 38 | **NOTE**: You can use your custom model. 39 | 40 | #### 4. Convert model 41 | 42 | Generate the ONNX model file (example for YOLOv9-S) 43 | 44 | ``` 45 | python3 export_yoloV9.py -w yolov9-s-converted.pt --dynamic 46 | ``` 47 | 48 | **NOTE**: To change the inference size (defaut: 640) 49 | 50 | ``` 51 | -s SIZE 52 | --size SIZE 53 | -s HEIGHT WIDTH 54 | --size HEIGHT WIDTH 55 | ``` 56 | 57 | Example for 1280 58 | 59 | ``` 60 | -s 1280 61 | ``` 62 | 63 | or 64 | 65 | ``` 66 | -s 1280 1280 67 | ``` 68 | 69 | **NOTE**: To simplify the ONNX model (DeepStream >= 6.0) 70 | 71 | ``` 72 | --simplify 73 | ``` 74 | 75 | **NOTE**: To use dynamic batch-size (DeepStream >= 6.1) 76 | 77 | ``` 78 | --dynamic 79 | ``` 80 | 81 | **NOTE**: To use static batch-size (example for batch-size = 4) 82 | 83 | ``` 84 | --batch 4 85 | ``` 86 | 87 | **NOTE**: If you are using the DeepStream 5.1, remove the `--dynamic` arg and use opset 12 or lower. The default opset is 17. 88 | 89 | ``` 90 | --opset 12 91 | ``` 92 | 93 | #### 5. Copy generated files 94 | 95 | Copy the generated ONNX model file and labels.txt file (if generated) to the `DeepStream-Yolo` folder. 96 | 97 | ## 98 | 99 | ### Compile the lib 100 | 101 | 1. Open the `DeepStream-Yolo` folder and compile the lib 102 | 103 | 2. Set the `CUDA_VER` according to your DeepStream version 104 | 105 | ``` 106 | export CUDA_VER=XY.Z 107 | ``` 108 | 109 | * x86 platform 110 | 111 | ``` 112 | DeepStream 7.1 = 12.6 113 | DeepStream 7.0 / 6.4 = 12.2 114 | DeepStream 6.3 = 12.1 115 | DeepStream 6.2 = 11.8 116 | DeepStream 6.1.1 = 11.7 117 | DeepStream 6.1 = 11.6 118 | DeepStream 6.0.1 / 6.0 = 11.4 119 | DeepStream 5.1 = 11.1 120 | ``` 121 | 122 | * Jetson platform 123 | 124 | ``` 125 | DeepStream 7.1 = 12.6 126 | DeepStream 7.0 / 6.4 = 12.2 127 | DeepStream 6.3 / 6.2 / 6.1.1 / 6.1 = 11.4 128 | DeepStream 6.0.1 / 6.0 / 5.1 = 10.2 129 | ``` 130 | 131 | 3. Make the lib 132 | 133 | ``` 134 | make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo 135 | ``` 136 | 137 | ## 138 | 139 | ### Edit the config_infer_primary_yoloV9 file 140 | 141 | Edit the `config_infer_primary_yoloV9.txt` file according to your model (example for YOLOv9-S with 80 classes) 142 | 143 | ``` 144 | [property] 145 | ... 146 | onnx-file=yolov9-s-converted.pt.onnx 147 | ... 148 | num-detected-classes=80 149 | ... 150 | parse-bbox-func-name=NvDsInferParseYolo 151 | ... 
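# Optional (an alternative, not a requirement): the CUDA-accelerated bbox
# parser from the same lib can be used instead of NvDsInferParseYolo:
#parse-bbox-func-name=NvDsInferParseYoloCuda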
152 | ``` 153 | 154 | **NOTE**: The **YOLOv9** resizes the input with center padding. To get better accuracy, use 155 | 156 | ``` 157 | [property] 158 | ... 159 | maintain-aspect-ratio=1 160 | symmetric-padding=1 161 | ... 162 | ``` 163 | 164 | ## 165 | 166 | ### Edit the deepstream_app_config file 167 | 168 | ``` 169 | ... 170 | [primary-gie] 171 | ... 172 | config-file=config_infer_primary_yoloV9.txt 173 | ``` 174 | 175 | ## 176 | 177 | ### Testing the model 178 | 179 | ``` 180 | deepstream-app -c deepstream_app_config.txt 181 | ``` 182 | 183 | **NOTE**: The TensorRT engine file may take a very long time to generate (sometimes more than 10 minutes). 184 | 185 | **NOTE**: For more information about custom models configuration (`batch-size`, `network-mode`, etc), please check the [`docs/customModels.md`](customModels.md) file. 186 | -------------------------------------------------------------------------------- /docs/multipleGIEs_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marcoslucianops/DeepStream-Yolo/01c1e504d9c15267fb58f8e88e243eeb31aa99d2/docs/multipleGIEs_tree.png -------------------------------------------------------------------------------- /labels.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a 5 | # copy of this software and associated documentation files (the "Software"), 6 | # to deal in the Software without restriction, including without limitation 7 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | # and/or sell copies of the Software, and to permit persons to whom the 9 | # Software is furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 
13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | # DEALINGS IN THE SOFTWARE. 21 | # 22 | # Edited by Marcos Luciano 23 | # https://www.github.com/marcoslucianops 24 | ################################################################################ 25 | 26 | CUDA_VER?= 27 | ifeq ($(CUDA_VER),) 28 | $(error "CUDA_VER is not set") 29 | endif 30 | 31 | OPENCV?= 32 | ifeq ($(OPENCV),) 33 | OPENCV=0 34 | endif 35 | 36 | GRAPH?= 37 | ifeq ($(GRAPH),) 38 | GRAPH=0 39 | endif 40 | 41 | CC:= g++ 42 | NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc 43 | 44 | CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations 45 | CFLAGS+= -I/opt/nvidia/deepstream/deepstream/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include 46 | 47 | ifeq ($(OPENCV), 1) 48 | COMMON+= -DOPENCV 49 | CFLAGS+= $(shell pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv) 50 | LIBS+= $(shell pkg-config --libs opencv4 2> /dev/null || pkg-config --libs opencv) 51 | endif 52 | 53 | ifeq ($(GRAPH), 1) 54 | COMMON+= -DGRAPH 55 | endif 56 | 57 | CUFLAGS:= -I/opt/nvidia/deepstream/deepstream/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include 58 | 59 | ifeq ($(shell ldconfig -p | grep -q libnvparsers && echo 1 || echo 0), 1) 60 | LIBS+= -lnvparsers 61 | endif 62 | 63 | LIBS+= -lnvinfer_plugin -lnvinfer -lnvonnxparser -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs 64 | LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group 65 | 66 | INCS:= $(wildcard layers/*.h) 67 | INCS+= $(wildcard *.h) 68 | 69 | SRCFILES:= $(filter-out calibrator.cpp, $(wildcard *.cpp)) 70 | 71 | ifeq ($(OPENCV), 1) 72 | SRCFILES+= calibrator.cpp 73 | endif 74 | 75 | SRCFILES+= $(wildcard layers/*.cpp) 76 | SRCFILES+= $(wildcard *.cu) 77 | 78 | TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so 79 | 80 | TARGET_OBJS:= $(SRCFILES:.cpp=.o) 81 | TARGET_OBJS:= $(TARGET_OBJS:.cu=.o) 82 | 83 | all: $(TARGET_LIB) 84 | 85 | %.o: %.cpp $(INCS) Makefile 86 | $(CC) -c $(COMMON) -o $@ $(CFLAGS) $< 87 | 88 | %.o: %.cu $(INCS) Makefile 89 | $(NVCC) -c -o $@ --compiler-options '-fPIC' $(CUFLAGS) $< 90 | 91 | $(TARGET_LIB) : $(TARGET_OBJS) 92 | $(CC) -o $@ $(TARGET_OBJS) $(LFLAGS) 93 | 94 | clean: 95 | rm -rf $(TARGET_LIB) 96 | rm -rf $(TARGET_OBJS) 97 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/calibrator.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "calibrator.h" 7 | 8 | #include 9 | #include 10 | 11 | Int8EntropyCalibrator2::Int8EntropyCalibrator2(const int& batchSize, const int& channels, const int& height, 12 | const int& width, const float& scaleFactor, const float* offsets, const int& inputFormat, 13 | const std::string& imgPath, const std::string& calibTablePath) : batchSize(batchSize), inputC(channels), 14 | inputH(height), inputW(width), scaleFactor(scaleFactor), offsets(offsets), inputFormat(inputFormat), 15 | calibTablePath(calibTablePath), imageIndex(0) 16 | { 17 | inputCount 
= batchSize * channels * height * width; 18 | std::fstream f(imgPath); 19 | if (f.is_open()) { 20 | std::string temp; 21 | while (std::getline(f, temp)) { 22 | imgPaths.push_back(temp); 23 | } 24 | } 25 | batchData = new float[inputCount]; 26 | CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float))); 27 | } 28 | 29 | Int8EntropyCalibrator2::~Int8EntropyCalibrator2() 30 | { 31 | CUDA_CHECK(cudaFree(deviceInput)); 32 | if (batchData) { 33 | delete[] batchData; 34 | } 35 | } 36 | 37 | int 38 | Int8EntropyCalibrator2::getBatchSize() const noexcept 39 | { 40 | return batchSize; 41 | } 42 | 43 | bool 44 | Int8EntropyCalibrator2::getBatch(void** bindings, const char** names, int nbBindings) noexcept 45 | { 46 | if (imageIndex + batchSize > uint(imgPaths.size())) { 47 | return false; 48 | } 49 | 50 | float* ptr = batchData; 51 | for (size_t i = imageIndex; i < imageIndex + batchSize; ++i) { 52 | cv::Mat img = cv::imread(imgPaths[i]); 53 | if (img.empty()) { 54 | std::cerr << "Failed to read image for calibration" << std::endl; 55 | return false; 56 | } 57 | 58 | std::vector<float> inputData = prepareImage(img, inputC, inputH, inputW, scaleFactor, offsets, inputFormat); 59 | 60 | size_t len = inputData.size(); 61 | memcpy(ptr, inputData.data(), len * sizeof(float)); 62 | ptr += inputData.size(); 63 | 64 | std::cout << "Load image: " << imgPaths[i] << std::endl; 65 | std::cout << "Progress: " << (i + 1) * 100. / imgPaths.size() << "%" << std::endl; 66 | } 67 | 68 | imageIndex += batchSize; 69 | 70 | CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice)); 71 | bindings[0] = deviceInput; 72 | 73 | return true; 74 | } 75 | 76 | const void* 77 | Int8EntropyCalibrator2::readCalibrationCache(std::size_t& length) noexcept 78 | { 79 | calibrationCache.clear(); 80 | std::ifstream input(calibTablePath, std::ios::binary); 81 | input >> std::noskipws; 82 | if (readCache && input.good()) { 83 | std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(calibrationCache)); 84 | } 85 | length = calibrationCache.size(); 86 | return length ? calibrationCache.data() : nullptr; 87 | } 88 | 89 | void 90 | Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, std::size_t length) noexcept 91 | { 92 | std::ofstream output(calibTablePath, std::ios::binary); 93 | output.write(reinterpret_cast<const char*>(cache), length); 94 | } 95 | 96 | std::vector<float> 97 | prepareImage(cv::Mat& img, int inputC, int inputH, int inputW, float scaleFactor, const float* offsets, int inputFormat) 98 | { 99 | cv::Mat out; 100 | 101 | if (inputFormat == 0) { 102 | cv::cvtColor(img, out, cv::COLOR_BGR2RGB); 103 | } 104 | else if (inputFormat == 2) { 105 | cv::cvtColor(img, out, cv::COLOR_BGR2GRAY); 106 | } 107 | else { 108 | out = img; 109 | } 110 | 111 | int imageW = img.cols; 112 | int imageH = img.rows; 113 | 114 | if (imageW != inputW || imageH != inputH) { 115 | float resizeFactor = std::max(inputW / (float) imageW, inputH / (float) imageH); 116 | cv::resize(out, out, cv::Size(0, 0), resizeFactor, resizeFactor, cv::INTER_CUBIC); 117 | cv::Rect crop(cv::Point(0.5 * (out.cols - inputW), 0.5 * (out.rows - inputH)), cv::Size(inputW, inputH)); 118 | out = out(crop); 119 | } 120 | 121 | out.convertTo(out, CV_32F, scaleFactor); 122 | 123 | if (inputFormat == 2) { 124 | cv::subtract(out, cv::Scalar(offsets[0] / 255), out); 125 | } 126 | else { 127 | cv::subtract(out, cv::Scalar(offsets[0] / 255, offsets[1] / 255, offsets[2] / 255), out); 128 | } 129 | 130 | std::vector<cv::Mat> inputChannels(inputC); 131 | cv::split(out, inputChannels); 132 | std::vector<float> result(inputH * inputW * inputC); 133 | auto data = result.data(); 134 | int channelLength = inputH * inputW; 135 | for (int i = 0; i < inputC; ++i) { 136 | memcpy(data, inputChannels[i].data, channelLength * sizeof(float)); 137 | data += channelLength; 138 | } 139 | 140 | return result; 141 | } 142 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/calibrator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef CALIBRATOR_H 7 | #define CALIBRATOR_H 8 | 9 | #include <string> 10 | #include <vector> 11 | 12 | #include "NvInfer.h" 13 | #include "opencv2/opencv.hpp" 14 | 15 | #define CUDA_CHECK(status) { \ 16 | if (status != 0) { \ 17 | std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " << \ 18 | __LINE__ << std::endl; \ 19 | abort(); \ 20 | } \ 21 | } 22 | 23 | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 { 24 | public: 25 | Int8EntropyCalibrator2(const int& batchSize, const int& channels, const int& height, const int& width, 26 | const float& scaleFactor, const float* offsets, const int& inputFormat, const std::string& imgPath, 27 | const std::string& calibTablePath); 28 | 29 | virtual ~Int8EntropyCalibrator2(); 30 | 31 | int getBatchSize() const noexcept override; 32 | 33 | bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override; 34 | 35 | const void* readCalibrationCache(std::size_t& length) noexcept override; 36 | 37 | void writeCalibrationCache(const void* cache, size_t length) noexcept override; 38 | 39 | private: 40 | int batchSize; 41 | int inputC; 42 | int inputH; 43 | int inputW; 44 | int letterBox; 45 | float scaleFactor; 46 | const float* offsets; 47 | int inputFormat; 48 | std::string calibTablePath; 49 | size_t imageIndex; 50 | size_t inputCount; 51 | std::vector<std::string> imgPaths; 52 | float* batchData {nullptr}; 53 | void* deviceInput
{nullptr}; 54 | bool readCache {true}; 55 | std::vector<char> calibrationCache; 56 | }; 57 | 58 | std::vector<float> prepareImage(cv::Mat& img, int inputC, int inputH, int inputW, float scaleFactor, 59 | const float* offsets, int inputFormat); 60 | 61 | #endif //CALIBRATOR_H 62 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/activation_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __ACTIVATION_LAYER_H__ 7 | #define __ACTIVATION_LAYER_H__ 8 | 9 | #include <string> 10 | 11 | #include "NvInfer.h" 12 | 13 | nvinfer1::ITensor* activationLayer(int layerIdx, std::string activation, nvinfer1::ITensor* input, 14 | nvinfer1::INetworkDefinition* network, std::string layerName = ""); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "batchnorm_layer.h" 7 | 8 | #include <cmath> 9 | #include <vector> 10 | 11 | nvinfer1::ITensor* 12 | batchnormLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, 13 | std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, nvinfer1::ITensor* input, 14 | nvinfer1::INetworkDefinition* network) 15 | { 16 | nvinfer1::ITensor* output; 17 | 18 | assert(block.at("type") == "batchnorm"); 19 | assert(block.find("filters") != block.end()); 20 | 21 | int filters = std::stoi(block.at("filters")); 22 | std::string activation = block.at("activation"); 23 | 24 | float eps = 1.0e-5; 25 | if (block.find("eps") != block.end()) { 26 | eps = std::stof(block.at("eps")); 27 | } 28 | 29 | std::vector<float> bnBiases; 30 | std::vector<float> bnWeights; 31 | std::vector<float> bnRunningMean; 32 | std::vector<float> bnRunningVar; 33 | 34 | for (int i = 0; i < filters; ++i) { 35 | bnBiases.push_back(weights[weightPtr]); 36 | ++weightPtr; 37 | } 38 | for (int i = 0; i < filters; ++i) { 39 | bnWeights.push_back(weights[weightPtr]); 40 | ++weightPtr; 41 | } 42 | for (int i = 0; i < filters; ++i) { 43 | bnRunningMean.push_back(weights[weightPtr]); 44 | ++weightPtr; 45 | } 46 | for (int i = 0; i < filters; ++i) { 47 | bnRunningVar.push_back(sqrt(weights[weightPtr] + eps)); 48 | ++weightPtr; 49 | } 50 | 51 | int size = filters; 52 | nvinfer1::Weights shift {nvinfer1::DataType::kFLOAT, nullptr, size}; 53 | nvinfer1::Weights scale {nvinfer1::DataType::kFLOAT, nullptr, size}; 54 | nvinfer1::Weights power {nvinfer1::DataType::kFLOAT, nullptr, size}; 55 | 56 | float* shiftWt = new float[size]; 57 | for (int i = 0; i < size; ++i) { 58 | shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); 59 | } 60 | shift.values = shiftWt; 61 | 62 | float* scaleWt = new float[size]; 63 | for (int i = 0; i < size; ++i) { 64 | scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; 65 | } 66 | scale.values = scaleWt; 67 | 68 | float* powerWt = new float[size]; 69 | for (int i = 0; i < size; ++i) { 70 | powerWt[i] = 1.0; 71 | } 72 | power.values = powerWt; 73 | 74 | trtWeights.push_back(shift); 75 | trtWeights.push_back(scale); 76 | trtWeights.push_back(power); 77 | 78 | nvinfer1::IScaleLayer* batchnorm = network->addScale(*input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); 79 | assert(batchnorm != nullptr); 80 | std::string batchnormLayerName = "batchnorm_" +
std::to_string(layerIdx); 81 | batchnorm->setName(batchnormLayerName.c_str()); 82 | output = batchnorm->getOutput(0); 83 | 84 | output = activationLayer(layerIdx, activation, output, network); 85 | assert(output != nullptr); 86 | 87 | return output; 88 | } 89 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __BATCHNORM_LAYER_H__ 7 | #define __BATCHNORM_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* batchnormLayer(int layerIdx, std::map& block, std::vector& weights, 17 | std::vector& trtWeights, int& weightPtr, nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "channels_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | channelsLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 12 | nvinfer1::ITensor* implicitTensor, nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "shift_channels" || block.at("type") == "control_channels"); 17 | 18 | if (block.at("type") == "shift_channels") { 19 | nvinfer1::IElementWiseLayer* shift = network->addElementWise(*input, *implicitTensor, 20 | nvinfer1::ElementWiseOperation::kSUM); 21 | assert(shift != nullptr); 22 | std::string shiftLayerName = "shift_channels_" + std::to_string(layerIdx); 23 | shift->setName(shiftLayerName.c_str()); 24 | output = shift->getOutput(0); 25 | } 26 | else if (block.at("type") == "control_channels") { 27 | nvinfer1::IElementWiseLayer* control = network->addElementWise(*input, *implicitTensor, 28 | nvinfer1::ElementWiseOperation::kPROD); 29 | assert(control != nullptr); 30 | std::string controlLayerName = "control_channels_" + std::to_string(layerIdx); 31 | control->setName(controlLayerName.c_str()); 32 | output = control->getOutput(0); 33 | } 34 | 35 | return output; 36 | } 37 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/channels_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __CHANNELS_LAYER_H__ 7 | #define __CHANNELS_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* channelsLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 15 | nvinfer1::ITensor* implicitTensor, nvinfer1::INetworkDefinition* network); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __CONVOLUTIONAL_LAYER_H__ 7 | #define __CONVOLUTIONAL_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 
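/*
 * All the builders in this layers/ directory share one calling convention:
 * each receives the flat darknet weights vector plus a running weightPtr
 * offset by reference, consumes the values it needs and advances weightPtr,
 * so the caller can chain builders in config-file order. A minimal sketch of
 * that pattern, with hypothetical variable names (the real driver logic
 * lives in yolo.cpp):
 *
 *   int weightPtr = 0;
 *   int inputChannels = 3;
 *   nvinfer1::ITensor* out = inputTensor;
 *   for (auto& block : cfgBlocks) {
 *     if (block.at("type") == "convolutional") {
 *       out = convolutionalLayer(layerIdx, block, weights, trtWeights,
 *                                weightPtr, inputChannels, out, network);
 *     }
 *   }
 */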
12 | #include "NvInfer.h" 13 | 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* convolutionalLayer(int layerIdx, std::map& block, 17 | std::vector& weights, std::vector& trtWeights, int& weightPtr, int& inputChannels, 18 | nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = ""); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __DECONVOLUTIONAL_LAYER_H__ 7 | #define __DECONVOLUTIONAL_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* deconvolutionalLayer(int layerIdx, std::map& block, 17 | std::vector& weights, std::vector& trtWeights, int& weightPtr, int& inputChannels, 18 | nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, std::string layerName = ""); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "implicit_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | implicitLayer(int layerIdx, std::map& block, std::vector& weights, 12 | std::vector& trtWeights, int& weightPtr, nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "implicit" || block.at("type") == "implicit_add" || block.at("type") == "implicit_mul"); 17 | assert(block.find("filters") != block.end()); 18 | 19 | int filters = std::stoi(block.at("filters")); 20 | 21 | nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, filters}; 22 | 23 | float* val = new float[filters]; 24 | for (int i = 0; i < filters; ++i) { 25 | val[i] = weights[weightPtr]; 26 | ++weightPtr; 27 | } 28 | convWt.values = val; 29 | trtWeights.push_back(convWt); 30 | 31 | nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims{4, {1, filters, 1, 1}}, convWt); 32 | assert(implicit != nullptr); 33 | std::string implicitLayerName = block.at("type") + "_" + std::to_string(layerIdx); 34 | implicit->setName(implicitLayerName.c_str()); 35 | output = implicit->getOutput(0); 36 | 37 | return output; 38 | } 39 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/implicit_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __IMPLICIT_LAYER_H__ 7 | #define __IMPLICIT_LAYER_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "NvInfer.h" 14 | 15 | nvinfer1::ITensor* implicitLayer(int layerIdx, std::map& block, std::vector& weights, 16 | std::vector& trtWeights, int& weightPtr, nvinfer1::INetworkDefinition* network); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * 
https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "pooling_layer.h" 7 | 8 | #include 9 | #include 10 | 11 | nvinfer1::ITensor* 12 | poolingLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 13 | nvinfer1::INetworkDefinition* network) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "max" || block.at("type") == "maxpool" || block.at("type") == "avg" || 18 | block.at("type") == "avgpool"); 19 | 20 | if (block.at("type") == "max" || block.at("type") == "maxpool") { 21 | assert(block.find("size") != block.end()); 22 | assert(block.find("stride") != block.end()); 23 | 24 | int size = std::stoi(block.at("size")); 25 | int stride = std::stoi(block.at("stride")); 26 | 27 | nvinfer1::IPoolingLayer* maxpool = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, 28 | nvinfer1::Dims{2, {size, size}}); 29 | assert(maxpool != nullptr); 30 | std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx); 31 | maxpool->setName(maxpoolLayerName.c_str()); 32 | maxpool->setStrideNd(nvinfer1::Dims{2, {stride, stride}}); 33 | maxpool->setPaddingNd(nvinfer1::Dims{2, {(size - 1) / 2, (size - 1) / 2}}); 34 | if (size == 2 && stride == 1) { 35 | maxpool->setPrePadding(nvinfer1::Dims{2, {0, 0}}); 36 | maxpool->setPostPadding(nvinfer1::Dims{2, {1, 1}}); 37 | } 38 | output = maxpool->getOutput(0); 39 | } 40 | else if (block.at("type") == "avg" || block.at("type") == "avgpool") { 41 | nvinfer1::Dims inputDims = input->getDimensions(); 42 | nvinfer1::IPoolingLayer* avgpool = network->addPoolingNd(*input, nvinfer1::PoolingType::kAVERAGE, 43 | nvinfer1::Dims{2, {inputDims.d[1], inputDims.d[2]}}); 44 | assert(avgpool != nullptr); 45 | std::string avgpoolLayerName = "avgpool_" + std::to_string(layerIdx); 46 | avgpool->setName(avgpoolLayerName.c_str()); 47 | output = avgpool->getOutput(0); 48 | } 49 | else { 50 | std::cerr << "Pooling not supported: " << block.at("type") << std::endl; 51 | assert(0); 52 | } 53 | 54 | return output; 55 | } 56 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/pooling_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __POOLING_LAYER_H__ 7 | #define __POOLING_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* poolingLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 15 | nvinfer1::INetworkDefinition* network); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "reorg_layer.h" 7 | 8 | #include 9 | #include 10 | 11 | nvinfer1::ITensor* 12 | reorgLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 13 | nvinfer1::INetworkDefinition* network) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "reorg" || block.at("type") == "reorg3d"); 18 | 19 | int stride = 1; 20 | if(block.find("stride") != block.end()) { 21 | stride = std::stoi(block.at("stride")); 22 | } 23 | 24 | nvinfer1::Dims inputDims = input->getDimensions(); 25 | 26 | if (block.at("type") == "reorg3d") { 27 | std::string name1 = "slice1"; 28 | std::string name2 = "slice2"; 29 | 
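/*
 * reorg3d is a space-to-depth rearrangement: the four slices built below pick
 * the (even, even), (even, odd), (odd, even) and (odd, odd) spatial phases of
 * the input with the given stride, so a [N, C, H, W] tensor becomes
 * stride * stride phase maps of size [N, C, H/stride, W/stride] that are then
 * concatenated. The plain "reorg" branch further down implements the
 * equivalent YOLOv2 rearrangement with a chain of reshape/transpose shuffles
 * instead of slices.
 */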
std::string name3 = "slice3"; 30 | std::string name4 = "slice4"; 31 | nvinfer1::Dims start1 = {4, {0, 0, 0, 0}}; 32 | nvinfer1::Dims start2 = {4, {0, 0, 0, 1}}; 33 | nvinfer1::Dims start3 = {4, {0, 0, 1, 0}}; 34 | nvinfer1::Dims start4 = {4, {0, 0, 1, 1}}; 35 | nvinfer1::Dims sizeAll = {4, {inputDims.d[0], inputDims.d[1], inputDims.d[2] / stride, inputDims.d[3] / stride}}; 36 | nvinfer1::Dims strideAll = {4, {1, 1, stride, stride}}; 37 | 38 | nvinfer1::ITensor* slice1 = sliceLayer(layerIdx, name1, input, start1, sizeAll, strideAll, network); 39 | assert(slice1 != nullptr); 40 | 41 | nvinfer1::ITensor* slice2 = sliceLayer(layerIdx, name2, input, start2, sizeAll, strideAll, network); 42 | assert(slice2 != nullptr); 43 | 44 | nvinfer1::ITensor* slice3 = sliceLayer(layerIdx, name3, input, start3, sizeAll, strideAll, network); 45 | assert(slice3 != nullptr); 46 | 47 | nvinfer1::ITensor* slice4 = sliceLayer(layerIdx, name4, input, start4, sizeAll, strideAll, network); 48 | assert(slice4 != nullptr); 49 | 50 | std::vector concatInputs; 51 | concatInputs.push_back(slice1); 52 | concatInputs.push_back(slice2); 53 | concatInputs.push_back(slice3); 54 | concatInputs.push_back(slice4); 55 | 56 | nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); 57 | assert(concat != nullptr); 58 | std::string concatLayerName = "concat_" + std::to_string(layerIdx); 59 | concat->setName(concatLayerName.c_str()); 60 | concat->setAxis(0); 61 | output = concat->getOutput(0); 62 | } 63 | else { 64 | nvinfer1::IShuffleLayer* shuffle1 = network->addShuffle(*input); 65 | assert(shuffle1 != nullptr); 66 | std::string shuffle1LayerName = "shuffle1_" + std::to_string(layerIdx); 67 | shuffle1->setName(shuffle1LayerName.c_str()); 68 | nvinfer1::Dims reshapeDims1{6, {inputDims.d[0], inputDims.d[1] / (stride * stride), inputDims.d[2], stride, 69 | inputDims.d[3], stride}}; 70 | shuffle1->setReshapeDimensions(reshapeDims1); 71 | nvinfer1::Permutation permutation1{{0, 1, 2, 4, 3, 5}}; 72 | shuffle1->setSecondTranspose(permutation1); 73 | output = shuffle1->getOutput(0); 74 | 75 | nvinfer1::IShuffleLayer* shuffle2 = network->addShuffle(*output); 76 | assert(shuffle2 != nullptr); 77 | std::string shuffle2LayerName = "shuffle2_" + std::to_string(layerIdx); 78 | shuffle2->setName(shuffle2LayerName.c_str()); 79 | nvinfer1::Dims reshapeDims2{4, {inputDims.d[0], inputDims.d[1] / (stride * stride), inputDims.d[2] * inputDims.d[3], 80 | stride * stride}}; 81 | shuffle2->setReshapeDimensions(reshapeDims2); 82 | nvinfer1::Permutation permutation2{{0, 1, 3, 2}}; 83 | shuffle2->setSecondTranspose(permutation2); 84 | output = shuffle2->getOutput(0); 85 | 86 | nvinfer1::IShuffleLayer* shuffle3 = network->addShuffle(*output); 87 | assert(shuffle3 != nullptr); 88 | std::string shuffle3LayerName = "shuffle3_" + std::to_string(layerIdx); 89 | shuffle3->setName(shuffle3LayerName.c_str()); 90 | nvinfer1::Dims reshapeDims3{4, {inputDims.d[0], inputDims.d[1] / (stride * stride), stride * stride, 91 | inputDims.d[2] * inputDims.d[3]}}; 92 | shuffle3->setReshapeDimensions(reshapeDims3); 93 | nvinfer1::Permutation permutation3{{0, 2, 1, 3}}; 94 | shuffle3->setSecondTranspose(permutation3); 95 | output = shuffle3->getOutput(0); 96 | 97 | nvinfer1::IShuffleLayer* shuffle4 = network->addShuffle(*output); 98 | assert(shuffle4 != nullptr); 99 | std::string shuffle4LayerName = "shuffle4_" + std::to_string(layerIdx); 100 | shuffle4->setName(shuffle4LayerName.c_str()); 101 | nvinfer1::Dims reshapeDims4{4, 
{inputDims.d[0], inputDims.d[1] * stride * stride, inputDims.d[2] / stride, 102 | inputDims.d[3] / stride}}; 103 | shuffle4->setReshapeDimensions(reshapeDims4); 104 | output = shuffle4->getOutput(0); 105 | } 106 | 107 | return output; 108 | } 109 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reorg_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __REORG_LAYER_H__ 7 | #define __REORG_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | #include "slice_layer.h" 15 | 16 | nvinfer1::ITensor* reorgLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 17 | nvinfer1::INetworkDefinition* network); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "route_layer.h" 7 | 8 | nvinfer1::ITensor* 9 | routeLayer(int layerIdx, std::string& layers, std::map& block, 10 | std::vector tensorOutputs, nvinfer1::INetworkDefinition* network) 11 | { 12 | nvinfer1::ITensor* output; 13 | 14 | assert(block.at("type") == "route"); 15 | assert(block.find("layers") != block.end()); 16 | 17 | std::string strLayers = block.at("layers"); 18 | std::vector idxLayers; 19 | size_t lastPos = 0, pos = 0; 20 | while ((pos = strLayers.find(',', lastPos)) != std::string::npos) { 21 | int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos))); 22 | idxLayers.push_back(vL); 23 | lastPos = pos + 1; 24 | } 25 | if (lastPos < strLayers.length()) { 26 | std::string lastV = trim(strLayers.substr(lastPos)); 27 | if (!lastV.empty()) { 28 | idxLayers.push_back(std::stoi(lastV)); 29 | } 30 | } 31 | assert(!idxLayers.empty()); 32 | std::vector concatInputs; 33 | for (uint i = 0; i < idxLayers.size(); ++i) { 34 | if (idxLayers[i] < 0) { 35 | idxLayers[i] = tensorOutputs.size() + idxLayers[i]; 36 | } 37 | assert(idxLayers[i] >= 0 && idxLayers[i] < (int)tensorOutputs.size()); 38 | concatInputs.push_back(tensorOutputs[idxLayers[i]]); 39 | if (i < idxLayers.size() - 1) { 40 | layers += std::to_string(idxLayers[i]) + ", "; 41 | } 42 | } 43 | layers += std::to_string(idxLayers[idxLayers.size() - 1]); 44 | 45 | if (concatInputs.size() == 1) { 46 | output = concatInputs[0]; 47 | } 48 | else { 49 | int axis = 1; 50 | if (block.find("axis") != block.end()) { 51 | axis += std::stoi(block.at("axis")); 52 | } 53 | if (axis < 0) { 54 | axis += concatInputs[0]->getDimensions().nbDims; 55 | } 56 | 57 | nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); 58 | assert(concat != nullptr); 59 | std::string concatLayerName = "route_" + std::to_string(layerIdx); 60 | concat->setName(concatLayerName.c_str()); 61 | concat->setAxis(axis); 62 | output = concat->getOutput(0); 63 | } 64 | 65 | if (block.find("groups") != block.end()) { 66 | nvinfer1::Dims prevTensorDims = output->getDimensions(); 67 | int groups = stoi(block.at("groups")); 68 | int group_id = stoi(block.at("group_id")); 69 | int startSlice = (prevTensorDims.d[1] / groups) * group_id; 70 | int channelSlice = (prevTensorDims.d[1] / groups); 71 | 72 | std::string name = "slice"; 73 | nvinfer1::Dims start = 
{4, {0, startSlice, 0, 0}}; 74 | nvinfer1::Dims size = {4, {prevTensorDims.d[0], channelSlice, prevTensorDims.d[2], prevTensorDims.d[3]}}; 75 | nvinfer1::Dims stride = {4, {1, 1, 1, 1}}; 76 | 77 | output = sliceLayer(layerIdx, name, output, start, size, stride, network); 78 | assert(output != nullptr); 79 | } 80 | 81 | return output; 82 | } 83 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/route_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __ROUTE_LAYER_H__ 7 | #define __ROUTE_LAYER_H__ 8 | 9 | #include "../utils.h" 10 | 11 | #include "slice_layer.h" 12 | 13 | nvinfer1::ITensor* routeLayer(int layerIdx, std::string& layers, std::map& block, 14 | std::vector tensorOutputs, nvinfer1::INetworkDefinition* network); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/sam_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "sam_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | samLayer(int layerIdx, std::string activation, std::map& block, nvinfer1::ITensor* input, 12 | nvinfer1::ITensor* samInput, nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "sam"); 17 | 18 | nvinfer1::IElementWiseLayer* sam = network->addElementWise(*input, *samInput, nvinfer1::ElementWiseOperation::kPROD); 19 | assert(sam != nullptr); 20 | std::string samLayerName = "sam_" + std::to_string(layerIdx); 21 | sam->setName(samLayerName.c_str()); 22 | output = sam->getOutput(0); 23 | 24 | output = activationLayer(layerIdx, activation, output, network); 25 | assert(output != nullptr); 26 | 27 | return output; 28 | } 29 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/sam_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SAM_LAYER_H__ 7 | #define __SAM_LAYER_H__ 8 | 9 | #include 10 | 11 | #include "NvInfer.h" 12 | 13 | #include "activation_layer.h" 14 | 15 | nvinfer1::ITensor* samLayer(int layerIdx, std::string activation, std::map& block, 16 | nvinfer1::ITensor* input, nvinfer1::ITensor* samInput, nvinfer1::INetworkDefinition* network); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "shortcut_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::string shortcutVol, 12 | std::map& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcutInput, 13 | nvinfer1::INetworkDefinition* network) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "shortcut"); 18 | 19 | if (inputVol != shortcutVol) { 20 | std::string name = "slice"; 21 | nvinfer1::Dims start = {4, {0, 0, 0, 0}}; 22 | nvinfer1::Dims 
size = input->getDimensions(); 23 | nvinfer1::Dims stride = {4, {1, 1, 1, 1}}; 24 | 25 | output = sliceLayer(layerIdx, name, shortcutInput, start, size, stride, network); 26 | assert(output != nullptr); 27 | } 28 | else { 29 | output = shortcutInput; 30 | } 31 | 32 | nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output, 33 | nvinfer1::ElementWiseOperation::kSUM); 34 | assert(shortcut != nullptr); 35 | std::string shortcutLayerName = "shortcut_" + std::to_string(layerIdx); 36 | shortcut->setName(shortcutLayerName.c_str()); 37 | output = shortcut->getOutput(0); 38 | 39 | output = activationLayer(layerIdx, activation, output, network); 40 | assert(output != nullptr); 41 | 42 | return output; 43 | } 44 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SHORTCUT_LAYER_H__ 7 | #define __SHORTCUT_LAYER_H__ 8 | 9 | #include 10 | 11 | #include "NvInfer.h" 12 | 13 | #include "slice_layer.h" 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::string shortcutVol, 17 | std::map& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcut, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/slice_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "slice_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | sliceLayer(int layerIdx, std::string& name, nvinfer1::ITensor* input, nvinfer1::Dims start, nvinfer1::Dims size, 12 | nvinfer1::Dims stride, nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | nvinfer1::ISliceLayer* slice; 17 | 18 | nvinfer1::Dims inputDims = input->getDimensions(); 19 | 20 | if (inputDims.d[0] == -1) { 21 | slice = network->addSlice(*input, start, nvinfer1::Dims{}, stride); 22 | assert(slice != nullptr); 23 | 24 | int nbDims = size.nbDims; 25 | 26 | nvinfer1::IShapeLayer* shape = network->addShape(*input); 27 | assert(shape != nullptr); 28 | std::string shapeLayerName = "shape_" + name + "_" + std::to_string(layerIdx); 29 | shape->setName(shapeLayerName.c_str()); 30 | nvinfer1::ITensor* shapeTensor = shape->getOutput(0); 31 | assert(shapeTensor != nullptr); 32 | 33 | #if NV_TENSORRT_MAJOR >= 10 34 | nvinfer1::ICastLayer* castShape = network->addCast(*shapeTensor, nvinfer1::DataType::kINT32); 35 | assert(castShape != nullptr); 36 | std::string castShapeLayerName = "cast_shape_" + name + "_" + std::to_string(layerIdx); 37 | castShape->setName(castShapeLayerName.c_str()); 38 | nvinfer1::ITensor* castShapeTensor = castShape->getOutput(0); 39 | assert(castShapeTensor != nullptr); 40 | shapeTensor = castShapeTensor; 41 | #endif 42 | 43 | nvinfer1::Weights constantWt {nvinfer1::DataType::kINT32, nullptr, nbDims}; 44 | 45 | int* val = new int[nbDims]; 46 | for (int i = 0; i < nbDims; ++i) { 47 | if (inputDims.d[i] == size.d[i]) { 48 | val[i] = 0; 49 | } 50 | else { 51 | val[i] = inputDims.d[i] - size.d[i]; 52 | } 53 | } 54 | constantWt.values = val; 55 | 56 | nvinfer1::IConstantLayer* 
constant = network->addConstant(nvinfer1::Dims{1, {nbDims}}, constantWt); 57 | assert(constant != nullptr); 58 | std::string constantLayerName = "constant_" + name + "_" + std::to_string(layerIdx); 59 | constant->setName(constantLayerName.c_str()); 60 | nvinfer1::ITensor* constantTensor = constant->getOutput(0); 61 | assert(constantTensor != nullptr); 62 | 63 | nvinfer1::IElementWiseLayer* divide = network->addElementWise(*shapeTensor, *constantTensor, 64 | nvinfer1::ElementWiseOperation::kSUB); 65 | assert(divide != nullptr); 66 | std::string divideLayerName = "divide_" + name + "_" + std::to_string(layerIdx); 67 | divide->setName(divideLayerName.c_str()); 68 | nvinfer1::ITensor* divideTensor = divide->getOutput(0); 69 | assert(divideTensor != nullptr); 70 | 71 | slice->setInput(2, *divideTensor); 72 | } 73 | else { 74 | slice = network->addSlice(*input, start, size, stride); 75 | assert(slice != nullptr); 76 | } 77 | 78 | std::string sliceLayerName = name + "_" + std::to_string(layerIdx); 79 | slice->setName(sliceLayerName.c_str()); 80 | output = slice->getOutput(0); 81 | 82 | return output; 83 | } 84 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/slice_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SLICE_LAYER_H__ 7 | #define __SLICE_LAYER_H__ 8 | 9 | #include 10 | 11 | #include "NvInfer.h" 12 | 13 | nvinfer1::ITensor* sliceLayer(int layerIdx, std::string& name, nvinfer1::ITensor* input, nvinfer1::Dims start, 14 | nvinfer1::Dims size, nvinfer1::Dims stride, nvinfer1::INetworkDefinition* network); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "upsample_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | upsampleLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "upsample"); 17 | assert(block.find("stride") != block.end()); 18 | 19 | int stride = std::stoi(block.at("stride")); 20 | 21 | float scale[4] = {1, 1, static_cast(stride), static_cast(stride)}; 22 | 23 | nvinfer1::IResizeLayer* resize = network->addResize(*input); 24 | assert(resize != nullptr); 25 | std::string resizeLayerName = "upsample_" + std::to_string(layerIdx); 26 | resize->setName(resizeLayerName.c_str()); 27 | 28 | #if NV_TENSORRT_MAJOR > 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR > 4) 29 | resize->setResizeMode(nvinfer1::InterpolationMode::kNEAREST); 30 | #else 31 | resize->setResizeMode(nvinfer1::ResizeMode::kNEAREST); 32 | #endif 33 | 34 | resize->setScales(scale, 4); 35 | output = resize->getOutput(0); 36 | 37 | return output; 38 | } 39 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __UPSAMPLE_LAYER_H__ 7 | #define __UPSAMPLE_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include 
"NvInfer.h" 13 | 14 | nvinfer1::ITensor* upsampleLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 15 | nvinfer1::INetworkDefinition* network); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo_cuda.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include 27 | #include 28 | 29 | #include "nvdsinfer_custom_impl.h" 30 | 31 | extern "C" bool 32 | NvDsInferParseYoloCuda(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, 33 | NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList); 34 | 35 | __global__ void decodeTensorYoloCuda(NvDsInferParseObjectInfo *binfo, const float* output, const uint outputSize, 36 | const uint netW, const uint netH, const float* preclusterThreshold) 37 | { 38 | int x_id = blockIdx.x * blockDim.x + threadIdx.x; 39 | 40 | if (x_id >= outputSize) { 41 | return; 42 | } 43 | 44 | float maxProb = output[x_id * 6 + 4]; 45 | int maxIndex = (int) output[x_id * 6 + 5]; 46 | 47 | if (maxProb < preclusterThreshold[maxIndex]) { 48 | binfo[x_id].detectionConfidence = 0.0; 49 | return; 50 | } 51 | 52 | float bx1 = output[x_id * 6 + 0]; 53 | float by1 = output[x_id * 6 + 1]; 54 | float bx2 = output[x_id * 6 + 2]; 55 | float by2 = output[x_id * 6 + 3]; 56 | 57 | bx1 = fminf(float(netW), fmaxf(float(0.0), bx1)); 58 | by1 = fminf(float(netH), fmaxf(float(0.0), by1)); 59 | bx2 = fminf(float(netW), fmaxf(float(0.0), bx2)); 60 | by2 = fminf(float(netH), fmaxf(float(0.0), by2)); 61 | 62 | binfo[x_id].left = bx1; 63 | binfo[x_id].top = by1; 64 | binfo[x_id].width = fminf(float(netW), fmaxf(float(0.0), bx2 - bx1)); 65 | binfo[x_id].height = fminf(float(netH), fmaxf(float(0.0), by2 - by1)); 66 | binfo[x_id].detectionConfidence = maxProb; 67 | binfo[x_id].classId = maxIndex; 68 | } 69 | 70 | static bool NvDsInferParseCustomYoloCuda(std::vector const& outputLayersInfo, 71 | NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, 72 | std::vector& objectList) 73 | { 74 | if (outputLayersInfo.empty()) { 75 | std::cerr << "ERROR: Could not find output layer in bbox 
parsing" << std::endl; 76 | return false; 77 | } 78 | 79 | const NvDsInferLayerInfo& output = outputLayersInfo[0]; 80 | const uint outputSize = output.inferDims.d[0]; 81 | 82 | thrust::device_vector perClassPreclusterThreshold = detectionParams.perClassPreclusterThreshold; 83 | 84 | thrust::device_vector objects(outputSize); 85 | 86 | int threads_per_block = 1024; 87 | int number_of_blocks = ((outputSize) / threads_per_block) + 1; 88 | 89 | decodeTensorYoloCuda<<>>( 90 | thrust::raw_pointer_cast(objects.data()), (float*) (output.buffer), outputSize, networkInfo.width, 91 | networkInfo.height, thrust::raw_pointer_cast(perClassPreclusterThreshold.data())); 92 | 93 | objectList.resize(outputSize); 94 | thrust::copy(objects.begin(), objects.end(), objectList.begin()); 95 | 96 | return true; 97 | } 98 | 99 | extern "C" bool 100 | NvDsInferParseYoloCuda(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, 101 | NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) 102 | { 103 | return NvDsInferParseCustomYoloCuda(outputLayersInfo, networkInfo, detectionParams, objectList); 104 | } 105 | 106 | CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloCuda); 107 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/utils.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "utils.h" 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | static void 33 | leftTrim(std::string& s) 34 | { 35 | s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); 36 | } 37 | 38 | static void 39 | rightTrim(std::string& s) 40 | { 41 | s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); 42 | } 43 | 44 | std::string 45 | trim(std::string s) 46 | { 47 | leftTrim(s); 48 | rightTrim(s); 49 | return s; 50 | } 51 | 52 | float 53 | clamp(const float val, const float minVal, const float maxVal) 54 | { 55 | assert(minVal <= maxVal); 56 | return std::min(maxVal, std::max(minVal, val)); 57 | } 58 | 59 | bool 60 | fileExists(const std::string fileName, bool verbose) 61 | { 62 | if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName))) { 63 | if (verbose) { 64 | std::cout << "\nFile does not exist: " << fileName << std::endl; 65 | } 66 | return false; 67 | } 68 | return true; 69 | } 70 | 71 | std::vector 72 | loadWeights(const std::string weightsFilePath) 73 | { 74 | assert(fileExists(weightsFilePath)); 75 | std::cout << "\nLoading pre-trained weights" << std::endl; 76 | 77 | std::vector weights; 78 | 79 | if (weightsFilePath.find(".weights") != std::string::npos) { 80 | std::ifstream file(weightsFilePath, std::ios_base::binary); 81 | assert(file.good()); 82 | std::string line; 83 | 84 | if (weightsFilePath.find("yolov2") != std::string::npos && 85 | weightsFilePath.find("yolov2-tiny") == std::string::npos) { 86 | // Remove 4 int32 bytes of data from the stream belonging to the header 87 | file.ignore(4 * 4); 88 | } 89 | else { 90 | // Remove 5 int32 bytes of data from the stream belonging to the header 91 | file.ignore(4 * 5); 92 | } 93 | 94 | char floatWeight[4]; 95 | while (!file.eof()) { 96 | file.read(floatWeight, 4); 97 | assert(file.gcount() == 4); 98 | weights.push_back(*reinterpret_cast(floatWeight)); 99 | if (file.peek() == std::istream::traits_type::eof()) { 100 | break; 101 | } 102 | } 103 | } 104 | else { 105 | std::cerr << "\nFile " << weightsFilePath << " is not supported" << std::endl; 106 | assert(0); 107 | } 108 | 109 | std::cout << "Loading " << weightsFilePath << " complete" << std::endl; 110 | std::cout << "Total weights read: " << weights.size() << std::endl; 111 | 112 | return weights; 113 | } 114 | 115 | std::string 116 | dimsToString(const nvinfer1::Dims d) 117 | { 118 | assert(d.nbDims >= 1); 119 | 120 | std::stringstream s; 121 | s << "["; 122 | for (int i = 1; i < d.nbDims - 1; ++i) { 123 | s << d.d[i] << ", "; 124 | } 125 | s << d.d[d.nbDims - 1] << "]"; 126 | 127 | return s.str(); 128 | } 129 | 130 | int 131 | getNumChannels(nvinfer1::ITensor* t) 132 | { 133 | nvinfer1::Dims d = t->getDimensions(); 134 | assert(d.nbDims == 4); 135 | return d.d[1]; 136 | } 137 | 138 | void 139 | printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, 140 | std::string weightPtr) 141 | { 142 | std::cout << std::setw(7) << std::left << layerIndex << std::setw(40) << std::left << layerName; 143 | std::cout << std::setw(19) << std::left << layerInput << std::setw(19) << std::left << layerOutput; 144 | std::cout << weightPtr << std::endl; 145 | } 146 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/utils.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #ifndef __UTILS_H__ 27 | #define __UTILS_H__ 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "NvInfer.h" 37 | 38 | std::string trim(std::string s); 39 | 40 | float clamp(const float val, const float minVal, const float maxVal); 41 | 42 | bool fileExists(const std::string fileName, bool verbose = true); 43 | 44 | std::vector loadWeights(const std::string weightsFilePath); 45 | 46 | std::string dimsToString(const nvinfer1::Dims d); 47 | 48 | int getNumChannels(nvinfer1::ITensor* t); 49 | 50 | void printLayerInfo( 51 | std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, 52 | std::string weightPtr); 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | 8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 9 | 10 | __global__ void gpuYoloLayer(const float* input, float* output, const uint netWidth, const uint netHeight, 11 | const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, 12 | const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask) 13 | { 14 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 15 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 16 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 17 | 18 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) { 19 | return; 20 | } 21 | 22 | const int numGridCells = gridSizeX * gridSizeY; 23 | const int bbindex = y_id * gridSizeX + x_id; 24 | 25 | const float alpha = scaleXY; 26 | const float beta = -0.5 * (scaleXY - 1); 27 | 28 | float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) * alpha + beta + x_id) 29 | * netWidth / gridSizeX; 30 | 31 | float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + 
numOutputClasses) + 1)]) * alpha + beta + y_id) 32 | * netHeight / gridSizeY; 33 | 34 | float w = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[mask[z_id] * 2]; 35 | 36 | float h = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[mask[z_id] * 2 + 1]; 37 | 38 | const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 39 | 40 | float maxProb = 0.0f; 41 | int maxIndex = -1; 42 | 43 | for (uint i = 0; i < numOutputClasses; ++i) { 44 | float prob = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]); 45 | if (prob > maxProb) { 46 | maxProb = prob; 47 | maxIndex = i; 48 | } 49 | } 50 | 51 | int count = numGridCells * z_id + bbindex + lastInputSize; 52 | 53 | output[count * 6 + 0] = xc - w * 0.5; 54 | output[count * 6 + 1] = yc - h * 0.5; 55 | output[count * 6 + 2] = xc + w * 0.5; 56 | output[count * 6 + 3] = yc + h * 0.5; 57 | output[count * 6 + 4] = maxProb * objectness; 58 | output[count * 6 + 5] = (float) maxIndex; 59 | } 60 | 61 | cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize, 62 | const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight, 63 | const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 64 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 65 | 66 | cudaError_t cudaYoloLayer(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize, 67 | const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight, 68 | const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 69 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) 70 | { 71 | dim3 threads_per_block(16, 16, 4); 72 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, 73 | (numBBoxes / threads_per_block.z) + 1); 74 | 75 | for (unsigned int batch = 0; batch < batchSize; ++batch) { 76 | gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>( 77 | reinterpret_cast<const float*> (input) + (batch * inputSize), 78 | reinterpret_cast<float*> (output) + (batch * 6 * outputSize), 79 | netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY, 80 | reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask)); 81 | } 82 | return cudaGetLastError(); 83 | } 84 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include <cstdint> 7 | 8 | __global__ void gpuYoloLayer_nc(const float* input, float* output, const uint netWidth, const uint netHeight, 9 | const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, 10 | const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask) 11 | { 12 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 13 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 14 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 15 | 16 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) { 17 | return; 18 | } 19 | 20 | const int numGridCells = gridSizeX * gridSizeY; 21 | const int bbindex = y_id * gridSizeX + x_id;
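// The "_nc" decode assumes new_coords-style heads (e.g. scaled-YOLOv4), where
// the logistic activation is already applied inside the network, so no
// sigmoid is taken here: xc = (v * alpha + beta + x_id) * netWidth / gridSizeX
// with alpha = scaleXY and beta = -0.5 * (scaleXY - 1), and width/height use
// (2v)^2 * anchor instead of exp(v) * anchor. Worked example, assuming
// scaleXY = 2, v = 0.5, x_id = 3 and netWidth / gridSizeX = 32:
// xc = (0.5 * 2 - 0.5 + 3) * 32 = 112.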
22 | 23 | const float alpha = scaleXY; 24 | const float beta = -0.5 * (scaleXY - 1); 25 | 26 | float xc = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] * alpha + beta + x_id) * netWidth / 27 | gridSizeX; 28 | 29 | float yc = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] * alpha + beta + y_id) * netHeight / 30 | gridSizeY; 31 | 32 | float w = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) * 33 | anchors[mask[z_id] * 2]; 34 | 35 | float h = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2) * 36 | anchors[mask[z_id] * 2 + 1]; 37 | 38 | const float objectness = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]; 39 | 40 | float maxProb = 0.0f; 41 | int maxIndex = -1; 42 | 43 | for (uint i = 0; i < numOutputClasses; ++i) { 44 | float prob = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 45 | if (prob > maxProb) { 46 | maxProb = prob; 47 | maxIndex = i; 48 | } 49 | } 50 | 51 | int count = numGridCells * z_id + bbindex + lastInputSize; 52 | 53 | output[count * 6 + 0] = xc - w * 0.5; 54 | output[count * 6 + 1] = yc - h * 0.5; 55 | output[count * 6 + 2] = xc + w * 0.5; 56 | output[count * 6 + 3] = yc + h * 0.5; 57 | output[count * 6 + 4] = maxProb * objectness; 58 | output[count * 6 + 5] = (float) maxIndex; 59 | } 60 | 61 | cudaError_t cudaYoloLayer_nc(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize, 62 | const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight, 63 | const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 64 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 65 | 66 | cudaError_t cudaYoloLayer_nc(const void* input, void* output, const uint& batchSize, const uint64_t& inputSize, 67 | const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, const uint& netHeight, 68 | const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 69 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) 70 | { 71 | dim3 threads_per_block(16, 16, 4); 72 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, 73 | (numBBoxes / threads_per_block.z) + 1); 74 | 75 | for (unsigned int batch = 0; batch < batchSize; ++batch) { 76 | gpuYoloLayer_nc<<<number_of_blocks, threads_per_block, 0, stream>>>( 77 | reinterpret_cast<const float*> (input) + (batch * inputSize), 78 | reinterpret_cast<float*> (output) + (batch * 6 * outputSize), 79 | netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY, 80 | reinterpret_cast<const float*> (anchors), reinterpret_cast<const int*> (mask)); 81 | } 82 | return cudaGetLastError(); 83 | } 84 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include <cstdint> 7 | 8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 9 | 10 | __device__ void softmaxGPU(const float* input, const int bbindex, const int numGridCells, uint z_id, 11 | const uint numOutputClasses, float temp, float* output) 12 | { 13 | int i; 14 | float sum = 0; 15 | float largest = -INFINITY; 16 | for (i = 0; i < numOutputClasses; ++i) { 17 |
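// First pass: find the largest logit so the exponentials below become
// __expf((x - largest) / temp), the usual max-subtraction trick that keeps
// the softmax numerically stable.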
int val = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 18 | largest = (val>largest) ? val : largest; 19 | } 20 | for (i = 0; i < numOutputClasses; ++i) { 21 | float e = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] / temp - largest / temp); 22 | sum += e; 23 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] = e; 24 | } 25 | for (i = 0; i < numOutputClasses; ++i) { 26 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] /= sum; 27 | } 28 | } 29 | 30 | __global__ void gpuRegionLayer(const float* input, float* softmax, float* output, const uint netWidth, 31 | const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, 32 | const uint64_t lastInputSize, const float* anchors) 33 | { 34 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 35 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 36 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 37 | 38 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) { 39 | return; 40 | } 41 | 42 | const int numGridCells = gridSizeX * gridSizeY; 43 | const int bbindex = y_id * gridSizeX + x_id; 44 | 45 | float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + x_id) * netWidth / 46 | gridSizeX; 47 | 48 | float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + y_id) * netHeight / 49 | gridSizeY; 50 | 51 | float w = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[z_id * 2] * netWidth / 52 | gridSizeX; 53 | 54 | float h = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[z_id * 2 + 1] * 55 | netHeight / gridSizeY; 56 | 57 | const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 58 | 59 | softmaxGPU(input, bbindex, numGridCells, z_id, numOutputClasses, 1.0, softmax); 60 | 61 | float maxProb = 0.0f; 62 | int maxIndex = -1; 63 | 64 | for (uint i = 0; i < numOutputClasses; ++i) { 65 | float prob = softmax[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 66 | if (prob > maxProb) { 67 | maxProb = prob; 68 | maxIndex = i; 69 | } 70 | } 71 | 72 | int count = numGridCells * z_id + bbindex + lastInputSize; 73 | 74 | output[count * 6 + 0] = xc - w * 0.5; 75 | output[count * 6 + 1] = yc - h * 0.5; 76 | output[count * 6 + 2] = xc + w * 0.5; 77 | output[count * 6 + 3] = yc + h * 0.5; 78 | output[count * 6 + 4] = maxProb * objectness; 79 | output[count * 6 + 5] = (float) maxIndex; 80 | } 81 | 82 | cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, const uint& batchSize, 83 | const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, 84 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 85 | const uint& numBBoxes, const void* anchors, cudaStream_t stream); 86 | 87 | cudaError_t cudaRegionLayer(const void* input, void* softmax, void* output, const uint& batchSize, 88 | const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, 89 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 90 | const uint& numBBoxes, const void* anchors, cudaStream_t stream) 91 | { 92 | dim3 threads_per_block(16, 16, 4); 93 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 
1, (gridSizeY / threads_per_block.y) + 1, 94 | (numBBoxes / threads_per_block.z) + 1); 95 | 96 | for (unsigned int batch = 0; batch < batchSize; ++batch) { 97 | gpuRegionLayer<<<number_of_blocks, threads_per_block, 0, stream>>>( 98 | reinterpret_cast<const float*>(input) + (batch * inputSize), 99 | reinterpret_cast<float*>(softmax) + (batch * inputSize), 100 | reinterpret_cast<float*>(output) + (batch * 6 * outputSize), 101 | netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, 102 | reinterpret_cast<const float*>(anchors)); 103 | } 104 | return cudaGetLastError(); 105 | } 106 | -------------------------------------------------------------------------------- /utils/export_damoyolo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | from damo.config.base import parse_config 7 | from damo.utils.model_utils import replace_module 8 | from damo.base_models.core.ops import RepConv, SiLU 9 | from damo.detectors.detector import build_local_model 10 | 11 | 12 | class DeepStreamOutput(nn.Module): 13 | def __init__(self): 14 | super().__init__() 15 | 16 | def forward(self, x): 17 | boxes = x[1] 18 | scores, labels = torch.max(x[0], dim=-1, keepdim=True) 19 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 20 | 21 | 22 | def suppress_warnings(): 23 | import warnings 24 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 25 | warnings.filterwarnings('ignore', category=UserWarning) 26 | warnings.filterwarnings('ignore', category=DeprecationWarning) 27 | warnings.filterwarnings('ignore', category=FutureWarning) 28 | warnings.filterwarnings('ignore', category=ResourceWarning) 29 | 30 | 31 | def damoyolo_export(weights, config_file, device): 32 | config = parse_config(config_file) 33 | config.model.head.export_with_post = True 34 | model = build_local_model(config, device) 35 | ckpt = torch.load(weights, map_location=device) 36 | model.eval() 37 | if 'model' in ckpt: 38 | ckpt = ckpt['model'] 39 | model.load_state_dict(ckpt, strict=True) 40 | model = replace_module(model, nn.SiLU, SiLU) 41 | for layer in model.modules(): 42 | if isinstance(layer, RepConv): 43 | layer.switch_to_deploy() 44 | model.head.nms = False 45 | return config, model 46 | 47 | 48 | def main(args): 49 | suppress_warnings() 50 | 51 | print(f'\nStarting: {args.weights}') 52 | 53 | print('Opening DAMO-YOLO model') 54 | 55 | device = torch.device('cpu') 56 | cfg, model = damoyolo_export(args.weights, args.config, device) 57 | 58 | if len(cfg.dataset['class_names']) > 0: 59 | print('Creating labels.txt file') 60 | with open('labels.txt', 'w', encoding='utf-8') as f: 61 | for name in cfg.dataset['class_names']: 62 | f.write(f'{name}\n') 63 | 64 | model = nn.Sequential(model, DeepStreamOutput()) 65 | 66 | img_size = args.size * 2 if len(args.size) == 1 else args.size 67 | 68 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 69 | onnx_output_file = f'{args.weights}.onnx' 70 | 71 | dynamic_axes = { 72 | 'input': { 73 | 0: 'batch' 74 | }, 75 | 'output': { 76 | 0: 'batch' 77 | } 78 | } 79 | 80 | print('Exporting the model to ONNX') 81 | torch.onnx.export( 82 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 83 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 84 | ) 85 | 86 | if args.simplify: 87 | print('Simplifying the ONNX model') 88 | import onnxslim 89 | model_onnx = onnx.load(onnx_output_file) 90 | model_onnx =
onnxslim.slim(model_onnx) 91 | onnx.save(model_onnx, onnx_output_file) 92 | 93 | print(f'Done: {onnx_output_file}\n') 94 | 95 | 96 | def parse_args(): 97 | import argparse 98 | parser = argparse.ArgumentParser(description='DeepStream DAMO-YOLO conversion') 99 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') 100 | parser.add_argument('-c', '--config', required=True, help='Input config (.py) file path (required)') 101 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 102 | parser.add_argument('--opset', type=int, default=11, help='ONNX opset version') 103 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 104 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 105 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 106 | args = parser.parse_args() 107 | if not os.path.isfile(args.weights): 108 | raise SystemExit('Invalid weights file') 109 | if not os.path.isfile(args.config): 110 | raise SystemExit('Invalid config file') 111 | if args.dynamic and args.batch > 1: 112 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 113 | return args 114 | 115 | 116 | if __name__ == '__main__': 117 | args = parse_args() 118 | main(args) 119 | -------------------------------------------------------------------------------- /utils/export_dfine.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from src.core import YAMLConfig 8 | 9 | 10 | class DeepStreamOutput(nn.Module): 11 | def __init__(self, img_size, use_focal_loss): 12 | super().__init__() 13 | self.img_size = img_size 14 | self.use_focal_loss = use_focal_loss 15 | 16 | def forward(self, x): 17 | boxes = x['pred_boxes'] 18 | convert_matrix = torch.tensor( 19 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 20 | ) 21 | boxes @= convert_matrix 22 | boxes *= torch.as_tensor([[*self.img_size]]).flip(1).tile([1, 2]).unsqueeze(1) 23 | scores = F.sigmoid(x['pred_logits']) if self.use_focal_loss else F.softmax(x['pred_logits'])[:, :, :-1] 24 | scores, labels = torch.max(scores, dim=-1, keepdim=True) 25 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 26 | 27 | 28 | def dfine_export(weights, cfg_file, device): 29 | cfg = YAMLConfig(cfg_file, resume=weights) 30 | if 'HGNetv2' in cfg.yaml_cfg: 31 | cfg.yaml_cfg['HGNetv2']['pretrained'] = False 32 | checkpoint = torch.load(weights, map_location=device) 33 | if 'ema' in checkpoint: 34 | state = checkpoint['ema']['module'] 35 | else: 36 | state = checkpoint['model'] 37 | cfg.model.load_state_dict(state) 38 | return cfg.model.deploy(), cfg.postprocessor.use_focal_loss 39 | 40 | 41 | def suppress_warnings(): 42 | import warnings 43 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 44 | warnings.filterwarnings('ignore', category=UserWarning) 45 | warnings.filterwarnings('ignore', category=DeprecationWarning) 46 | warnings.filterwarnings('ignore', category=FutureWarning) 47 | warnings.filterwarnings('ignore', category=ResourceWarning) 48 | 49 | 50 | def main(args): 51 | suppress_warnings() 52 | 53 | print(f'\nStarting: {args.weights}') 54 | 55 | print('Opening D-FINE model') 56 | 57 | device = torch.device('cpu') 58 | model, 
use_focal_loss = dfine_export(args.weights, args.config, device) 59 | 60 | img_size = args.size * 2 if len(args.size) == 1 else args.size 61 | 62 | model = nn.Sequential(model, DeepStreamOutput(img_size, use_focal_loss)) 63 | 64 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 65 | onnx_output_file = f'{args.weights}.onnx' 66 | 67 | dynamic_axes = { 68 | 'input': { 69 | 0: 'batch' 70 | }, 71 | 'output': { 72 | 0: 'batch' 73 | } 74 | } 75 | 76 | print('Exporting the model to ONNX') 77 | torch.onnx.export( 78 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 79 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 80 | ) 81 | 82 | if args.simplify: 83 | print('Simplifying the ONNX model') 84 | import onnxslim 85 | model_onnx = onnx.load(onnx_output_file) 86 | model_onnx = onnxslim.slim(model_onnx) 87 | onnx.save(model_onnx, onnx_output_file) 88 | 89 | print(f'Done: {onnx_output_file}\n') 90 | 91 | 92 | def parse_args(): 93 | import argparse 94 | parser = argparse.ArgumentParser(description='DeepStream D-FINE conversion') 95 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') 96 | parser.add_argument('-c', '--config', required=True, help='Input YAML (.yml) file path (required)') 97 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 98 | parser.add_argument('--opset', type=int, default=16, help='ONNX opset version') 99 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 100 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 101 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 102 | args = parser.parse_args() 103 | if not os.path.isfile(args.weights): 104 | raise SystemExit('Invalid weights file') 105 | if not os.path.isfile(args.config): 106 | raise SystemExit('Invalid config file') 107 | if args.dynamic and args.batch > 1: 108 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 109 | return args 110 | 111 | 112 | if __name__ == '__main__': 113 | args = parse_args() 114 | main(args) 115 | -------------------------------------------------------------------------------- /utils/export_goldyolo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | import yolov6.utils.general as _m 7 | from yolov6.layers.common import SiLU 8 | from gold_yolo.switch_tool import switch_to_deploy 9 | from yolov6.utils.checkpoint import load_checkpoint 10 | 11 | 12 | def _dist2bbox(distance, anchor_points, box_format='xyxy'): 13 | lt, rb = torch.split(distance, 2, -1) 14 | x1y1 = anchor_points - lt 15 | x2y2 = anchor_points + rb 16 | bbox = torch.cat([x1y1, x2y2], -1) 17 | return bbox 18 | 19 | _m.dist2bbox.__code__ = _dist2bbox.__code__ 20 | 21 | 22 | class DeepStreamOutput(nn.Module): 23 | def __init__(self): 24 | super().__init__() 25 | 26 | def forward(self, x): 27 | boxes = x[:, :, :4] 28 | objectness = x[:, :, 4:5] 29 | scores, labels = torch.max(x[:, :, 5:], dim=-1, keepdim=True) 30 | scores *= objectness 31 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 32 | 33 | 34 | def gold_yolo_export(weights, device, inplace=True, fuse=True): 35 | model = load_checkpoint(weights, map_location=device, inplace=inplace, 
fuse=fuse) 36 | model = switch_to_deploy(model) 37 | for layer in model.modules(): 38 | t = type(layer) 39 | if t.__name__ == 'RepVGGBlock': 40 | layer.switch_to_deploy() 41 | model.eval() 42 | for k, m in model.named_modules(): 43 | if m.__class__.__name__ == 'Conv': 44 | if isinstance(m.act, nn.SiLU): 45 | m.act = SiLU() 46 | elif m.__class__.__name__ == 'Detect': 47 | m.inplace = False 48 | return model 49 | 50 | 51 | def suppress_warnings(): 52 | import warnings 53 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 54 | warnings.filterwarnings('ignore', category=UserWarning) 55 | warnings.filterwarnings('ignore', category=DeprecationWarning) 56 | warnings.filterwarnings('ignore', category=FutureWarning) 57 | warnings.filterwarnings('ignore', category=ResourceWarning) 58 | 59 | 60 | def main(args): 61 | suppress_warnings() 62 | 63 | print(f'\nStarting: {args.weights}') 64 | 65 | print('Opening Gold-YOLO model') 66 | 67 | device = torch.device('cpu') 68 | model = gold_yolo_export(args.weights, device) 69 | 70 | model = nn.Sequential(model, DeepStreamOutput()) 71 | 72 | img_size = args.size * 2 if len(args.size) == 1 else args.size 73 | 74 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 75 | onnx_output_file = f'{args.weights}.onnx' 76 | 77 | dynamic_axes = { 78 | 'input': { 79 | 0: 'batch' 80 | }, 81 | 'output': { 82 | 0: 'batch' 83 | } 84 | } 85 | 86 | print('Exporting the model to ONNX') 87 | torch.onnx.export( 88 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 89 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 90 | ) 91 | 92 | if args.simplify: 93 | print('Simplifying the ONNX model') 94 | import onnxslim 95 | model_onnx = onnx.load(onnx_output_file) 96 | model_onnx = onnxslim.slim(model_onnx) 97 | onnx.save(model_onnx, onnx_output_file) 98 | 99 | print(f'Done: {onnx_output_file}\n') 100 | 101 | 102 | def parse_args(): 103 | import argparse 104 | parser = argparse.ArgumentParser(description='DeepStream Gold-YOLO conversion') 105 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') 106 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 107 | parser.add_argument('--opset', type=int, default=13, help='ONNX opset version') 108 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 109 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 110 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 111 | args = parser.parse_args() 112 | if not os.path.isfile(args.weights): 113 | raise SystemExit('Invalid weights file') 114 | if args.dynamic and args.batch > 1: 115 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 116 | return args 117 | 118 | 119 | if __name__ == '__main__': 120 | args = parse_args() 121 | main(args) 122 | -------------------------------------------------------------------------------- /utils/export_ppyoloe.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import paddle 4 | import paddle.nn as nn 5 | 6 | from ppdet.engine import Trainer 7 | from ppdet.utils.cli import ArgsParser 8 | from ppdet.slim import build_slim_model 9 | from ppdet.data.source.category import get_categories 10 | from ppdet.utils.check import 
check_version, check_config 11 | from ppdet.core.workspace import load_config, merge_config 12 | 13 | 14 | class DeepStreamOutput(nn.Layer): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | def forward(self, x): 19 | boxes = x['bbox'] 20 | x['bbox_num'] = x['bbox_num'].transpose([0, 2, 1]) 21 | scores = paddle.max(x['bbox_num'], axis=-1, keepdim=True) 22 | labels = paddle.argmax(x['bbox_num'], axis=-1, keepdim=True) 23 | return paddle.concat((boxes, scores, paddle.cast(labels, dtype=boxes.dtype)), axis=-1) 24 | 25 | 26 | class DeepStreamInput(nn.Layer): 27 | def __init__(self): 28 | super().__init__() 29 | 30 | def forward(self, x): 31 | y = {} 32 | y['image'] = x['image'] 33 | y['scale_factor'] = paddle.to_tensor([1.0, 1.0], dtype=x['image'].dtype) 34 | return y 35 | 36 | 37 | def ppyoloe_export(FLAGS): 38 | cfg = load_config(FLAGS.config) 39 | FLAGS.opt['weights'] = FLAGS.weights 40 | FLAGS.opt['exclude_nms'] = True 41 | merge_config(FLAGS.opt) 42 | if FLAGS.slim_config: 43 | cfg = build_slim_model(cfg, FLAGS.slim_config, mode='test') 44 | merge_config(FLAGS.opt) 45 | check_config(cfg) 46 | check_version() 47 | trainer = Trainer(cfg, mode='test') 48 | trainer.load_weights(cfg.weights) 49 | trainer.model.eval() 50 | if not os.path.exists('.tmp'): 51 | os.makedirs('.tmp') 52 | static_model, _ = trainer._get_infer_cfg_and_input_spec('.tmp') 53 | os.system('rm -r .tmp') 54 | return trainer.cfg, static_model 55 | 56 | 57 | def suppress_warnings(): 58 | import warnings 59 | warnings.filterwarnings('ignore') 60 | 61 | 62 | def main(FLAGS): 63 | suppress_warnings() 64 | 65 | print(f'\nStarting: {FLAGS.weights}') 66 | 67 | print('Opening PPYOLOE model') 68 | 69 | paddle.set_device('cpu') 70 | cfg, model = ppyoloe_export(FLAGS) 71 | 72 | anno_file = cfg['TestDataset'].get_anno() 73 | if os.path.isfile(anno_file): 74 | _, catid2name = get_categories(cfg['metric'], anno_file, 'detection_arch') 75 | print('Creating labels.txt file') 76 | with open('labels.txt', 'w', encoding='utf-8') as f: 77 | for name in catid2name.values(): 78 | f.write(f'{name}\n') 79 | 80 | model = nn.Sequential(DeepStreamInput(), model, DeepStreamOutput()) 81 | 82 | img_size = [cfg.eval_height, cfg.eval_width] 83 | 84 | onnx_input_im = {} 85 | onnx_input_im['image'] = paddle.static.InputSpec(shape=[FLAGS.batch, 3, *img_size], dtype='float32') 86 | onnx_output_file = f'{FLAGS.weights}.onnx' 87 | 88 | print('Exporting the model to ONNX') 89 | paddle.onnx.export(model, FLAGS.weights, input_spec=[onnx_input_im], opset_version=FLAGS.opset) 90 | 91 | if FLAGS.simplify: 92 | print('Simplifying the ONNX model') 93 | import onnxslim 94 | model_onnx = onnx.load(onnx_output_file) 95 | model_onnx = onnxslim.slim(model_onnx) 96 | onnx.save(model_onnx, onnx_output_file) 97 | 98 | print(f'Done: {onnx_output_file}\n') 99 | 100 | 101 | def parse_args(): 102 | parser = ArgsParser() 103 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pdparams) file path (required)') 104 | parser.add_argument('--slim_config', default=None, type=str, help='Slim configuration file of slim method') 105 | parser.add_argument('--opset', type=int, default=11, help='ONNX opset version') 106 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 107 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 108 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 109 | args = parser.parse_args() 110 | if not os.path.isfile(args.weights): 111 | raise 
SystemExit('Invalid weights file') 112 | if args.dynamic and args.batch > 1: 113 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 114 | elif args.dynamic: 115 | args.batch = None 116 | return args 117 | 118 | 119 | if __name__ == '__main__': 120 | FLAGS = parse_args() 121 | main(FLAGS) 122 | -------------------------------------------------------------------------------- /utils/export_rtdetr_paddle.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import paddle 4 | import paddle.nn as nn 5 | import paddle.nn.functional as F 6 | 7 | from ppdet.engine import Trainer 8 | from ppdet.utils.cli import ArgsParser 9 | from ppdet.utils.check import check_version, check_config 10 | from ppdet.core.workspace import load_config, merge_config 11 | 12 | 13 | class DeepStreamOutput(nn.Layer): 14 | def __init__(self, img_size, use_focal_loss): 15 | super().__init__() 16 | self.img_size = img_size 17 | self.use_focal_loss = use_focal_loss 18 | 19 | def forward(self, x): 20 | boxes = x['bbox'] 21 | convert_matrix = paddle.to_tensor( 22 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype 23 | ) 24 | boxes @= convert_matrix 25 | boxes *= paddle.to_tensor([[*self.img_size]]).flip(1).tile([1, 2]).unsqueeze(1) 26 | bbox_num = F.sigmoid(x['bbox_num']) if self.use_focal_loss else F.softmax(x['bbox_num'])[:, :, :-1] 27 | scores = paddle.max(bbox_num, axis=-1, keepdim=True) 28 | labels = paddle.argmax(bbox_num, axis=-1, keepdim=True) 29 | return paddle.concat((boxes, scores, paddle.cast(labels, dtype=boxes.dtype)), axis=-1) 30 | 31 | 32 | def rtdetr_paddle_export(FLAGS): 33 | cfg = load_config(FLAGS.config) 34 | FLAGS.opt['weights'] = FLAGS.weights 35 | FLAGS.opt['exclude_nms'] = True 36 | FLAGS.opt['exclude_post_process'] = True 37 | merge_config(FLAGS.opt) 38 | merge_config(FLAGS.opt) 39 | check_config(cfg) 40 | check_version() 41 | trainer = Trainer(cfg, mode='test') 42 | trainer.load_weights(cfg.weights) 43 | trainer.model.eval() 44 | if not os.path.exists('.tmp'): 45 | os.makedirs('.tmp') 46 | static_model, _ = trainer._get_infer_cfg_and_input_spec('.tmp') 47 | os.system('rm -r .tmp') 48 | return trainer.cfg, static_model 49 | 50 | 51 | def suppress_warnings(): 52 | import warnings 53 | warnings.filterwarnings('ignore') 54 | 55 | 56 | def main(FLAGS): 57 | suppress_warnings() 58 | 59 | print(f'\nStarting: {FLAGS.weights}') 60 | 61 | print('Opening RT-DETR Paddle model') 62 | 63 | paddle.set_device('cpu') 64 | cfg, model = rtdetr_paddle_export(FLAGS) 65 | 66 | img_size = [cfg.eval_size[1], cfg.eval_size[0]] 67 | 68 | model = nn.Sequential(model, DeepStreamOutput(img_size, cfg.use_focal_loss)) 69 | 70 | onnx_input_im = {} 71 | onnx_input_im['image'] = paddle.static.InputSpec(shape=[FLAGS.batch, 3, *img_size], dtype='float32') 72 | onnx_output_file = f'{FLAGS.weights}.onnx' 73 | 74 | print('Exporting the model to ONNX\n') 75 | paddle.onnx.export(model, FLAGS.weights, input_spec=[onnx_input_im], opset_version=FLAGS.opset) 76 | 77 | if FLAGS.simplify: 78 | print('Simplifying the ONNX model') 79 | import onnxslim 80 | model_onnx = onnx.load(onnx_output_file) 81 | model_onnx = onnxslim.slim(model_onnx) 82 | onnx.save(model_onnx, onnx_output_file) 83 | 84 | print(f'Done: {onnx_output_file}\n') 85 | 86 | 87 | def parse_args(): 88 | parser = ArgsParser() 89 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pdparams) file path (required)') 90 | 
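# Note: rtdetr_paddle_export above never reads FLAGS.slim_config, so unlike in
# export_ppyoloe.py this flag is accepted but has no effect in this script.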
parser.add_argument('--slim_config', default=None, type=str, help='Slim configuration file of slim method') 91 | parser.add_argument('--opset', type=int, default=16, help='ONNX opset version') 92 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 93 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 94 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 95 | args = parser.parse_args() 96 | if not os.path.isfile(args.weights): 97 | raise SystemExit('Invalid weights file') 98 | if args.dynamic and args.batch > 1: 99 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 100 | elif args.dynamic: 101 | args.batch = None 102 | return args 103 | 104 | 105 | if __name__ == '__main__': 106 | FLAGS = parse_args() 107 | main(FLAGS) 108 | -------------------------------------------------------------------------------- /utils/export_rtdetr_pytorch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from src.core import YAMLConfig 8 | 9 | 10 | class DeepStreamOutput(nn.Module): 11 | def __init__(self, img_size, use_focal_loss): 12 | super().__init__() 13 | self.img_size = img_size 14 | self.use_focal_loss = use_focal_loss 15 | 16 | def forward(self, x): 17 | boxes = x['pred_boxes'] 18 | convert_matrix = torch.tensor( 19 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 20 | ) 21 | boxes @= convert_matrix 22 | boxes *= torch.as_tensor([[*self.img_size]]).flip(1).tile([1, 2]).unsqueeze(1) 23 | scores = F.sigmoid(x['pred_logits']) if self.use_focal_loss else F.softmax(x['pred_logits'])[:, :, :-1] 24 | scores, labels = torch.max(scores, dim=-1, keepdim=True) 25 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 26 | 27 | 28 | def rtdetr_pytorch_export(weights, cfg_file, device): 29 | cfg = YAMLConfig(cfg_file, resume=weights) 30 | checkpoint = torch.load(weights, map_location=device) 31 | if 'ema' in checkpoint: 32 | state = checkpoint['ema']['module'] 33 | else: 34 | state = checkpoint['model'] 35 | cfg.model.load_state_dict(state) 36 | return cfg.model.deploy(), cfg.postprocessor.use_focal_loss 37 | 38 | 39 | def suppress_warnings(): 40 | import warnings 41 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 42 | warnings.filterwarnings('ignore', category=UserWarning) 43 | warnings.filterwarnings('ignore', category=DeprecationWarning) 44 | warnings.filterwarnings('ignore', category=FutureWarning) 45 | warnings.filterwarnings('ignore', category=ResourceWarning) 46 | 47 | 48 | def main(args): 49 | suppress_warnings() 50 | 51 | print(f'\nStarting: {args.weights}') 52 | 53 | print('Opening RT-DETR PyTorch model') 54 | 55 | device = torch.device('cpu') 56 | model, use_focal_loss = rtdetr_pytorch_export(args.weights, args.config, device) 57 | 58 | img_size = args.size * 2 if len(args.size) == 1 else args.size 59 | 60 | model = nn.Sequential(model, DeepStreamOutput(img_size, use_focal_loss)) 61 | 62 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 63 | onnx_output_file = f'{args.weights}.onnx' 64 | 65 | dynamic_axes = { 66 | 'input': { 67 | 0: 'batch' 68 | }, 69 | 'output': { 70 | 0: 'batch' 71 | } 72 | } 73 | 74 | print('Exporting the model to ONNX') 75 | torch.onnx.export( 76 | model, onnx_input_im, onnx_output_file, 
verbose=False, opset_version=args.opset, do_constant_folding=True, 77 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 78 | ) 79 | 80 | if args.simplify: 81 | print('Simplifying the ONNX model') 82 | import onnxslim 83 | model_onnx = onnx.load(onnx_output_file) 84 | model_onnx = onnxslim.slim(model_onnx) 85 | onnx.save(model_onnx, onnx_output_file) 86 | 87 | print(f'Done: {onnx_output_file}\n') 88 | 89 | 90 | def parse_args(): 91 | import argparse 92 | parser = argparse.ArgumentParser(description='DeepStream RT-DETR PyTorch conversion') 93 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') 94 | parser.add_argument('-c', '--config', required=True, help='Input YAML (.yml) file path (required)') 95 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 96 | parser.add_argument('--opset', type=int, default=16, help='ONNX opset version') 97 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 98 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 99 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 100 | args = parser.parse_args() 101 | if not os.path.isfile(args.weights): 102 | raise SystemExit('Invalid weights file') 103 | if not os.path.isfile(args.config): 104 | raise SystemExit('Invalid config file') 105 | if args.dynamic and args.batch > 1: 106 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 107 | return args 108 | 109 | 110 | if __name__ == '__main__': 111 | args = parse_args() 112 | main(args) 113 | -------------------------------------------------------------------------------- /utils/export_rtdetr_ultralytics.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | from copy import deepcopy 5 | 6 | from ultralytics import RTDETR 7 | 8 | 9 | class DeepStreamOutput(nn.Module): 10 | def __init__(self, img_size): 11 | super().__init__() 12 | self.img_size = img_size 13 | 14 | def forward(self, x): 15 | boxes = x[:, :, :4] 16 | convert_matrix = torch.tensor( 17 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 18 | ) 19 | boxes @= convert_matrix 20 | boxes *= torch.as_tensor([[*self.img_size]]).flip(1).tile([1, 2]).unsqueeze(1) 21 | scores, labels = torch.max(x[:, :, 4:], dim=-1, keepdim=True) 22 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 23 | 24 | 25 | def rtdetr_ultralytics_export(weights, device): 26 | model = RTDETR(weights) 27 | model = deepcopy(model.model).to(device) 28 | for p in model.parameters(): 29 | p.requires_grad = False 30 | model.eval() 31 | model.float() 32 | model = model.fuse() 33 | for k, m in model.named_modules(): 34 | if m.__class__.__name__ in ('Detect', 'RTDETRDecoder'): 35 | m.dynamic = False 36 | m.export = True 37 | m.format = 'onnx' 38 | elif m.__class__.__name__ == 'C2f': 39 | m.forward = m.forward_split 40 | return model 41 | 42 | 43 | def suppress_warnings(): 44 | import warnings 45 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 46 | warnings.filterwarnings('ignore', category=UserWarning) 47 | warnings.filterwarnings('ignore', category=DeprecationWarning) 48 | warnings.filterwarnings('ignore', category=FutureWarning) 49 | warnings.filterwarnings('ignore', 
category=ResourceWarning) 50 | 51 | 52 | def main(args): 53 | suppress_warnings() 54 | 55 | print(f'\nStarting: {args.weights}') 56 | 57 | print('Opening RT-DETR Ultralytics model') 58 | 59 | device = torch.device('cpu') 60 | model = rtdetr_ultralytics_export(args.weights, device) 61 | 62 | if len(model.names.keys()) > 0: 63 | print('Creating labels.txt file') 64 | with open('labels.txt', 'w', encoding='utf-8') as f: 65 | for name in model.names.values(): 66 | f.write(f'{name}\n') 67 | 68 | img_size = args.size * 2 if len(args.size) == 1 else args.size 69 | 70 | model = nn.Sequential(model, DeepStreamOutput(img_size)) 71 | 72 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 73 | onnx_output_file = f'{args.weights}.onnx' 74 | 75 | dynamic_axes = { 76 | 'input': { 77 | 0: 'batch' 78 | }, 79 | 'output': { 80 | 0: 'batch' 81 | } 82 | } 83 | 84 | print('Exporting the model to ONNX') 85 | torch.onnx.export( 86 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 87 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 88 | ) 89 | 90 | if args.simplify: 91 | print('Simplifying is not available for this model') 92 | 93 | print(f'Done: {onnx_output_file}\n') 94 | 95 | 96 | def parse_args(): 97 | import argparse 98 | parser = argparse.ArgumentParser(description='DeepStream RT-DETR Ultralytics conversion') 99 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') 100 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 101 | parser.add_argument('--opset', type=int, default=17, help='ONNX opset version') 102 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 103 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 104 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 105 | args = parser.parse_args() 106 | if not os.path.isfile(args.weights): 107 | raise SystemExit('Invalid weights file') 108 | if args.dynamic and args.batch > 1: 109 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 110 | return args 111 | 112 | 113 | if __name__ == '__main__': 114 | args = parse_args() 115 | main(args) 116 | -------------------------------------------------------------------------------- /utils/export_yoloV5.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | from models.experimental import attempt_load 7 | 8 | 9 | class DeepStreamOutput(nn.Module): 10 | def __init__(self): 11 | super().__init__() 12 | 13 | def forward(self, x): 14 | x = x[0] 15 | boxes = x[:, :, :4] 16 | convert_matrix = torch.tensor( 17 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 18 | ) 19 | boxes @= convert_matrix 20 | objectness = x[:, :, 4:5] 21 | scores, labels = torch.max(x[:, :, 5:], dim=-1, keepdim=True) 22 | scores *= objectness 23 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 24 | 25 | 26 | def yolov5_export(weights, device, inplace=True, fuse=True): 27 | model = attempt_load(weights, device=device, inplace=inplace, fuse=fuse) 28 | model.eval() 29 | for k, m in model.named_modules(): 30 | if m.__class__.__name__ == 'Detect': 31 | m.inplace = False 32 | m.dynamic = False 33 | 
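# Assumption for recent YOLOv5 releases: with export=True the Detect head returns
# only the concatenated decoded predictions, which DeepStreamOutput unpacks via x = x[0].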
m.export = True 34 | return model 35 | 36 | 37 | def suppress_warnings(): 38 | import warnings 39 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 40 | warnings.filterwarnings('ignore', category=UserWarning) 41 | warnings.filterwarnings('ignore', category=DeprecationWarning) 42 | warnings.filterwarnings('ignore', category=FutureWarning) 43 | warnings.filterwarnings('ignore', category=ResourceWarning) 44 | 45 | 46 | def main(args): 47 | suppress_warnings() 48 | 49 | print(f'\nStarting: {args.weights}') 50 | 51 | print('Opening YOLOv5 model') 52 | 53 | device = torch.device('cpu') 54 | model = yolov5_export(args.weights, device) 55 | 56 | if len(model.names.keys()) > 0: 57 | print('Creating labels.txt file') 58 | with open('labels.txt', 'w', encoding='utf-8') as f: 59 | for name in model.names.values(): 60 | f.write(f'{name}\n') 61 | 62 | model = nn.Sequential(model, DeepStreamOutput()) 63 | 64 | img_size = args.size * 2 if len(args.size) == 1 else args.size 65 | 66 | if img_size == [640, 640] and args.p6: 67 | img_size = [1280] * 2 68 | 69 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 70 | onnx_output_file = f'{args.weights}.onnx' 71 | 72 | dynamic_axes = { 73 | 'input': { 74 | 0: 'batch' 75 | }, 76 | 'output': { 77 | 0: 'batch' 78 | } 79 | } 80 | 81 | print('Exporting the model to ONNX') 82 | torch.onnx.export( 83 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 84 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 85 | ) 86 | 87 | if args.simplify: 88 | print('Simplifying the ONNX model') 89 | import onnxslim 90 | model_onnx = onnx.load(onnx_output_file) 91 | model_onnx = onnxslim.slim(model_onnx) 92 | onnx.save(model_onnx, onnx_output_file) 93 | 94 | print(f'Done: {onnx_output_file}\n') 95 | 96 | 97 | def parse_args(): 98 | import argparse 99 | parser = argparse.ArgumentParser(description='DeepStream YOLOv5 conversion') 100 | parser.add_argument('-w', '--weights', required=True, type=str, help='Input weights (.pt) file path (required)') 101 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 102 | parser.add_argument('--p6', action='store_true', help='P6 model') 103 | parser.add_argument('--opset', type=int, default=17, help='ONNX opset version') 104 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 105 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 106 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 107 | args = parser.parse_args() 108 | if not os.path.isfile(args.weights): 109 | raise SystemExit('Invalid weights file') 110 | if args.dynamic and args.batch > 1: 111 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 112 | return args 113 | 114 | 115 | if __name__ == '__main__': 116 | args = parse_args() 117 | main(args) 118 | -------------------------------------------------------------------------------- /utils/export_yoloV6.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | from yolov6.models.effidehead import Detect 7 | from yolov6.layers.common import RepVGGBlock, SiLU 8 | from yolov6.utils.checkpoint import load_checkpoint 9 | 10 | try: 11 | from yolov6.layers.common import ConvModule 12 | except ImportError: 13 | 
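# Fallback assumed for older YOLOv6 releases, where the fused conv block is
# exposed as Conv instead of ConvModule.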
from yolov6.layers.common import Conv as ConvModule 14 | 15 | 16 | class DeepStreamOutput(nn.Module): 17 | def __init__(self): 18 | super().__init__() 19 | 20 | def forward(self, x): 21 | boxes = x[:, :, :4] 22 | convert_matrix = torch.tensor( 23 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 24 | ) 25 | boxes @= convert_matrix 26 | objectness = x[:, :, 4:5] 27 | scores, labels = torch.max(x[:, :, 5:], dim=-1, keepdim=True) 28 | scores *= objectness 29 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 30 | 31 | 32 | def yolov6_export(weights, device): 33 | model = load_checkpoint(weights, map_location=device, inplace=True, fuse=True) 34 | for layer in model.modules(): 35 | if isinstance(layer, RepVGGBlock): 36 | layer.switch_to_deploy() 37 | elif isinstance(layer, nn.Upsample) and not hasattr(layer, 'recompute_scale_factor'): 38 | layer.recompute_scale_factor = None 39 | model.eval() 40 | for k, m in model.named_modules(): 41 | if isinstance(m, ConvModule): 42 | if hasattr(m, 'act') and isinstance(m.act, nn.SiLU): 43 | m.act = SiLU() 44 | elif isinstance(m, Detect): 45 | m.inplace = False 46 | return model 47 | 48 | 49 | def suppress_warnings(): 50 | import warnings 51 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 52 | warnings.filterwarnings('ignore', category=UserWarning) 53 | warnings.filterwarnings('ignore', category=DeprecationWarning) 54 | warnings.filterwarnings('ignore', category=FutureWarning) 55 | warnings.filterwarnings('ignore', category=ResourceWarning) 56 | 57 | 58 | def main(args): 59 | suppress_warnings() 60 | 61 | print(f'\nStarting: {args.weights}') 62 | 63 | print('Opening YOLOv6 model') 64 | 65 | device = torch.device('cpu') 66 | model = yolov6_export(args.weights, device) 67 | 68 | model = nn.Sequential(model, DeepStreamOutput()) 69 | 70 | img_size = args.size * 2 if len(args.size) == 1 else args.size 71 | 72 | if img_size == [640, 640] and args.p6: 73 | img_size = [1280] * 2 74 | 75 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 76 | onnx_output_file = f'{args.weights}.onnx' 77 | 78 | dynamic_axes = { 79 | 'input': { 80 | 0: 'batch' 81 | }, 82 | 'output': { 83 | 0: 'batch' 84 | } 85 | } 86 | 87 | print('Exporting the model to ONNX') 88 | torch.onnx.export( 89 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 90 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 91 | ) 92 | 93 | if args.simplify: 94 | print('Simplifying the ONNX model') 95 | import onnxslim 96 | model_onnx = onnx.load(onnx_output_file) 97 | model_onnx = onnxslim.slim(model_onnx) 98 | onnx.save(model_onnx, onnx_output_file) 99 | 100 | print(f'Done: {onnx_output_file}\n') 101 | 102 | 103 | def parse_args(): 104 | import argparse 105 | parser = argparse.ArgumentParser(description='DeepStream YOLOv6 conversion') 106 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') 107 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 108 | parser.add_argument('--p6', action='store_true', help='P6 model') 109 | parser.add_argument('--opset', type=int, default=13, help='ONNX opset version') 110 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 111 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 112 | 
parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 113 | args = parser.parse_args() 114 | if not os.path.isfile(args.weights): 115 | raise SystemExit('Invalid weights file') 116 | if args.dynamic and args.batch > 1: 117 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 118 | return args 119 | 120 | 121 | if __name__ == '__main__': 122 | args = parse_args() 123 | main(args) 124 | -------------------------------------------------------------------------------- /utils/export_yoloV7.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | import models 7 | from models.experimental import attempt_load 8 | from utils.torch_utils import select_device 9 | from utils.activations import Hardswish, SiLU 10 | 11 | 12 | class DeepStreamOutput(nn.Module): 13 | def __init__(self): 14 | super().__init__() 15 | 16 | def forward(self, x): 17 | boxes = x[:, :, :4] 18 | convert_matrix = torch.tensor( 19 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 20 | ) 21 | boxes @= convert_matrix 22 | objectness = x[:, :, 4:5] 23 | scores, labels = torch.max(x[:, :, 5:], dim=-1, keepdim=True) 24 | scores *= objectness 25 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 26 | 27 | 28 | def yolov7_export(weights, device): 29 | model = attempt_load(weights, map_location=device) 30 | for k, m in model.named_modules(): 31 | m._non_persistent_buffers_set = set() 32 | if isinstance(m, models.common.Conv): 33 | if isinstance(m.act, nn.Hardswish): 34 | m.act = Hardswish() 35 | elif isinstance(m.act, nn.SiLU): 36 | m.act = SiLU() 37 | model.model[-1].export = False 38 | model.model[-1].concat = True 39 | model.eval() 40 | return model 41 | 42 | 43 | def suppress_warnings(): 44 | import warnings 45 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 46 | warnings.filterwarnings('ignore', category=UserWarning) 47 | warnings.filterwarnings('ignore', category=DeprecationWarning) 48 | warnings.filterwarnings('ignore', category=FutureWarning) 49 | warnings.filterwarnings('ignore', category=ResourceWarning) 50 | 51 | 52 | def main(args): 53 | suppress_warnings() 54 | 55 | print(f'\nStarting: {args.weights}') 56 | 57 | print('Opening YOLOv7 model') 58 | 59 | device = select_device('cpu') 60 | model = yolov7_export(args.weights, device) 61 | 62 | if hasattr(model, 'names') and len(model.names) > 0: 63 | print('Creating labels.txt file') 64 | with open('labels.txt', 'w', encoding='utf-8') as f: 65 | for name in model.names: 66 | f.write(f'{name}\n') 67 | 68 | model = nn.Sequential(model, DeepStreamOutput()) 69 | 70 | img_size = args.size * 2 if len(args.size) == 1 else args.size 71 | 72 | if img_size == [640, 640] and args.p6: 73 | img_size = [1280] * 2 74 | 75 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 76 | onnx_output_file = f'{args.weights}.onnx' 77 | 78 | dynamic_axes = { 79 | 'input': { 80 | 0: 'batch' 81 | }, 82 | 'output': { 83 | 0: 'batch' 84 | } 85 | } 86 | 87 | print('Exporting the model to ONNX') 88 | torch.onnx.export( 89 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 90 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 91 | ) 92 | 93 | if args.simplify: 94 | print('Simplifying the ONNX model') 95 | import onnxslim 96 | 
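# onnxslim is an optional dependency (pip install onnxslim); importing it lazily
# here keeps --simplify opt-in.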
model_onnx = onnx.load(onnx_output_file) 97 | model_onnx = onnxslim.slim(model_onnx) 98 | onnx.save(model_onnx, onnx_output_file) 99 | 100 | print(f'Done: {onnx_output_file}\n') 101 | 102 | 103 | def parse_args(): 104 | import argparse 105 | parser = argparse.ArgumentParser(description='DeepStream YOLOv7 conversion') 106 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') 107 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 108 | parser.add_argument('--p6', action='store_true', help='P6 model') 109 | parser.add_argument('--opset', type=int, default=12, help='ONNX opset version') 110 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 111 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 112 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 113 | args = parser.parse_args() 114 | if not os.path.isfile(args.weights): 115 | raise SystemExit('Invalid weights file') 116 | if args.dynamic and args.batch > 1: 117 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 118 | return args 119 | 120 | 121 | if __name__ == '__main__': 122 | args = parse_args() 123 | main(args) 124 | -------------------------------------------------------------------------------- /utils/export_yoloV7_u6.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | from utils.torch_utils import select_device 7 | from models.experimental import attempt_load 8 | from models.yolo import Detect, V6Detect, IV6Detect 9 | 10 | 11 | class DeepStreamOutput(nn.Module): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | def forward(self, x): 16 | x = x.transpose(1, 2) 17 | boxes = x[:, :, :4] 18 | convert_matrix = torch.tensor( 19 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 20 | ) 21 | boxes @= convert_matrix 22 | scores, labels = torch.max(x[:, :, 4:], dim=-1, keepdim=True) 23 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 24 | 25 | 26 | def yolov7_u6_export(weights, device): 27 | model = attempt_load(weights, device=device, inplace=True, fuse=True) 28 | model.eval() 29 | for k, m in model.named_modules(): 30 | if isinstance(m, (Detect, V6Detect, IV6Detect)): 31 | m.inplace = False 32 | m.dynamic = False 33 | m.export = True 34 | return model 35 | 36 | 37 | def suppress_warnings(): 38 | import warnings 39 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 40 | warnings.filterwarnings('ignore', category=UserWarning) 41 | warnings.filterwarnings('ignore', category=DeprecationWarning) 42 | warnings.filterwarnings('ignore', category=FutureWarning) 43 | warnings.filterwarnings('ignore', category=ResourceWarning) 44 | 45 | 46 | def main(args): 47 | suppress_warnings() 48 | 49 | print(f'\nStarting: {args.weights}') 50 | 51 | print('Opening YOLOv7_u6 model') 52 | 53 | device = select_device('cpu') 54 | model = yolov7_u6_export(args.weights, device) 55 | 56 | if len(model.names.keys()) > 0: 57 | print('Creating labels.txt file') 58 | with open('labels.txt', 'w', encoding='utf-8') as f: 59 | for name in model.names.values(): 60 | f.write(f'{name}\n') 61 | 62 | model = nn.Sequential(model, DeepStreamOutput()) 63 | 64 | img_size = args.size * 2 if len(args.size) == 1 else args.size 65 
| 66 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 67 | onnx_output_file = f'{args.weights}.onnx' 68 | 69 | dynamic_axes = { 70 | 'input': { 71 | 0: 'batch' 72 | }, 73 | 'output': { 74 | 0: 'batch' 75 | } 76 | } 77 | 78 | print('Exporting the model to ONNX') 79 | torch.onnx.export( 80 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 81 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 82 | ) 83 | 84 | if args.simplify: 85 | print('Simplifying the ONNX model') 86 | import onnxslim 87 | model_onnx = onnx.load(onnx_output_file) 88 | model_onnx = onnxslim.slim(model_onnx) 89 | onnx.save(model_onnx, onnx_output_file) 90 | 91 | print(f'Done: {onnx_output_file}\n') 92 | 93 | 94 | def parse_args(): 95 | import argparse 96 | parser = argparse.ArgumentParser(description='DeepStream YOLOv7-u6 conversion') 97 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)') 98 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 99 | parser.add_argument('--opset', type=int, default=12, help='ONNX opset version') 100 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 101 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 102 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 103 | args = parser.parse_args() 104 | if not os.path.isfile(args.weights): 105 | raise SystemExit('Invalid weights file') 106 | if args.dynamic and args.batch > 1: 107 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 108 | return args 109 | 110 | 111 | if __name__ == '__main__': 112 | args = parse_args() 113 | main(args) 114 | -------------------------------------------------------------------------------- /utils/export_yolonas.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | from super_gradients.training import models 7 | 8 | 9 | class DeepStreamOutput(nn.Module): 10 | def __init__(self): 11 | super().__init__() 12 | 13 | def forward(self, x): 14 | boxes = x[0] 15 | scores, labels = torch.max(x[1], dim=-1, keepdim=True) 16 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 17 | 18 | 19 | def suppress_warnings(): 20 | import warnings 21 | warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 22 | warnings.filterwarnings('ignore', category=UserWarning) 23 | warnings.filterwarnings('ignore', category=DeprecationWarning) 24 | warnings.filterwarnings('ignore', category=FutureWarning) 25 | warnings.filterwarnings('ignore', category=ResourceWarning) 26 | 27 | 28 | def yolonas_export(model_name, weights, num_classes, size): 29 | img_size = size * 2 if len(size) == 1 else size 30 | model = models.get(model_name, num_classes=num_classes, checkpoint_path=weights) 31 | model.eval() 32 | model.prep_model_for_conversion(input_size=[1, 3, *img_size]) 33 | return model 34 | 35 | 36 | def main(args): 37 | suppress_warnings() 38 | 39 | print(f'\nStarting: {args.weights}') 40 | 41 | print('Opening YOLO-NAS model') 42 | 43 | device = torch.device('cpu') 44 | model = yolonas_export(args.model, args.weights, args.classes, args.size) 45 | 46 | model = nn.Sequential(model, DeepStreamOutput()) 47 | 48 | img_size = args.size * 2 if len(args.size) 
== 1 else args.size 49 | 50 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 51 | onnx_output_file = f'{args.weights}.onnx' 52 | 53 | dynamic_axes = { 54 | 'input': { 55 | 0: 'batch' 56 | }, 57 | 'output': { 58 | 0: 'batch' 59 | } 60 | } 61 | 62 | print('Exporting the model to ONNX') 63 | torch.onnx.export( 64 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 65 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 66 | ) 67 | 68 | if args.simplify: 69 | print('Simplifying the ONNX model') 70 | import onnxslim 71 | model_onnx = onnx.load(onnx_output_file) 72 | model_onnx = onnxslim.slim(model_onnx) 73 | onnx.save(model_onnx, onnx_output_file) 74 | 75 | print(f'Done: {onnx_output_file}\n') 76 | 77 | 78 | def parse_args(): 79 | import argparse 80 | parser = argparse.ArgumentParser(description='DeepStream YOLO-NAS conversion') 81 | parser.add_argument('-m', '--model', required=True, help='Model name (required)') 82 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') 83 | parser.add_argument('-n', '--classes', type=int, default=80, help='Number of trained classes (default 80)') 84 | parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])') 85 | parser.add_argument('--opset', type=int, default=14, help='ONNX opset version') 86 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 87 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 88 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 89 | args = parser.parse_args() 90 | if args.model == '': 91 | raise SystemExit('Invalid model name') 92 | if not os.path.isfile(args.weights): 93 | raise SystemExit('Invalid weights file') 94 | if args.dynamic and args.batch > 1: 95 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 96 | return args 97 | 98 | 99 | if __name__ == '__main__': 100 | args = parse_args() 101 | main(args) 102 | -------------------------------------------------------------------------------- /utils/export_yolox.py: -------------------------------------------------------------------------------- 1 | import os 2 | import onnx 3 | import torch 4 | import torch.nn as nn 5 | 6 | from yolox.exp import get_exp 7 | from yolox.utils import replace_module 8 | from yolox.models.network_blocks import SiLU 9 | 10 | 11 | class DeepStreamOutput(nn.Module): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | def forward(self, x): 16 | boxes = x[:, :, :4] 17 | convert_matrix = torch.tensor( 18 | [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=boxes.dtype, device=boxes.device 19 | ) 20 | boxes @= convert_matrix 21 | objectness = x[:, :, 4:5] 22 | scores, labels = torch.max(x[:, :, 5:], dim=-1, keepdim=True) 23 | scores *= objectness 24 | return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1) 25 | 26 | 27 | def yolox_export(weights, exp_file): 28 | exp = get_exp(exp_file) 29 | model = exp.get_model() 30 | ckpt = torch.load(weights, map_location='cpu') 31 | model.eval() 32 | if 'model' in ckpt: 33 | ckpt = ckpt['model'] 34 | model.load_state_dict(ckpt) 35 | model = replace_module(model, nn.SiLU, SiLU) 36 | model.head.decode_in_inference = True 37 | return model, exp 38 | 39 | 40 | def suppress_warnings(): 41 | import warnings 42 | 
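# torch.onnx.export traces the model, and tracing data-dependent control flow
# emits TracerWarnings; suppressing them keeps the export log readable.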
warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) 43 | warnings.filterwarnings('ignore', category=UserWarning) 44 | warnings.filterwarnings('ignore', category=DeprecationWarning) 45 | warnings.filterwarnings('ignore', category=FutureWarning) 46 | warnings.filterwarnings('ignore', category=ResourceWarning) 47 | 48 | 49 | def main(args): 50 | suppress_warnings() 51 | 52 | print(f'\nStarting: {args.weights}') 53 | 54 | print('Opening YOLOX model') 55 | 56 | device = torch.device('cpu') 57 | model, exp = yolox_export(args.weights, args.exp) 58 | 59 | model = nn.Sequential(model, DeepStreamOutput()) 60 | 61 | img_size = [exp.input_size[1], exp.input_size[0]] 62 | 63 | onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device) 64 | onnx_output_file = f'{args.weights}.onnx' 65 | 66 | dynamic_axes = { 67 | 'input': { 68 | 0: 'batch' 69 | }, 70 | 'output': { 71 | 0: 'batch' 72 | } 73 | } 74 | 75 | print('Exporting the model to ONNX') 76 | torch.onnx.export( 77 | model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset, do_constant_folding=True, 78 | input_names=['input'], output_names=['output'], dynamic_axes=dynamic_axes if args.dynamic else None 79 | ) 80 | 81 | if args.simplify: 82 | print('Simplifying the ONNX model') 83 | import onnxslim 84 | model_onnx = onnx.load(onnx_output_file) 85 | model_onnx = onnxslim.slim(model_onnx) 86 | onnx.save(model_onnx, onnx_output_file) 87 | 88 | print(f'Done: {onnx_output_file}\n') 89 | 90 | 91 | def parse_args(): 92 | import argparse 93 | parser = argparse.ArgumentParser(description='DeepStream YOLOX conversion') 94 | parser.add_argument('-w', '--weights', required=True, help='Input weights (.pth) file path (required)') 95 | parser.add_argument('-c', '--exp', required=True, help='Input exp (.py) file path (required)') 96 | parser.add_argument('--opset', type=int, default=11, help='ONNX opset version') 97 | parser.add_argument('--simplify', action='store_true', help='ONNX simplify model') 98 | parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size') 99 | parser.add_argument('--batch', type=int, default=1, help='Static batch-size') 100 | args = parser.parse_args() 101 | if not os.path.isfile(args.weights): 102 | raise SystemExit('Invalid weights file') 103 | if args.dynamic and args.batch > 1: 104 | raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time') 105 | return args 106 | 107 | 108 | if __name__ == '__main__': 109 | args = parse_args() 110 | main(args) 111 | --------------------------------------------------------------------------------
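A closing note on the shared output contract: every DeepStreamOutput wrapper in the export scripts above packs its model's predictions into a single [batch, num_candidates, 6] tensor of (left, top, right, bottom, score, class_id), which is the layout the NvDsInferParseYolo custom parser consumes. A minimal sketch of that contract, assuming a hypothetical decoded head output of shape [batch, N, 4 + num_classes] with xyxy pixel boxes:

import torch
import torch.nn as nn


class DeepStreamOutputSketch(nn.Module):
    # Hypothetical wrapper illustrating the common [batch, N, 6] layout.

    def forward(self, x):
        # x: [batch, N, 4 + num_classes]; boxes assumed already decoded to xyxy pixels
        boxes = x[:, :, :4]
        # best class confidence and index per candidate box
        scores, labels = torch.max(x[:, :, 4:], dim=-1, keepdim=True)
        # pack as (x1, y1, x2, y2, score, class_id)
        return torch.cat([boxes, scores, labels.to(boxes.dtype)], dim=-1)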