├── src ├── models │ └── .gitkeep ├── nvdsinfer_custom_impl_Yolo │ ├── layers │ │ ├── pooling_layer.h │ │ ├── activation_layer.h │ │ ├── upsample_layer.h │ │ ├── slice_layer.h │ │ ├── channels_layer.h │ │ ├── route_layer.h │ │ ├── reorg_layer.h │ │ ├── sam_layer.h │ │ ├── implicit_layer.h │ │ ├── batchnorm_layer.h │ │ ├── shortcut_layer.h │ │ ├── deconvolutional_layer.h │ │ ├── convolutional_layer.h │ │ ├── sam_layer.cpp │ │ ├── upsample_layer.cpp │ │ ├── implicit_layer.cpp │ │ ├── channels_layer.cpp │ │ ├── shortcut_layer.cpp │ │ ├── pooling_layer.cpp │ │ ├── slice_layer.cpp │ │ ├── batchnorm_layer.cpp │ │ ├── deconvolutional_layer.cpp │ │ ├── route_layer.cpp │ │ ├── reorg_layer.cpp │ │ ├── activation_layer.cpp │ │ └── convolutional_layer.cpp │ ├── nvdsinitinputlayers_Yolo.cpp │ ├── utils.h │ ├── calibrator.h │ ├── Makefile │ ├── yoloForward_nc.cu │ ├── yoloForward.cu │ ├── calibrator.cpp │ ├── utils.cpp │ ├── yoloForward_v2.cu │ ├── yolo.h │ ├── nvdsinfer_yolo_engine.cpp │ ├── yoloPlugins.h │ ├── nvdsparsebbox_Yolo.cpp │ ├── nvdsparsebbox_Yolo_cuda.cu │ ├── yoloPlugins.cpp │ └── yolo.cpp ├── config_infer_primary_yoloV11.txt ├── labels.txt ├── deepstream_app_config.txt ├── scripts │ └── export_yolo11.py └── config_tracker_NvDCF_perf.yml ├── requirements.txt ├── .devcontainer ├── Dockerfile └── devcontainer.json ├── LICENSE ├── .gitignore └── README.md /src/models/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.4.1 2 | ultralytics==8.3.11 3 | onnx==1.17.0 4 | onnxruntime==1.19.2 -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/pooling_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos 
Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __POOLING_LAYER_H__ 7 | #define __POOLING_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* poolingLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 15 | nvinfer1::INetworkDefinition* network); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/activation_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __ACTIVATION_LAYER_H__ 7 | #define __ACTIVATION_LAYER_H__ 8 | 9 | #include 10 | 11 | #include "NvInfer.h" 12 | 13 | nvinfer1::ITensor* activationLayer(int layerIdx, std::string activation, nvinfer1::ITensor* input, 14 | nvinfer1::INetworkDefinition* network, std::string layerName = ""); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __UPSAMPLE_LAYER_H__ 7 | #define __UPSAMPLE_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* upsampleLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 15 | nvinfer1::INetworkDefinition* network); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/slice_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SLICE_LAYER_H__ 7 | #define __SLICE_LAYER_H__ 8 | 9 | #include 10 | 11 
| #include "NvInfer.h" 12 | 13 | nvinfer1::ITensor* sliceLayer(int layerIdx, std::string& name, nvinfer1::ITensor* input, nvinfer1::Dims start, 14 | nvinfer1::Dims size, nvinfer1::Dims stride, nvinfer1::INetworkDefinition* network, uint batchSize); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/channels_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __CHANNELS_LAYER_H__ 7 | #define __CHANNELS_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* channelsLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 15 | nvinfer1::ITensor* implicitTensor, nvinfer1::INetworkDefinition* network); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/route_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __ROUTE_LAYER_H__ 7 | #define __ROUTE_LAYER_H__ 8 | 9 | #include "../utils.h" 10 | 11 | #include "slice_layer.h" 12 | 13 | nvinfer1::ITensor* routeLayer(int layerIdx, std::string& layers, std::map& block, 14 | std::vector tensorOutputs, nvinfer1::INetworkDefinition* network, uint batchSize); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/reorg_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __REORG_LAYER_H__ 7 | #define __REORG_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | 
#include "slice_layer.h" 15 | 16 | nvinfer1::ITensor* reorgLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 17 | nvinfer1::INetworkDefinition* network, uint batchSize); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/sam_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SAM_LAYER_H__ 7 | #define __SAM_LAYER_H__ 8 | 9 | #include 10 | 11 | #include "NvInfer.h" 12 | 13 | #include "activation_layer.h" 14 | 15 | nvinfer1::ITensor* samLayer(int layerIdx, std::string activation, std::map& block, 16 | nvinfer1::ITensor* input, nvinfer1::ITensor* samInput, nvinfer1::INetworkDefinition* network); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | # ARG BASE_DEEPSTREAM_DEV_CONTAINER=nvcr.io/nvidia/deepstream:6.3-gc-triton-devel 2 | 3 | FROM nvcr.io/nvidia/deepstream:7.0-gc-triton-devel AS build 4 | 5 | RUN apt-get update && \ 6 | apt-get install -y build-essential \ 7 | curl \ 8 | && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* 9 | 10 | RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py && rm get-pip.py 11 | 12 | # # Install Python dependencies 13 | COPY requirements.txt . 
14 | RUN pip3 install -r requirements.txt -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __IMPLICIT_LAYER_H__ 7 | #define __IMPLICIT_LAYER_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "NvInfer.h" 14 | 15 | nvinfer1::ITensor* implicitLayer(int layerIdx, std::map& block, std::vector& weights, 16 | std::vector& trtWeights, int& weightPtr, nvinfer1::INetworkDefinition* network); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __BATCHNORM_LAYER_H__ 7 | #define __BATCHNORM_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* batchnormLayer(int layerIdx, std::map& block, std::vector& weights, 17 | std::vector& trtWeights, int& weightPtr, nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SHORTCUT_LAYER_H__ 7 | #define __SHORTCUT_LAYER_H__ 8 | 9 | #include 10 | 11 | #include "NvInfer.h" 12 | 13 | #include "slice_layer.h" 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* shortcutLayer(int layerIdx, std::string 
activation, std::string inputVol, std::string shortcutVol, 17 | std::map& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcut, 18 | nvinfer1::INetworkDefinition* network, uint batchSize); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "deepstream-yolo11", 3 | "build": { 4 | "dockerfile": "Dockerfile", 5 | "context": ".." 6 | }, 7 | "remoteEnv": { 8 | "DISPLAY": "${env:DISPLAY}" 9 | }, 10 | "runArgs": [ 11 | "--net=host", 12 | "--gpus=all" 13 | ], 14 | "privileged": true, 15 | "customizations": { 16 | "vscode": { 17 | "extensions": [ 18 | "ms-vscode.cpptools", 19 | "ms-python.python", 20 | "ms-python.isort", 21 | "ms-python.black-formatter" 22 | ] 23 | } 24 | } 25 | 26 | } -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __DECONVOLUTIONAL_LAYER_H__ 7 | #define __DECONVOLUTIONAL_LAYER_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "NvInfer.h" 14 | 15 | nvinfer1::ITensor* deconvolutionalLayer(int layerIdx, std::map& block, std::vector& weights, 16 | std::vector& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, 17 | nvinfer1::INetworkDefinition* network, std::string layerName = ""); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef
__CONVOLUTIONAL_LAYER_H__ 7 | #define __CONVOLUTIONAL_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* convolutionalLayer(int layerIdx, std::map& block, std::vector& weights, 17 | std::vector& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network, std::string layerName = ""); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/config_infer_primary_yoloV11.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=models/yolo11m.onnx 6 | model-engine-file=model_b1_gpu0_fp32.engine 7 | #int8-calib-file=calib.table 8 | labelfile-path=labels.txt 9 | batch-size=1 10 | network-mode=0 11 | num-detected-classes=80 12 | interval=0 13 | gie-unique-id=1 14 | process-mode=1 15 | network-type=0 16 | cluster-mode=2 17 | maintain-aspect-ratio=1 18 | symmetric-padding=1 19 | #workspace-size=2000 20 | parse-bbox-func-name=NvDsInferParseYolo 21 | #parse-bbox-func-name=NvDsInferParseYoloCuda 22 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 23 | engine-create-func-name=NvDsInferYoloCudaEngineGet 24 | 25 | [class-attrs-all] 26 | nms-iou-threshold=0.45 27 | pre-cluster-threshold=0.25 28 | topk=300 29 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/sam_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "sam_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | samLayer(int layerIdx, std::string activation, std::map& block, nvinfer1::ITensor* input, 12 | nvinfer1::ITensor* samInput, 
nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "sam"); 17 | 18 | nvinfer1::IElementWiseLayer* sam = network->addElementWise(*input, *samInput, nvinfer1::ElementWiseOperation::kPROD); 19 | assert(sam != nullptr); 20 | std::string samLayerName = "sam_" + std::to_string(layerIdx); 21 | sam->setName(samLayerName.c_str()); 22 | output = sam->getOutput(0); 23 | 24 | output = activationLayer(layerIdx, activation, output, network); 25 | assert(output != nullptr); 26 | 27 | return output; 28 | } 29 | -------------------------------------------------------------------------------- /src/labels.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "upsample_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | upsampleLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "upsample"); 17 | assert(block.find("stride") != block.end()); 18 | 19 | int stride = std::stoi(block.at("stride")); 20 | 21 | float scale[4] = {1, 1, static_cast(stride), static_cast(stride)}; 22 | 23 | nvinfer1::IResizeLayer* resize = network->addResize(*input); 24 | assert(resize != nullptr); 25 | std::string resizeLayerName = "upsample_" + std::to_string(layerIdx); 26 | resize->setName(resizeLayerName.c_str()); 27 | resize->setResizeMode(nvinfer1::ResizeMode::kNEAREST); 28 | resize->setScales(scale, 4); 29 | output = resize->getOutput(0); 30 | 31 | return output; 32 | } 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Santu Hazra 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "implicit_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | implicitLayer(int layerIdx, std::map& block, std::vector& weights, 12 | std::vector& trtWeights, int& weightPtr, nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "implicit" || block.at("type") == "implicit_add" || block.at("type") == "implicit_mul"); 17 | assert(block.find("filters") != block.end()); 18 | 19 | int filters = std::stoi(block.at("filters")); 20 | 21 | nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, filters}; 22 | 23 | float* val = new float[filters]; 24 | for (int i = 0; i < filters; ++i) { 25 | val[i] = weights[weightPtr]; 26 | ++weightPtr; 27 | } 28 | convWt.values = val; 29 | trtWeights.push_back(convWt); 30 | 31 | nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims{4, {1, filters, 1, 1}}, convWt); 32 | assert(implicit != nullptr); 33 | std::string implicitLayerName = block.at("type") + "_" + std::to_string(layerIdx); 34 | implicit->setName(implicitLayerName.c_str()); 35 | output = implicit->getOutput(0); 36 | 37 | return output; 38 | } 39 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created 
by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "channels_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | channelsLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 12 | nvinfer1::ITensor* implicitTensor, nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "shift_channels" || block.at("type") == "control_channels"); 17 | 18 | if (block.at("type") == "shift_channels") { 19 | nvinfer1::IElementWiseLayer* shift = network->addElementWise(*input, *implicitTensor, 20 | nvinfer1::ElementWiseOperation::kSUM); 21 | assert(shift != nullptr); 22 | std::string shiftLayerName = "shift_channels_" + std::to_string(layerIdx); 23 | shift->setName(shiftLayerName.c_str()); 24 | output = shift->getOutput(0); 25 | } 26 | else if (block.at("type") == "control_channels") { 27 | nvinfer1::IElementWiseLayer* control = network->addElementWise(*input, *implicitTensor, 28 | nvinfer1::ElementWiseOperation::kPROD); 29 | assert(control != nullptr); 30 | std::string controlLayerName = "control_channels_" + std::to_string(layerIdx); 31 | control->setName(controlLayerName.c_str()); 32 | output = control->getOutput(0); 33 | } 34 | 35 | return output; 36 | } 37 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "shortcut_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | shortcutLayer(int layerIdx, std::string activation, std::string inputVol, std::string shortcutVol, 12 | std::map& block, nvinfer1::ITensor* input, nvinfer1::ITensor* shortcutInput, 13 | nvinfer1::INetworkDefinition* network, uint batchSize) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "shortcut"); 
18 | 19 | if (inputVol != shortcutVol) { 20 | std::string name = "slice"; 21 | nvinfer1::Dims start = {4, {0, 0, 0, 0}}; 22 | nvinfer1::Dims size = input->getDimensions(); 23 | nvinfer1::Dims stride = nvinfer1::Dims{4, {1, 1, 1, 1}}; 24 | 25 | output = sliceLayer(layerIdx, name, shortcutInput, start, size, stride, network, batchSize); 26 | assert(output != nullptr); 27 | } 28 | else 29 | output = shortcutInput; 30 | 31 | nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output, nvinfer1::ElementWiseOperation::kSUM); 32 | assert(shortcut != nullptr); 33 | std::string shortcutLayerName = "shortcut_" + std::to_string(layerIdx); 34 | shortcut->setName(shortcutLayerName.c_str()); 35 | output = shortcut->getOutput(0); 36 | 37 | output = activationLayer(layerIdx, activation, output, network); 38 | assert(output != nullptr); 39 | 40 | return output; 41 | } 42 | -------------------------------------------------------------------------------- /src/deepstream_app_config.txt: -------------------------------------------------------------------------------- 1 | [application] 2 | enable-perf-measurement=1 3 | perf-measurement-interval-sec=5 4 | 5 | [tiled-display] 6 | enable=1 7 | rows=1 8 | columns=1 9 | width=1280 10 | height=720 11 | gpu-id=0 12 | nvbuf-memory-type=0 13 | 14 | [source0] 15 | enable=1 16 | type=1 17 | num-sources=1 18 | camera-width=1280 19 | camera-height=720 20 | camera-v4l2-dev-node=0 21 | gpu-id=0 22 | cudadec-memtype=0 23 | 24 | 25 | [sink0] 26 | enable=1 27 | type=2 28 | sync=0 29 | gpu-id=0 30 | nvbuf-memory-type=0 31 | 32 | [osd] 33 | enable=1 34 | gpu-id=0 35 | border-width=5 36 | text-size=15 37 | text-color=1;1;1;1; 38 | text-bg-color=0.3;0.3;0.3;1 39 | font=Serif 40 | show-clock=0 41 | clock-x-offset=800 42 | clock-y-offset=820 43 | clock-text-size=12 44 | clock-color=1;0;0;0 45 | nvbuf-memory-type=0 46 | 47 | [streammux] 48 | gpu-id=0 49 | live-source=0 50 | batch-size=1 51 | batched-push-timeout=40000 52 | width=1280 53 | 
height=720 54 | enable-padding=0 55 | nvbuf-memory-type=0 56 | 57 | [primary-gie] 58 | enable=1 59 | gpu-id=0 60 | gie-unique-id=1 61 | nvbuf-memory-type=0 62 | config-file=config_infer_primary_yoloV11.txt 63 | 64 | [tracker] 65 | enable=1 66 | # For NvDCF and DeepSORT tracker, tracker-width and tracker-height must be a multiple of 32, respectively 67 | tracker-width=640 68 | tracker-height=384 69 | ll-lib-file=/opt/nvidia/deepstream/deepstream-7.0/lib/libnvds_nvmultiobjecttracker.so 70 | # ll-config-file required to set different tracker types 71 | # ll-config-file=config_tracker_IOU.yml 72 | ll-config-file=config_tracker_NvDCF_perf.yml 73 | # ll-config-file=config_tracker_NvDCF_accuracy.yml 74 | # ll-config-file=config_tracker_DeepSORT.yml 75 | gpu-id=0 76 | enable-batch-process=1 77 | enable-past-frame=1 78 | display-tracking-id=1 79 | 80 | [tests] 81 | file-loop=0 82 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/nvdsinitinputlayers_Yolo.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "nvdsinfer_custom_impl.h" 27 | 28 | bool 29 | NvDsInferInitializeInputLayers(std::vector const& inputLayersInfo, 30 | NvDsInferNetworkInfo const& networkInfo, unsigned int maxBatchSize) 31 | { 32 | float* scaleFactor = (float*) inputLayersInfo[0].buffer; 33 | for (unsigned int i = 0; i < maxBatchSize; i++) { 34 | scaleFactor[i * 2 + 0] = 1.0; 35 | scaleFactor[i * 2 + 1] = 1.0; 36 | } 37 | return true; 38 | } 39 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #ifndef __UTILS_H__ 27 | #define __UTILS_H__ 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "NvInfer.h" 37 | 38 | std::string trim(std::string s); 39 | 40 | float clamp(const float val, const float minVal, const float maxVal); 41 | 42 | bool fileExists(const std::string fileName, bool verbose = true); 43 | 44 | std::vector loadWeights(const std::string weightsFilePath, const std::string& modelName); 45 | 46 | std::string dimsToString(const nvinfer1::Dims d); 47 | 48 | int getNumChannels(nvinfer1::ITensor* t); 49 | 50 | void printLayerInfo( 51 | std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr); 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "pooling_layer.h" 7 | 8 | #include 9 | #include 10 | 11 | nvinfer1::ITensor* 12 | poolingLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 13 | nvinfer1::INetworkDefinition* network) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "max" || block.at("type") 
== "maxpool" || block.at("type") == "avg" || 18 | block.at("type") == "avgpool"); 19 | 20 | if (block.at("type") == "max" || block.at("type") == "maxpool") { 21 | assert(block.find("size") != block.end()); 22 | assert(block.find("stride") != block.end()); 23 | 24 | int size = std::stoi(block.at("size")); 25 | int stride = std::stoi(block.at("stride")); 26 | 27 | nvinfer1::IPoolingLayer* maxpool = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, 28 | nvinfer1::Dims{2, {size, size}}); 29 | assert(maxpool != nullptr); 30 | std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx); 31 | maxpool->setName(maxpoolLayerName.c_str()); 32 | maxpool->setStrideNd(nvinfer1::Dims{2, {stride, stride}}); 33 | maxpool->setPaddingNd(nvinfer1::Dims{2, {(size - 1) / 2, (size - 1) / 2}}); 34 | if (size == 2 && stride == 1) { 35 | maxpool->setPrePadding(nvinfer1::Dims{2, {0, 0}}); 36 | maxpool->setPostPadding(nvinfer1::Dims{2, {1, 1}}); 37 | } 38 | output = maxpool->getOutput(0); 39 | } 40 | else if (block.at("type") == "avg" || block.at("type") == "avgpool") { 41 | nvinfer1::Dims inputDims = input->getDimensions(); 42 | nvinfer1::IPoolingLayer* avgpool = network->addPoolingNd(*input, nvinfer1::PoolingType::kAVERAGE, 43 | nvinfer1::Dims{2, {inputDims.d[1], inputDims.d[2]}}); 44 | assert(avgpool != nullptr); 45 | std::string avgpoolLayerName = "avgpool_" + std::to_string(layerIdx); 46 | avgpool->setName(avgpoolLayerName.c_str()); 47 | output = avgpool->getOutput(0); 48 | } 49 | else { 50 | std::cerr << "Pooling not supported: " << block.at("type") << std::endl; 51 | assert(0); 52 | } 53 | 54 | return output; 55 | } 56 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/calibrator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef CALIBRATOR_H 7 | #define CALIBRATOR_H 
8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | #include "opencv2/opencv.hpp" 14 | 15 | #define CUDA_CHECK(status) { \ 16 | if (status != 0) { \ 17 | std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " << __LINE__ << \ 18 | std::endl; \ 19 | abort(); \ 20 | } \ 21 | } 22 | 23 | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 { 24 | public: 25 | Int8EntropyCalibrator2(const int& batchSize, const int& channels, const int& height, const int& width, 26 | const float& scaleFactor, const float* offsets, const std::string& imgPath, const std::string& calibTablePath); 27 | 28 | virtual ~Int8EntropyCalibrator2(); 29 | 30 | int getBatchSize() const noexcept override; 31 | 32 | bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override; 33 | 34 | const void* readCalibrationCache(std::size_t& length) noexcept override; 35 | 36 | void writeCalibrationCache(const void* cache, size_t length) noexcept override; 37 | 38 | private: 39 | int batchSize; 40 | int inputC; 41 | int inputH; 42 | int inputW; 43 | int letterBox; 44 | float scaleFactor; 45 | const float* offsets; 46 | std::string calibTablePath; 47 | size_t imageIndex; 48 | size_t inputCount; 49 | std::vector imgPaths; 50 | float* batchData {nullptr}; 51 | void* deviceInput {nullptr}; 52 | bool readCache; 53 | std::vector calibrationCache; 54 | }; 55 | 56 | std::vector prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, float scaleFactor, 57 | const float* offsets); 58 | 59 | #endif //CALIBRATOR_H 60 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/slice_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "slice_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | 
sliceLayer(int layerIdx, std::string& name, nvinfer1::ITensor* input, nvinfer1::Dims start, nvinfer1::Dims size, 12 | nvinfer1::Dims stride, nvinfer1::INetworkDefinition* network, uint batchSize) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | int tensorBatch = input->getDimensions().d[0]; 17 | 18 | nvinfer1::ISliceLayer* slice = network->addSlice(*input, start, size, stride); 19 | 20 | if (tensorBatch == -1) { 21 | int nbDims = size.nbDims; 22 | 23 | nvinfer1::Weights constant1Wt {nvinfer1::DataType::kINT32, nullptr, nbDims}; 24 | 25 | int* val1 = new int[nbDims]; 26 | val1[0] = 1; 27 | for (int i = 1; i < nbDims; ++i) { 28 | val1[i] = size.d[i]; 29 | } 30 | constant1Wt.values = val1; 31 | 32 | nvinfer1::IConstantLayer* constant1 = network->addConstant(nvinfer1::Dims{1, {nbDims}}, constant1Wt); 33 | assert(constant1 != nullptr); 34 | std::string constant1LayerName = "constant1_" + name + "_" + std::to_string(layerIdx); 35 | constant1->setName(constant1LayerName.c_str()); 36 | nvinfer1::ITensor* constant1Tensor = constant1->getOutput(0); 37 | 38 | nvinfer1::Weights constant2Wt {nvinfer1::DataType::kINT32, nullptr, nbDims}; 39 | 40 | int* val2 = new int[nbDims]; 41 | val2[0] = batchSize; 42 | for (int i = 1; i < nbDims; ++i) { 43 | val2[i] = 1; 44 | } 45 | constant2Wt.values = val2; 46 | 47 | nvinfer1::IConstantLayer* constant2 = network->addConstant(nvinfer1::Dims{1, {nbDims}}, constant2Wt); 48 | assert(constant2 != nullptr); 49 | std::string constant2LayerName = "constant2_" + name + "_" + std::to_string(layerIdx); 50 | constant2->setName(constant2LayerName.c_str()); 51 | nvinfer1::ITensor* constant2Tensor = constant2->getOutput(0); 52 | 53 | nvinfer1::IElementWiseLayer* newSize = network->addElementWise(*constant1Tensor, *constant2Tensor, 54 | nvinfer1::ElementWiseOperation::kPROD); 55 | assert(newSize != nullptr); 56 | std::string newSizeLayerName = "new_size_" + name + "_" + std::to_string(layerIdx); 57 | newSize->setName(newSizeLayerName.c_str()); 58 | 
nvinfer1::ITensor* newSizeTensor = newSize->getOutput(0); 59 | 60 | slice->setInput(2, *newSizeTensor); 61 | } 62 | 63 | assert(slice != nullptr); 64 | std::string sliceLayerName = name + "_" + std::to_string(layerIdx); 65 | slice->setName(sliceLayerName.c_str()); 66 | output = slice->getOutput(0); 67 | 68 | return output; 69 | } 70 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "batchnorm_layer.h" 7 | 8 | #include 9 | #include 10 | 11 | nvinfer1::ITensor* 12 | batchnormLayer(int layerIdx, std::map& block, std::vector& weights, 13 | std::vector& trtWeights, int& weightPtr, nvinfer1::ITensor* input, 14 | nvinfer1::INetworkDefinition* network) 15 | { 16 | nvinfer1::ITensor* output; 17 | 18 | assert(block.at("type") == "batchnorm"); 19 | assert(block.find("filters") != block.end()); 20 | 21 | int filters = std::stoi(block.at("filters")); 22 | std::string activation = block.at("activation"); 23 | 24 | std::vector bnBiases; 25 | std::vector bnWeights; 26 | std::vector bnRunningMean; 27 | std::vector bnRunningVar; 28 | 29 | for (int i = 0; i < filters; ++i) { 30 | bnBiases.push_back(weights[weightPtr]); 31 | ++weightPtr; 32 | } 33 | for (int i = 0; i < filters; ++i) { 34 | bnWeights.push_back(weights[weightPtr]); 35 | ++weightPtr; 36 | } 37 | for (int i = 0; i < filters; ++i) { 38 | bnRunningMean.push_back(weights[weightPtr]); 39 | ++weightPtr; 40 | } 41 | for (int i = 0; i < filters; ++i) { 42 | bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5)); 43 | ++weightPtr; 44 | } 45 | 46 | int size = filters; 47 | nvinfer1::Weights shift {nvinfer1::DataType::kFLOAT, nullptr, size}; 48 | nvinfer1::Weights scale {nvinfer1::DataType::kFLOAT, nullptr, size}; 49 | nvinfer1::Weights power 
{nvinfer1::DataType::kFLOAT, nullptr, size}; 50 | float* shiftWt = new float[size]; 51 | for (int i = 0; i < size; ++i) 52 | shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); 53 | shift.values = shiftWt; 54 | float* scaleWt = new float[size]; 55 | for (int i = 0; i < size; ++i) 56 | scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; 57 | scale.values = scaleWt; 58 | float* powerWt = new float[size]; 59 | for (int i = 0; i < size; ++i) 60 | powerWt[i] = 1.0; 61 | power.values = powerWt; 62 | trtWeights.push_back(shift); 63 | trtWeights.push_back(scale); 64 | trtWeights.push_back(power); 65 | 66 | nvinfer1::IScaleLayer* batchnorm = network->addScale(*input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); 67 | assert(batchnorm != nullptr); 68 | std::string batchnormLayerName = "batchnorm_" + std::to_string(layerIdx); 69 | batchnorm->setName(batchnormLayerName.c_str()); 70 | output = batchnorm->getOutput(0); 71 | 72 | output = activationLayer(layerIdx, activation, output, network); 73 | assert(output != nullptr); 74 | 75 | return output; 76 | } 77 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/deconvolutional_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "deconvolutional_layer.h" 7 | 8 | #include 9 | 10 | nvinfer1::ITensor* 11 | deconvolutionalLayer(int layerIdx, std::map& block, std::vector& weights, 12 | std::vector& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, 13 | nvinfer1::INetworkDefinition* network, std::string layerName) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "deconvolutional"); 18 | assert(block.find("filters") != block.end()); 19 | assert(block.find("pad") != block.end()); 20 | assert(block.find("size") != block.end()); 21 | 
assert(block.find("stride") != block.end()); 22 | 23 | int filters = std::stoi(block.at("filters")); 24 | int padding = std::stoi(block.at("pad")); 25 | int kernelSize = std::stoi(block.at("size")); 26 | int stride = std::stoi(block.at("stride")); 27 | int bias = filters; 28 | 29 | int groups = 1; 30 | if (block.find("groups") != block.end()) 31 | groups = std::stoi(block.at("groups")); 32 | 33 | if (block.find("bias") != block.end()) 34 | bias = std::stoi(block.at("bias")); 35 | 36 | int pad; 37 | if (padding) 38 | pad = (kernelSize - 1) / 2; 39 | else 40 | pad = 0; 41 | 42 | int size = filters * inputChannels * kernelSize * kernelSize / groups; 43 | std::vector bnBiases; 44 | std::vector bnWeights; 45 | std::vector bnRunningMean; 46 | std::vector bnRunningVar; 47 | nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, size}; 48 | nvinfer1::Weights convBias {nvinfer1::DataType::kFLOAT, nullptr, bias}; 49 | 50 | float* val; 51 | if (bias != 0) { 52 | val = new float[filters]; 53 | for (int i = 0; i < filters; ++i) { 54 | val[i] = weights[weightPtr]; 55 | ++weightPtr; 56 | } 57 | convBias.values = val; 58 | trtWeights.push_back(convBias); 59 | } 60 | val = new float[size]; 61 | for (int i = 0; i < size; ++i) { 62 | val[i] = weights[weightPtr]; 63 | ++weightPtr; 64 | } 65 | convWt.values = val; 66 | trtWeights.push_back(convWt); 67 | 68 | nvinfer1::IDeconvolutionLayer* conv = network->addDeconvolutionNd(*input, filters, 69 | nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias); 70 | assert(conv != nullptr); 71 | std::string convLayerName = "deconv_" + layerName + std::to_string(layerIdx); 72 | conv->setName(convLayerName.c_str()); 73 | conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}}); 74 | conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}}); 75 | 76 | if (block.find("groups") != block.end()) 77 | conv->setNbGroups(groups); 78 | 79 | output = conv->getOutput(0); 80 | 81 | return output; 82 | } 83 | 
-------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "route_layer.h" 7 | 8 | nvinfer1::ITensor* 9 | routeLayer(int layerIdx, std::string& layers, std::map& block, 10 | std::vector tensorOutputs, nvinfer1::INetworkDefinition* network, uint batchSize) 11 | { 12 | nvinfer1::ITensor* output; 13 | 14 | assert(block.at("type") == "route"); 15 | assert(block.find("layers") != block.end()); 16 | 17 | std::string strLayers = block.at("layers"); 18 | std::vector idxLayers; 19 | size_t lastPos = 0, pos = 0; 20 | while ((pos = strLayers.find(',', lastPos)) != std::string::npos) { 21 | int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos))); 22 | idxLayers.push_back(vL); 23 | lastPos = pos + 1; 24 | } 25 | if (lastPos < strLayers.length()) { 26 | std::string lastV = trim(strLayers.substr(lastPos)); 27 | if (!lastV.empty()) { 28 | idxLayers.push_back(std::stoi(lastV)); 29 | } 30 | } 31 | assert(!idxLayers.empty()); 32 | std::vector concatInputs; 33 | for (uint i = 0; i < idxLayers.size(); ++i) { 34 | if (idxLayers[i] < 0) { 35 | idxLayers[i] = tensorOutputs.size() + idxLayers[i]; 36 | } 37 | assert(idxLayers[i] >= 0 && idxLayers[i] < (int)tensorOutputs.size()); 38 | concatInputs.push_back(tensorOutputs[idxLayers[i]]); 39 | if (i < idxLayers.size() - 1) { 40 | layers += std::to_string(idxLayers[i]) + ", "; 41 | } 42 | } 43 | layers += std::to_string(idxLayers[idxLayers.size() - 1]); 44 | 45 | if (concatInputs.size() == 1) { 46 | output = concatInputs[0]; 47 | } 48 | else { 49 | int axis = 1; 50 | if (block.find("axis") != block.end()) { 51 | axis += std::stoi(block.at("axis")); 52 | std::cout << axis << std::endl; 53 | } 54 | if (axis < 0) { 55 | axis += concatInputs[0]->getDimensions().nbDims; 56 | } 
57 | 58 | nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); 59 | assert(concat != nullptr); 60 | std::string concatLayerName = "route_" + std::to_string(layerIdx); 61 | concat->setName(concatLayerName.c_str()); 62 | concat->setAxis(axis); 63 | output = concat->getOutput(0); 64 | } 65 | 66 | if (block.find("groups") != block.end()) { 67 | nvinfer1::Dims prevTensorDims = output->getDimensions(); 68 | int groups = stoi(block.at("groups")); 69 | int group_id = stoi(block.at("group_id")); 70 | int startSlice = (prevTensorDims.d[1] / groups) * group_id; 71 | int channelSlice = (prevTensorDims.d[1] / groups); 72 | 73 | std::string name = "slice"; 74 | nvinfer1::Dims start = {4, {0, startSlice, 0, 0}}; 75 | nvinfer1::Dims size = {4, {prevTensorDims.d[0], channelSlice, prevTensorDims.d[2], prevTensorDims.d[3]}}; 76 | nvinfer1::Dims stride = {4, {1, 1, 1, 1}}; 77 | 78 | output = sliceLayer(layerIdx, name, output, start, size, stride, network, batchSize); 79 | assert(output != nullptr); 80 | } 81 | 82 | return output; 83 | } 84 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a 5 | # copy of this software and associated documentation files (the "Software"), 6 | # to deal in the Software without restriction, including without limitation 7 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | # and/or sell copies of the Software, and to permit persons to whom the 9 | # Software is furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | # DEALINGS IN THE SOFTWARE. 
21 | # 22 | # Edited by Marcos Luciano 23 | # https://www.github.com/marcoslucianops 24 | ################################################################################ 25 | 26 | CUDA_VER?= 27 | ifeq ($(CUDA_VER),) 28 | $(error "CUDA_VER is not set") 29 | endif 30 | 31 | OPENCV?= 32 | ifeq ($(OPENCV),) 33 | OPENCV=0 34 | endif 35 | 36 | GRAPH?= 37 | ifeq ($(GRAPH),) 38 | GRAPH=0 39 | endif 40 | 41 | CC:= g++ 42 | NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc 43 | 44 | CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations 45 | CFLAGS+= -I/opt/nvidia/deepstream/deepstream/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include 46 | 47 | ifeq ($(OPENCV), 1) 48 | COMMON+= -DOPENCV 49 | CFLAGS+= $(shell pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv) 50 | LIBS+= $(shell pkg-config --libs opencv4 2> /dev/null || pkg-config --libs opencv) 51 | endif 52 | 53 | ifeq ($(GRAPH), 1) 54 | COMMON+= -DGRAPH 55 | endif 56 | 57 | CUFLAGS:= -I/opt/nvidia/deepstream/deepstream/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include 58 | 59 | LIBS+= -lnvinfer_plugin -lnvinfer -lnvparsers -lnvonnxparser -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs 60 | LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group 61 | 62 | INCS:= $(wildcard *.h) 63 | 64 | SRCFILES:= $(filter-out calibrator.cpp, $(wildcard *.cpp)) 65 | 66 | ifeq ($(OPENCV), 1) 67 | SRCFILES+= calibrator.cpp 68 | endif 69 | 70 | SRCFILES+= $(wildcard layers/*.cpp) 71 | SRCFILES+= $(wildcard *.cu) 72 | 73 | TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so 74 | 75 | TARGET_OBJS:= $(SRCFILES:.cpp=.o) 76 | TARGET_OBJS:= $(TARGET_OBJS:.cu=.o) 77 | 78 | all: $(TARGET_LIB) 79 | 80 | %.o: %.cpp $(INCS) Makefile 81 | $(CC) -c $(COMMON) -o $@ $(CFLAGS) $< 82 | 83 | %.o: %.cu $(INCS) Makefile 84 | $(NVCC) -c -o $@ --compiler-options '-fPIC' $(CUFLAGS) $< 85 | 86 | $(TARGET_LIB) : $(TARGET_OBJS) 87 | $(CC) -o $@ $(TARGET_OBJS) $(LFLAGS) 88 | 89 | clean: 90 | rm -rf 
$(TARGET_LIB) 91 | rm -rf $(TARGET_OBJS) 92 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.engine 9 | *.onnx 10 | *.pt 11 | *.o 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # poetry 101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 102 | # This is especially recommended for binary packages to ensure reproducibility, and is more 103 | # commonly ignored for libraries. 104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 105 | #poetry.lock 106 | 107 | # pdm 108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 109 | #pdm.lock 110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 111 | # in version control. 112 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 113 | .pdm.toml 114 | .pdm-python 115 | .pdm-build/ 116 | 117 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 118 | __pypackages__/ 119 | 120 | # Celery stuff 121 | celerybeat-schedule 122 | celerybeat.pid 123 | 124 | # SageMath parsed files 125 | *.sage.py 126 | 127 | # Environments 128 | .env 129 | .venv 130 | env/ 131 | venv/ 132 | ENV/ 133 | env.bak/ 134 | venv.bak/ 135 | 136 | # Spyder project settings 137 | .spyderproject 138 | .spyproject 139 | 140 | # Rope project settings 141 | .ropeproject 142 | 143 | # mkdocs documentation 144 | /site 145 | 146 | # mypy 147 | .mypy_cache/ 148 | .dmypy.json 149 | dmypy.json 150 | 151 | # Pyre type checker 152 | .pyre/ 153 | 154 | # pytype static type analyzer 155 | .pytype/ 156 | 157 | # Cython debug symbols 158 | cython_debug/ 159 | 160 | # PyCharm 161 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 162 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 163 | # and can be added to the global gitignore or merged into this file. For a more nuclear 164 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/
-------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu: --------------------------------------------------------------------------------
/*
 * Created by Marcos Luciano
 * https://www.github.com/marcoslucianops
 */

#include <stdint.h>

// Decodes one "new coords" (scaled-YOLO style) detection per thread: raw head
// activations -> (xc, yc, w, h) in network-input pixels plus the best class
// score. No sigmoid applied here — activations are assumed already squashed.
__global__ void gpuYoloLayer_nc(const float* input, float* boxes, float* scores, float* classes, const uint netWidth,
    const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
    const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask)
{
  uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
  uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
  uint z_id = blockIdx.z * blockDim.z + threadIdx.z;

  if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) {
    return;
  }

  const int numGridCells = gridSizeX * gridSizeY;
  const int bbindex = y_id * gridSizeX + x_id;

  // scale_x_y trick: xy = raw * alpha + beta, centered on the grid cell
  const float alpha = scaleXY;
  const float beta = -0.5 * (scaleXY - 1);

  float xc = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] * alpha + beta + x_id) * netWidth /
      gridSizeX;

  float yc = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] * alpha + beta + y_id) * netHeight /
      gridSizeY;

  float w = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) * anchors[mask[z_id] * 2];

  float h = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2) * anchors[mask[z_id] * 2 + 1];

  const float objectness = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)];

  float maxProb = 0.0f;
  int maxIndex = -1;

  for (uint i = 0; i < numOutputClasses; ++i) {
    float prob = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))];
    if (prob > maxProb) {
      maxProb = prob;
      maxIndex = i;
    }
  }

  // Flat output slot; this head's detections start at offset lastInputSize.
  int count = numGridCells * z_id + bbindex + lastInputSize;

  boxes[count * 4 + 0] = xc;
  boxes[count * 4 + 1] = yc;
  boxes[count * 4 + 2] = w;
  boxes[count * 4 + 3] = h;
  scores[count] = maxProb * objectness;
  classes[count] = (float) maxIndex;
}

cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
    const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
    const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
    const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);

cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
    const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
    const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
    const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
{
  dim3 threads_per_block(16, 16, 4);
  dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1,
      (numBBoxes / threads_per_block.z) + 1);

  for (unsigned int batch = 0; batch < batchSize; ++batch) {
    // Launch arguments restored — the rendered dump had lost the <<<...>>> contents.
    gpuYoloLayer_nc<<<number_of_blocks, threads_per_block, 0, stream>>>(
        reinterpret_cast<const float*>(input) + (batch * inputSize),
        reinterpret_cast<float*>(boxes) + (batch * 4 * outputSize),
        reinterpret_cast<float*>(scores) + (batch * 1 * outputSize),
        reinterpret_cast<float*>(classes) + (batch * 1 * outputSize),
        netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY,
        reinterpret_cast<const float*>(anchors), reinterpret_cast<const int*>(mask));
  }
  return cudaGetLastError();
}
-------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/yoloForward.cu: --------------------------------------------------------------------------------
/*
 * Created by Marcos Luciano
 * https://www.github.com/marcoslucianops
 */

#include <stdint.h>

inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); }

// Classic darknet YOLO decode: sigmoid on xy/objectness/class logits and
// exp on wh, scaled by the anchor selected through mask[z_id].
__global__ void gpuYoloLayer(const float* input, float* boxes, float* scores, float* classes, const uint netWidth,
    const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes,
    const uint64_t lastInputSize, const float scaleXY, const float* anchors, const int* mask)
{
  uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
  uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
  uint z_id = blockIdx.z * blockDim.z + threadIdx.z;

  if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) {
    return;
  }

  const int numGridCells = gridSizeX * gridSizeY;
  const int bbindex = y_id * gridSizeX + x_id;

  const float alpha = scaleXY;
  const float beta = -0.5 * (scaleXY - 1);

  float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) * alpha + beta + x_id)
      * netWidth / gridSizeX;

  float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) * alpha + beta + y_id)
      * netHeight / gridSizeY;

  float w = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[mask[z_id] * 2];

  float h = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[mask[z_id] * 2 + 1];

  const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);

  float maxProb = 0.0f;
  int maxIndex = -1;

  for (uint i = 0; i < numOutputClasses; ++i) {
    float prob = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]);
    if (prob > maxProb) {
      maxProb = prob;
      maxIndex = i;
    }
  }

  int count = numGridCells * z_id + bbindex + lastInputSize;

  boxes[count * 4 + 0] = xc;
  boxes[count * 4 + 1] = yc;
  boxes[count * 4 + 2] = w;
  boxes[count * 4 + 3] = h;
  scores[count] = maxProb * objectness;
  classes[count] = (float) maxIndex;
}

cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
    const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
    const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
    const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream);

cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize,
    const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth,
    const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes,
    const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream)
{
  dim3 threads_per_block(16, 16, 4);
  dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1,
      (numBBoxes / threads_per_block.z) + 1);

  for (unsigned int batch = 0; batch < batchSize; ++batch) {
    // Launch arguments restored — the rendered dump had lost the <<<...>>> contents.
    gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>(
        reinterpret_cast<const float*>(input) + (batch * inputSize),
        reinterpret_cast<float*>(boxes) + (batch * 4 * outputSize),
        reinterpret_cast<float*>(scores) + (batch * 1 * outputSize),
        reinterpret_cast<float*>(classes) + (batch * 1 * outputSize),
        netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, lastInputSize, scaleXY,
        reinterpret_cast<const float*>(anchors), reinterpret_cast<const int*>(mask));
  }
  return cudaGetLastError();
}
-------------------------------------------------------------------------------- /src/scripts/export_yolo11.py: --------------------------------------------------------------------------------
import os
import sys
import argparse
import warnings
import onnx
import torch
import torch.nn as nn
from copy import deepcopy
from ultralytics import YOLO
from ultralytics.utils.torch_utils import select_device
from ultralytics.nn.modules import C2f, Detect, RTDETRDecoder


class DeepStreamOutput(nn.Module):
    """Reshapes raw YOLO11 output into DeepStream-friendly (boxes, scores, classes)."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # (batch, attrs, preds) -> (batch, preds, attrs)
        x = x.transpose(1, 2)
        boxes = x[:, :, :4]
        # best class probability and its index per prediction
        scores, classes = torch.max(x[:, :, 4:], 2, keepdim=True)
        classes = classes.float()
        return boxes, scores, classes


def suppress_warnings():
    """Silence tracer/user/deprecation warnings emitted during ONNX export."""
    warnings.filterwarnings('ignore', category=torch.jit.TracerWarning)
    warnings.filterwarnings('ignore', category=UserWarning)
    warnings.filterwarnings('ignore', category=DeprecationWarning)


def yolo11_export(weights, device):
    """Load a YOLO .pt checkpoint and prepare it for ONNX export (fused, eval, static heads)."""
    model = YOLO(weights)
    model = deepcopy(model.model).to(device)
    for p in model.parameters():
        p.requires_grad = False
    model.eval()
    model.float()
    model = model.fuse()
    for k, m in model.named_modules():
        if isinstance(m, (Detect, RTDETRDecoder)):
            m.dynamic = False
            m.export = True
            m.format = 'onnx'
        elif isinstance(m, C2f):
            # forward_split avoids chunk() which exports poorly
            m.forward = m.forward_split
    return model


def main(args):
    suppress_warnings()

    print('\nStarting: %s' % args.weights)

    print('Opening YOLO11 model\n')

    device = select_device('cpu')
    model = yolo11_export(args.weights, device)

    if len(model.names.keys()) > 0:
        print('\nCreating labels.txt file')
        with open('labels.txt', 'w') as f:
            for name in model.names.values():
                f.write(name + '\n')

    model = nn.Sequential(model, DeepStreamOutput())

    img_size = args.size * 2 if len(args.size) == 1 else args.size

    onnx_input_im = torch.zeros(args.batch, 3, *img_size).to(device)
    os.makedirs('models', exist_ok=True)  # output dir may not exist on a fresh checkout
    onnx_output_file = 'models/' + os.path.basename(args.weights).split('.pt')[0] + '.onnx'

    dynamic_axes = {
        'input': {
            0: 'batch'
        },
        'boxes': {
            0: 'batch'
        },
        'scores': {
            0: 'batch'
        },
        'classes': {
            0: 'batch'
        }
    }

    print('\nExporting the model to ONNX')
    torch.onnx.export(model, onnx_input_im, onnx_output_file, verbose=False, opset_version=args.opset,
                      do_constant_folding=True, input_names=['input'], output_names=['boxes', 'scores', 'classes'],
                      dynamic_axes=dynamic_axes if args.dynamic else None)

    if args.simplify:
        print('Simplifying the ONNX model')
        import onnxsim
        model_onnx = onnx.load(onnx_output_file)
        model_onnx, _ = onnxsim.simplify(model_onnx)
        onnx.save(model_onnx, onnx_output_file)

    print('Done: %s\n' % onnx_output_file)


def parse_args():
    parser = argparse.ArgumentParser(description='DeepStream YOLO11 conversion')
    parser.add_argument('-w', '--weights', required=True, help='Input weights (.pt) file path (required)')
    parser.add_argument('-s', '--size', nargs='+', type=int, default=[640], help='Inference size [H,W] (default [640])')
    parser.add_argument('--opset', type=int, default=16, help='ONNX opset version')
    parser.add_argument('--simplify', action='store_true', help='ONNX simplify model')
    parser.add_argument('--dynamic', action='store_true', help='Dynamic batch-size')
    parser.add_argument('--batch', type=int, default=1, help='Static batch-size')
    args = parser.parse_args()
    if not os.path.isfile(args.weights):
        raise SystemExit('Invalid weights file')
    if args.dynamic and args.batch > 1:
        raise SystemExit('Cannot set dynamic batch-size and static batch-size at same time')
    return args


if __name__ == '__main__':
    args = parse_args()
    sys.exit(main(args))
-------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/calibrator.cpp: --------------------------------------------------------------------------------
/*
 * Created by Marcos Luciano
 * https://www.github.com/marcoslucianops
 */

#include "calibrator.h"

#include <fstream>
#include <iterator>

// readCache(true) added: the member was previously left uninitialized and then
// read in readCalibrationCache() (undefined behavior).
Int8EntropyCalibrator2::Int8EntropyCalibrator2(const int& batchSize, const int& channels, const int& height, const int& width,
    const float& scaleFactor, const float* offsets, const std::string& imgPath, const std::string& calibTablePath) :
    batchSize(batchSize), inputC(channels), inputH(height), inputW(width), scaleFactor(scaleFactor), offsets(offsets),
    calibTablePath(calibTablePath), imageIndex(0), readCache(true)
{
  inputCount = batchSize * channels * height * width;
  // imgPath is a text file with one calibration image path per line
  std::fstream f(imgPath);
  if (f.is_open()) {
    std::string temp;
    while (std::getline(f, temp)) {
      imgPaths.push_back(temp);
    }
  }
  batchData = new float[inputCount];
  CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float)));
}

Int8EntropyCalibrator2::~Int8EntropyCalibrator2()
{
  CUDA_CHECK(cudaFree(deviceInput));
  if (batchData) {
    delete[] batchData;
  }
}

int
Int8EntropyCalibrator2::getBatchSize() const noexcept
{
  return batchSize;
}

bool
Int8EntropyCalibrator2::getBatch(void** bindings, const char** names, int nbBindings) noexcept
{
  // Stop once there are not enough images left for a full batch.
  if (imageIndex + batchSize > uint(imgPaths.size())) {
    return false;
  }

  float* ptr = batchData;
  for (size_t i = imageIndex; i < imageIndex + batchSize; ++i) {
    cv::Mat img = cv::imread(imgPaths[i]);
    if (img.empty()){
      std::cerr << "Failed to read image for calibration" << std::endl;
      return false;
    }

    std::vector<float> inputData = prepareImage(img, inputC, inputH, inputW, scaleFactor, offsets);

    size_t len = inputData.size();
    memcpy(ptr, inputData.data(), len * sizeof(float));
    ptr += inputData.size();

    std::cout << "Load image: " << imgPaths[i] << std::endl;
    std::cout << "Progress: " << (i + 1) * 100. / imgPaths.size() << "%" << std::endl;
  }

  imageIndex += batchSize;

  CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice));
  bindings[0] = deviceInput;

  return true;
}

const void*
Int8EntropyCalibrator2::readCalibrationCache(std::size_t &length) noexcept
{
  calibrationCache.clear();
  std::ifstream input(calibTablePath, std::ios::binary);
  input >> std::noskipws;
  if (readCache && input.good()) {
    std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(calibrationCache));
  }
  length = calibrationCache.size();
  return length ? calibrationCache.data() : nullptr;
}

void
Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, std::size_t length) noexcept
{
  std::ofstream output(calibTablePath, std::ios::binary);
  output.write(reinterpret_cast<const char*>(cache), length);
}

// BGR -> RGB, resize + center-crop to (input_w, input_h), scale, subtract
// per-channel offsets, and split into planar CHW floats.
std::vector<float>
prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, float scaleFactor, const float* offsets)
{
  cv::Mat out;

  cv::cvtColor(img, out, cv::COLOR_BGR2RGB);

  int image_w = img.cols;
  int image_h = img.rows;

  if (image_w != input_w || image_h != input_h) {
    // scale so the smaller relative side fills the target, then center-crop
    float resizeFactor = std::max(input_w / (float) image_w, input_h / (float) img.rows);
    cv::resize(out, out, cv::Size(0, 0), resizeFactor, resizeFactor, cv::INTER_CUBIC);
    cv::Rect crop(cv::Point(0.5 * (out.cols - input_w), 0.5 * (out.rows - input_h)), cv::Size(input_w, input_h));
    out = out(crop);
  }

  out.convertTo(out, CV_32F, scaleFactor);
  // offsets are given in BGR order; the image is RGB here, hence the reversal
  cv::subtract(out, cv::Scalar(offsets[2] / 255, offsets[1] / 255, offsets[0] / 255), out, cv::noArray(), -1);

  std::vector<cv::Mat> input_channels(input_c);
  cv::split(out, input_channels);
  std::vector<float> result(input_h * input_w * input_c);
  auto data = result.data();
  int channelLength = input_h * input_w;
  for (int i = 0; i < input_c; ++i) {
    memcpy(data, input_channels[i].data, channelLength * sizeof(float));
    data += channelLength;
  }

  return result;
}
-------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/utils.cpp: --------------------------------------------------------------------------------
/*
 * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "utils.h" 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | static void 33 | leftTrim(std::string& s) 34 | { 35 | s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); 36 | } 37 | 38 | static void 39 | rightTrim(std::string& s) 40 | { 41 | s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); 42 | } 43 | 44 | std::string 45 | trim(std::string s) 46 | { 47 | leftTrim(s); 48 | rightTrim(s); 49 | return s; 50 | } 51 | 52 | float 53 | clamp(const float val, const float minVal, const float maxVal) 54 | { 55 | assert(minVal <= maxVal); 56 | return std::min(maxVal, std::max(minVal, val)); 57 | } 58 | 59 | bool 60 | fileExists(const std::string fileName, bool verbose) 61 | { 62 | if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName))) { 63 | if (verbose) { 64 | std::cout << "\nFile does not exist: " << fileName << std::endl; 65 | } 66 | return false; 67 | } 68 | return true; 69 | } 70 | 71 | std::vector 72 | loadWeights(const std::string weightsFilePath, const std::string& modelName) 73 | { 74 | assert(fileExists(weightsFilePath)); 75 | std::cout << "\nLoading pre-trained weights" << std::endl; 76 | 77 | std::vector weights; 78 | 79 | if (weightsFilePath.find(".weights") != std::string::npos) { 80 | std::ifstream file(weightsFilePath, std::ios_base::binary); 81 | assert(file.good()); 82 | std::string line; 83 | 84 | if (modelName.find("yolov2") != std::string::npos && modelName.find("yolov2-tiny") == std::string::npos) { 85 | // Remove 4 int32 bytes of data from the stream belonging to the header 86 | file.ignore(4 * 4); 87 | } 88 | else { 89 | // Remove 5 int32 bytes of data from the stream belonging to the header 90 | file.ignore(4 * 5); 91 | } 92 | 93 | char floatWeight[4]; 94 | while (!file.eof()) { 95 | file.read(floatWeight, 4); 96 | 
assert(file.gcount() == 4); 97 | weights.push_back(*reinterpret_cast(floatWeight)); 98 | if (file.peek() == std::istream::traits_type::eof()) { 99 | break; 100 | } 101 | } 102 | } 103 | else { 104 | std::cerr << "\nFile " << weightsFilePath << " is not supported" << std::endl; 105 | assert(0); 106 | } 107 | 108 | std::cout << "Loading weights of " << modelName << " complete" << std::endl; 109 | std::cout << "Total weights read: " << weights.size() << std::endl; 110 | 111 | return weights; 112 | } 113 | 114 | std::string 115 | dimsToString(const nvinfer1::Dims d) 116 | { 117 | assert(d.nbDims >= 1); 118 | 119 | std::stringstream s; 120 | s << "["; 121 | for (int i = 1; i < d.nbDims - 1; ++i) { 122 | s << d.d[i] << ", "; 123 | } 124 | s << d.d[d.nbDims - 1] << "]"; 125 | 126 | return s.str(); 127 | } 128 | 129 | int 130 | getNumChannels(nvinfer1::ITensor* t) 131 | { 132 | nvinfer1::Dims d = t->getDimensions(); 133 | assert(d.nbDims == 4); 134 | return d.d[1]; 135 | } 136 | 137 | void 138 | printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, 139 | std::string weightPtr) 140 | { 141 | std::cout << std::setw(7) << std::left << layerIndex << std::setw(40) << std::left << layerName; 142 | std::cout << std::setw(19) << std::left << layerInput << std::setw(19) << std::left << layerOutput; 143 | std::cout << weightPtr << std::endl; 144 | } 145 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "reorg_layer.h" 7 | 8 | #include 9 | #include 10 | 11 | nvinfer1::ITensor* 12 | reorgLayer(int layerIdx, std::map& block, nvinfer1::ITensor* input, 13 | nvinfer1::INetworkDefinition* network, uint batchSize) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | 
assert(block.at("type") == "reorg" || block.at("type") == "reorg3d"); 18 | 19 | int stride = 1; 20 | if(block.find("stride") != block.end()) { 21 | stride = std::stoi(block.at("stride")); 22 | } 23 | 24 | nvinfer1::Dims inputDims = input->getDimensions(); 25 | 26 | if (block.at("type") == "reorg3d") { 27 | std::string name1 = "slice1"; 28 | std::string name2 = "slice2"; 29 | std::string name3 = "slice3"; 30 | std::string name4 = "slice4"; 31 | nvinfer1::Dims start1 = {4, {0, 0, 0, 0}}; 32 | nvinfer1::Dims start2 = {4, {0, 0, 0, 1}}; 33 | nvinfer1::Dims start3 = {4, {0, 0, 1, 0}}; 34 | nvinfer1::Dims start4 = {4, {0, 0, 1, 1}}; 35 | nvinfer1::Dims sizeAll = {4, {inputDims.d[0], inputDims.d[1], inputDims.d[2] / stride, inputDims.d[3] / stride}}; 36 | nvinfer1::Dims strideAll = {4, {1, 1, stride, stride}}; 37 | 38 | nvinfer1::ITensor* slice1 = sliceLayer(layerIdx, name1, input, start1, sizeAll, strideAll, network, batchSize); 39 | assert(slice1 != nullptr); // check the tensor just created, not the still-uninitialized `output` 40 | 41 | nvinfer1::ITensor* slice2 = sliceLayer(layerIdx, name2, input, start2, sizeAll, strideAll, network, batchSize); 42 | assert(slice2 != nullptr); 43 | 44 | nvinfer1::ITensor* slice3 = sliceLayer(layerIdx, name3, input, start3, sizeAll, strideAll, network, batchSize); 45 | assert(slice3 != nullptr); 46 | 47 | nvinfer1::ITensor* slice4 = sliceLayer(layerIdx, name4, input, start4, sizeAll, strideAll, network, batchSize); 48 | assert(slice4 != nullptr); 49 | 50 | std::vector concatInputs; 51 | concatInputs.push_back(slice1); 52 | concatInputs.push_back(slice2); 53 | concatInputs.push_back(slice3); 54 | concatInputs.push_back(slice4); 55 | 56 | nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); 57 | assert(concat != nullptr); 58 | std::string concatLayerName = "concat_" + std::to_string(layerIdx); 59 | concat->setName(concatLayerName.c_str()); 60 | concat->setAxis(0); /* NOTE(review): axis 0 is the batch dim in explicit-batch mode; channel concat would be axis 1 — confirm intended */ 61 | output = concat->getOutput(0); 62 | } 63 | else { 64 | nvinfer1::IShuffleLayer*
shuffle1 = network->addShuffle(*input); 65 | assert(shuffle1 != nullptr); 66 | std::string shuffle1LayerName = "shuffle1_" + std::to_string(layerIdx); 67 | shuffle1->setName(shuffle1LayerName.c_str()); 68 | nvinfer1::Dims reshapeDims1{6, {inputDims.d[0], inputDims.d[1] / (stride * stride), inputDims.d[2], stride, 69 | inputDims.d[3], stride}}; 70 | shuffle1->setReshapeDimensions(reshapeDims1); 71 | nvinfer1::Permutation permutation1{{0, 1, 2, 4, 3, 5}}; 72 | shuffle1->setSecondTranspose(permutation1); 73 | output = shuffle1->getOutput(0); 74 | 75 | nvinfer1::IShuffleLayer* shuffle2 = network->addShuffle(*output); 76 | assert(shuffle2 != nullptr); 77 | std::string shuffle2LayerName = "shuffle2_" + std::to_string(layerIdx); 78 | shuffle2->setName(shuffle2LayerName.c_str()); 79 | nvinfer1::Dims reshapeDims2{4, {inputDims.d[0], inputDims.d[1] / (stride * stride), inputDims.d[2] * inputDims.d[3], 80 | stride * stride}}; 81 | shuffle2->setReshapeDimensions(reshapeDims2); 82 | nvinfer1::Permutation permutation2{{0, 1, 3, 2}}; 83 | shuffle2->setSecondTranspose(permutation2); 84 | output = shuffle2->getOutput(0); 85 | 86 | nvinfer1::IShuffleLayer* shuffle3 = network->addShuffle(*output); 87 | assert(shuffle3 != nullptr); 88 | std::string shuffle3LayerName = "shuffle3_" + std::to_string(layerIdx); 89 | shuffle3->setName(shuffle3LayerName.c_str()); 90 | nvinfer1::Dims reshapeDims3{4, {inputDims.d[0], inputDims.d[1] / (stride * stride), stride * stride, 91 | inputDims.d[2] * inputDims.d[3]}}; 92 | shuffle3->setReshapeDimensions(reshapeDims3); 93 | nvinfer1::Permutation permutation3{{0, 2, 1, 3}}; 94 | shuffle3->setSecondTranspose(permutation3); 95 | output = shuffle3->getOutput(0); 96 | 97 | nvinfer1::IShuffleLayer* shuffle4 = network->addShuffle(*output); 98 | assert(shuffle4 != nullptr); 99 | std::string shuffle4LayerName = "shuffle4_" + std::to_string(layerIdx); 100 | shuffle4->setName(shuffle4LayerName.c_str()); 101 | nvinfer1::Dims reshapeDims4{4, {inputDims.d[0], 
inputDims.d[1] * stride * stride, inputDims.d[2] / stride, 102 | inputDims.d[3] / stride}}; 103 | shuffle4->setReshapeDimensions(reshapeDims4); 104 | output = shuffle4->getOutput(0); 105 | } 106 | 107 | return output; 108 | } 109 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | 8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 9 | 10 | __device__ void softmaxGPU(const float* input, const int bbindex, const int numGridCells, uint z_id, 11 | const uint numOutputClasses, float temp, float* output) 12 | { 13 | int i; 14 | float sum = 0; 15 | float largest = -INFINITY; 16 | for (i = 0; i < numOutputClasses; ++i) { 17 | float val = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; /* keep logits in float: `int` truncated them before the stability max-reduction */ 18 | largest = (val>largest) ? 
val : largest; 19 | } 20 | for (i = 0; i < numOutputClasses; ++i) { 21 | float e = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] / temp - largest / temp); 22 | sum += e; 23 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] = e; 24 | } 25 | for (i = 0; i < numOutputClasses; ++i) { 26 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] /= sum; 27 | } 28 | } 29 | 30 | __global__ void gpuRegionLayer(const float* input, float* softmax, float* boxes, float* scores, float* classes, 31 | const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, const uint numOutputClasses, 32 | const uint numBBoxes, const uint64_t lastInputSize, const float* anchors) 33 | { 34 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 35 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 36 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 37 | 38 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) { 39 | return; 40 | } 41 | 42 | const int numGridCells = gridSizeX * gridSizeY; 43 | const int bbindex = y_id * gridSizeX + x_id; 44 | 45 | float xc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) + x_id) * netWidth / gridSizeX; 46 | 47 | float yc = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) + y_id) * netHeight / gridSizeY; 48 | 49 | float w = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * anchors[z_id * 2] * netWidth / 50 | gridSizeX; 51 | 52 | float h = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * anchors[z_id * 2 + 1] * netHeight / 53 | gridSizeY; 54 | 55 | const float objectness = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 56 | 57 | softmaxGPU(input, bbindex, numGridCells, z_id, numOutputClasses, 1.0, softmax); 58 | 59 | float maxProb = 0.0f; 60 | int maxIndex = -1; 61 | 62 | for (uint i = 0; 
i < numOutputClasses; ++i) { 63 | float prob = softmax[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 64 | if (prob > maxProb) { 65 | maxProb = prob; 66 | maxIndex = i; 67 | } 68 | } 69 | 70 | int count = numGridCells * z_id + bbindex + lastInputSize; 71 | 72 | boxes[count * 4 + 0] = xc; 73 | boxes[count * 4 + 1] = yc; 74 | boxes[count * 4 + 2] = w; 75 | boxes[count * 4 + 3] = h; 76 | scores[count] = maxProb * objectness; 77 | classes[count] = (float) maxIndex; 78 | } 79 | 80 | cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* scores, void* classes, 81 | const uint& batchSize, const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, 82 | const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 83 | const uint& numBBoxes, const void* anchors, cudaStream_t stream); 84 | 85 | cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* scores, void* classes, 86 | const uint& batchSize, const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, 87 | const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 88 | const uint& numBBoxes, const void* anchors, cudaStream_t stream) 89 | { 90 | dim3 threads_per_block(16, 16, 4); 91 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, (gridSizeY / threads_per_block.y) + 1, 92 | (numBBoxes / threads_per_block.z) + 1); 93 | 94 | for (unsigned int batch = 0; batch < batchSize; ++batch) { 95 | gpuRegionLayer<<>>( 96 | reinterpret_cast (input) + (batch * inputSize), 97 | reinterpret_cast (softmax) + (batch * inputSize), 98 | reinterpret_cast (boxes) + (batch * 4 * outputSize), 99 | reinterpret_cast (scores) + (batch * 1 * outputSize), 100 | reinterpret_cast (classes) + (batch * 1 * outputSize), 101 | netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, 
lastInputSize, 102 | reinterpret_cast (anchors)); 103 | } 104 | return cudaGetLastError(); 105 | } 106 | -------------------------------------------------------------------------------- /src/config_tracker_NvDCF_perf.yml: -------------------------------------------------------------------------------- 1 | %YAML:1.0 2 | #################################################################################################### 3 | # SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | # SPDX-License-Identifier: LicenseRef-NvidiaProprietary 5 | # 6 | # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual 7 | # property and proprietary rights in and to this material, related 8 | # documentation and any modifications thereto. Any use, reproduction, 9 | # disclosure or distribution of this material and related documentation 10 | # without an express license agreement from NVIDIA CORPORATION or 11 | # its affiliates is strictly prohibited. 12 | #################################################################################################### 13 | 14 | BaseConfig: 15 | minDetectorConfidence: 0.0430 # If the confidence of a detector bbox is lower than this, then it won't be considered for tracking 16 | 17 | TargetManagement: 18 | enableBboxUnClipping: 1 # In case the bbox is likely to be clipped by image border, unclip bbox 19 | preserveStreamUpdateOrder: 0 # When assigning new target ids, preserve input streams' order to keep target ids in a deterministic order over multuple runs 20 | maxTargetsPerStream: 150 # Max number of targets to track per stream. Recommended to set >10. Note: this value should account for the targets being tracked in shadow mode as well. 
Max value depends on the GPU memory capacity 21 | 22 | # [Creation & Termination Policy] 23 | minIouDiff4NewTarget: 0.7418 # If the IOU between the newly detected object and any of the existing targets is higher than this threshold, this newly detected object will be discarded. 24 | minTrackerConfidence: 0.4009 # If the confidence of an object tracker is lower than this on the fly, then it will be tracked in shadow mode. Valid Range: [0.0, 1.0] 25 | probationAge: 2 # If the target's age exceeds this, the target will be considered to be valid. 26 | maxShadowTrackingAge: 51 # Max length of shadow tracking. If the shadowTrackingAge exceeds this limit, the tracker will be terminated. 27 | earlyTerminationAge: 1 # If the shadowTrackingAge reaches this threshold while in TENTATIVE period, the target will be terminated prematurely. 28 | 29 | TrajectoryManagement: 30 | useUniqueID: 0 # Use 64-bit long Unique ID when assigning tracker ID. 31 | 32 | DataAssociator: 33 | dataAssociatorType: 0 # the type of data associator among { DEFAULT= 0 } 34 | associationMatcherType: 1 # the type of matching algorithm among { GREEDY=0, CASCADED=1 } 35 | checkClassMatch: 1 # If checked, only the same-class objects are associated with each other. 
Default: true 36 | 37 | # [Association Metric: Thresholds for valid candidates] 38 | minMatchingScore4Overall: 0.4290 # Min total score 39 | minMatchingScore4SizeSimilarity: 0.3627 # Min bbox size similarity score 40 | minMatchingScore4Iou: 0.2575 # Min IOU score 41 | minMatchingScore4VisualSimilarity: 0.5356 # Min visual similarity score 42 | 43 | # [Association Metric: Weights] 44 | matchingScoreWeight4VisualSimilarity: 0.3370 # Weight for the visual similarity (in terms of correlation response ratio) 45 | matchingScoreWeight4SizeSimilarity: 0.4354 # Weight for the Size-similarity score 46 | matchingScoreWeight4Iou: 0.3656 # Weight for the IOU score 47 | 48 | # [Association Metric: Tentative detections] only uses iou similarity for tentative detections 49 | tentativeDetectorConfidence: 0.2008 # If a detection's confidence is lower than this but higher than minDetectorConfidence, then it's considered as a tentative detection 50 | minMatchingScore4TentativeIou: 0.5296 # Min iou threshold to match targets and tentative detection 51 | 52 | StateEstimator: 53 | stateEstimatorType: 1 # the type of state estimator among { DUMMY=0, SIMPLE=1, REGULAR=2 } 54 | 55 | # [Dynamics Modeling] 56 | processNoiseVar4Loc: 1.5110 # Process noise variance for bbox center 57 | processNoiseVar4Size: 1.3159 # Process noise variance for bbox size 58 | processNoiseVar4Vel: 0.0300 # Process noise variance for velocity 59 | measurementNoiseVar4Detector: 3.0283 # Measurement noise variance for detector's detection 60 | measurementNoiseVar4Tracker: 8.1505 # Measurement noise variance for tracker's localization 61 | 62 | VisualTracker: 63 | visualTrackerType: 1 # the type of visual tracker among { DUMMY=0, NvDCF=1 } 64 | 65 | # [NvDCF: Feature Extraction] 66 | useColorNames: 1 # Use ColorNames feature 67 | useHog: 0 # Use Histogram-of-Oriented-Gradient (HOG) feature 68 | featureImgSizeLevel: 2 # Size of a feature image. 
Valid range: {1, 2, 3, 4, 5}, from the smallest to the largest 69 | featureFocusOffsetFactor_y: -0.2000 # The offset for the center of hanning window relative to the feature height. The center of hanning window would move by (featureFocusOffsetFactor_y*featureMatSize.height) in vertical direction 70 | 71 | # [NvDCF: Correlation Filter] 72 | filterLr: 0.0750 # learning rate for DCF filter in exponential moving average. Valid Range: [0.0, 1.0] 73 | filterChannelWeightsLr: 0.1000 # learning rate for the channel weights among feature channels. Valid Range: [0.0, 1.0] 74 | gaussianSigma: 0.7500 # Standard deviation for Gaussian for desired response when creating DCF filter [pixels] 75 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/yolo.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #ifndef _YOLO_H_ 27 | #define _YOLO_H_ 28 | 29 | #include "NvInferPlugin.h" 30 | #include "nvdsinfer_custom_impl.h" 31 | 32 | #include "layers/convolutional_layer.h" 33 | #include "layers/deconvolutional_layer.h" 34 | #include "layers/batchnorm_layer.h" 35 | #include "layers/implicit_layer.h" 36 | #include "layers/channels_layer.h" 37 | #include "layers/shortcut_layer.h" 38 | #include "layers/sam_layer.h" 39 | #include "layers/route_layer.h" 40 | #include "layers/upsample_layer.h" 41 | #include "layers/pooling_layer.h" 42 | #include "layers/reorg_layer.h" 43 | 44 | #if NV_TENSORRT_MAJOR >= 8 45 | #define INT int32_t 46 | #else 47 | #define INT int 48 | #endif 49 | 50 | #if NV_TENSORRT_MAJOR < 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR == 0) 51 | static class Logger : public nvinfer1::ILogger { 52 | void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override { 53 | if (severity <= nvinfer1::ILogger::Severity::kWARNING) 54 | std::cout << msg << std::endl; 55 | } 56 | } logger; 57 | #endif 58 | 59 | struct NetworkInfo 60 | { 61 | std::string inputBlobName; 62 | std::string networkType; 63 | std::string modelName; 64 | std::string onnxWtsFilePath; 65 | std::string darknetWtsFilePath; 66 | std::string darknetCfgFilePath; 67 | uint batchSize; 68 | int implicitBatch; 69 | std::string int8CalibPath; 70 | std::string deviceType; 71 | uint numDetectedClasses; 72 | int clusterMode; 73 | std::string networkMode; 74 | float scaleFactor; 75 | const float* offsets; 76 | uint workspaceSize; 77 | }; 78 | 79 | struct TensorInfo 80 | { 81 | std::string blobName; 82 | uint 
gridSizeX {0}; 83 | uint gridSizeY {0}; 84 | uint numBBoxes {0}; 85 | float scaleXY; 86 | std::vector anchors; 87 | std::vector mask; 88 | }; 89 | 90 | class Yolo : public IModelParser { 91 | public: 92 | Yolo(const NetworkInfo& networkInfo); 93 | 94 | ~Yolo() override; 95 | 96 | bool hasFullDimsSupported() const override { return false; } 97 | 98 | const char* getModelName() const override { 99 | return m_NetworkType == "onnx" ? m_OnnxWtsFilePath.substr(0, m_OnnxWtsFilePath.find(".onnx")).c_str() : 100 | m_DarknetCfgFilePath.substr(0, m_DarknetCfgFilePath.find(".cfg")).c_str(); 101 | } 102 | 103 | NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override; 104 | 105 | #if NV_TENSORRT_MAJOR >= 8 106 | nvinfer1::ICudaEngine* createEngine(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config); 107 | #else 108 | nvinfer1::ICudaEngine* createEngine(nvinfer1::IBuilder* builder); 109 | #endif 110 | 111 | protected: 112 | const std::string m_InputBlobName; 113 | const std::string m_NetworkType; 114 | const std::string m_ModelName; 115 | const std::string m_OnnxWtsFilePath; 116 | const std::string m_DarknetWtsFilePath; 117 | const std::string m_DarknetCfgFilePath; 118 | const uint m_BatchSize; 119 | const int m_ImplicitBatch; 120 | const std::string m_Int8CalibPath; 121 | const std::string m_DeviceType; 122 | const uint m_NumDetectedClasses; 123 | const int m_ClusterMode; 124 | const std::string m_NetworkMode; 125 | const float m_ScaleFactor; 126 | const float* m_Offsets; 127 | const uint m_WorkspaceSize; 128 | 129 | uint m_InputC; 130 | uint m_InputH; 131 | uint m_InputW; 132 | uint64_t m_InputSize; 133 | uint m_NumClasses; 134 | uint m_LetterBox; 135 | uint m_NewCoords; 136 | uint m_YoloCount; 137 | 138 | std::vector m_YoloTensors; 139 | std::vector> m_ConfigBlocks; 140 | std::vector m_TrtWeights; 141 | 142 | private: 143 | NvDsInferStatus buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition& network); 144 | 145 | std::vector> 
parseConfigFile(const std::string cfgFilePath); 146 | 147 | void parseConfigBlocks(); 148 | 149 | void destroyNetworkUtils(); 150 | }; 151 | 152 | #endif // _YOLO_H_ 153 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "activation_layer.h" 7 | 8 | #include 9 | #include 10 | 11 | nvinfer1::ITensor* 12 | activationLayer(int layerIdx, std::string activation, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network, 13 | std::string layerName) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | if (activation == "linear") 18 | output = input; 19 | else if (activation == "relu") { 20 | nvinfer1::IActivationLayer* relu = network->addActivation(*input, nvinfer1::ActivationType::kRELU); 21 | assert(relu != nullptr); 22 | std::string reluLayerName = "relu_" + layerName + std::to_string(layerIdx); 23 | relu->setName(reluLayerName.c_str()); 24 | output = relu->getOutput(0); 25 | } 26 | else if (activation == "sigmoid" || activation == "logistic") { 27 | nvinfer1::IActivationLayer* sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID); 28 | assert(sigmoid != nullptr); 29 | std::string sigmoidLayerName = "sigmoid_" + layerName + std::to_string(layerIdx); 30 | sigmoid->setName(sigmoidLayerName.c_str()); 31 | output = sigmoid->getOutput(0); 32 | } 33 | else if (activation == "tanh") { 34 | nvinfer1::IActivationLayer* tanh = network->addActivation(*input, nvinfer1::ActivationType::kTANH); 35 | assert(tanh != nullptr); 36 | std::string tanhLayerName = "tanh_" + layerName + std::to_string(layerIdx); 37 | tanh->setName(tanhLayerName.c_str()); 38 | output = tanh->getOutput(0); 39 | } 40 | else if (activation == "leaky") { 41 | nvinfer1::IActivationLayer* leaky = 
network->addActivation(*input, nvinfer1::ActivationType::kLEAKY_RELU); 42 | assert(leaky != nullptr); 43 | std::string leakyLayerName = "leaky_" + layerName + std::to_string(layerIdx); 44 | leaky->setName(leakyLayerName.c_str()); 45 | leaky->setAlpha(0.1); 46 | output = leaky->getOutput(0); 47 | } 48 | else if (activation == "softplus") { 49 | nvinfer1::IActivationLayer* softplus = network->addActivation(*input, nvinfer1::ActivationType::kSOFTPLUS); 50 | assert(softplus != nullptr); 51 | std::string softplusLayerName = "softplus_" + layerName + std::to_string(layerIdx); 52 | softplus->setName(softplusLayerName.c_str()); 53 | output = softplus->getOutput(0); 54 | } 55 | else if (activation == "mish") { 56 | nvinfer1::IActivationLayer* softplus = network->addActivation(*input, nvinfer1::ActivationType::kSOFTPLUS); 57 | assert(softplus != nullptr); 58 | std::string softplusLayerName = "softplus_" + layerName + std::to_string(layerIdx); 59 | softplus->setName(softplusLayerName.c_str()); 60 | nvinfer1::IActivationLayer* tanh = network->addActivation(*softplus->getOutput(0), nvinfer1::ActivationType::kTANH); 61 | assert(tanh != nullptr); 62 | std::string tanhLayerName = "tanh_" + layerName + std::to_string(layerIdx); 63 | tanh->setName(tanhLayerName.c_str()); 64 | nvinfer1::IElementWiseLayer* mish = network->addElementWise(*input, *tanh->getOutput(0), 65 | nvinfer1::ElementWiseOperation::kPROD); 66 | assert(mish != nullptr); 67 | std::string mishLayerName = "mish_" + layerName + std::to_string(layerIdx); 68 | mish->setName(mishLayerName.c_str()); 69 | output = mish->getOutput(0); 70 | } 71 | else if (activation == "silu" || activation == "swish") { 72 | nvinfer1::IActivationLayer* sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID); 73 | assert(sigmoid != nullptr); 74 | std::string sigmoidLayerName = "sigmoid_" + layerName + std::to_string(layerIdx); 75 | sigmoid->setName(sigmoidLayerName.c_str()); 76 | nvinfer1::IElementWiseLayer* silu = 
network->addElementWise(*input, *sigmoid->getOutput(0), 77 | nvinfer1::ElementWiseOperation::kPROD); 78 | assert(silu != nullptr); 79 | std::string siluLayerName = "silu_" + layerName + std::to_string(layerIdx); 80 | silu->setName(siluLayerName.c_str()); 81 | output = silu->getOutput(0); 82 | } 83 | else if (activation == "hardsigmoid") { 84 | nvinfer1::IActivationLayer* hardsigmoid = network->addActivation(*input, nvinfer1::ActivationType::kHARD_SIGMOID); 85 | assert(hardsigmoid != nullptr); 86 | std::string hardsigmoidLayerName = "hardsigmoid_" + layerName + std::to_string(layerIdx); 87 | hardsigmoid->setName(hardsigmoidLayerName.c_str()); 88 | hardsigmoid->setAlpha(1.0 / 6.0); 89 | hardsigmoid->setBeta(0.5); 90 | output = hardsigmoid->getOutput(0); 91 | } 92 | else if (activation == "hardswish") { 93 | nvinfer1::IActivationLayer* hardsigmoid = network->addActivation(*input, nvinfer1::ActivationType::kHARD_SIGMOID); 94 | assert(hardsigmoid != nullptr); 95 | std::string hardsigmoidLayerName = "hardsigmoid_" + layerName + std::to_string(layerIdx); 96 | hardsigmoid->setName(hardsigmoidLayerName.c_str()); 97 | hardsigmoid->setAlpha(1.0 / 6.0); 98 | hardsigmoid->setBeta(0.5); 99 | nvinfer1::IElementWiseLayer* hardswish = network->addElementWise(*input, *hardsigmoid->getOutput(0), 100 | nvinfer1::ElementWiseOperation::kPROD); 101 | assert(hardswish != nullptr); 102 | std::string hardswishLayerName = "hardswish_" + layerName + std::to_string(layerIdx); 103 | hardswish->setName(hardswishLayerName.c_str()); 104 | output = hardswish->getOutput(0); 105 | } 106 | else { 107 | std::cerr << "Activation not supported: " << activation << std::endl; 108 | assert(0); 109 | } 110 | return output; 111 | } 112 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * 
 * https://www.github.com/marcoslucianops
 */

#include "convolutional_layer.h"

#include <cassert>
#include <cmath>

/**
 * Adds a darknet "convolutional" (or "c2f") block to the TensorRT network:
 * convolution -> optional batch normalization (folded into an IScaleLayer)
 * -> activation.
 *
 * Weight layout consumed from the flat darknet buffer, in read order:
 *   without batch_normalize: [conv biases][conv kernel weights]
 *   with batch_normalize:    [bn biases][bn scales][bn running mean]
 *                            [bn running var][optional conv biases][conv kernel weights]
 *
 * @param layerIdx      cfg index of this layer, used in generated layer names
 * @param block         key/value pairs parsed from the cfg section
 * @param weights       flat darknet weight buffer
 * @param trtWeights    receives every nvinfer1::Weights allocated here so the
 *                      caller can free the backing float arrays after build
 * @param weightPtr     read cursor into `weights`; advanced past this layer
 * @param inputChannels channel count of the input tensor
 * @param input         input tensor
 * @param network       network definition under construction
 * @param layerName     optional prefix inserted into generated layer names
 * @return the activated output tensor (asserts rather than returning null)
 */
nvinfer1::ITensor*
convolutionalLayer(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights,
    std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input,
    nvinfer1::INetworkDefinition* network, std::string layerName)
{
  nvinfer1::ITensor* output;

  assert(block.at("type") == "convolutional" || block.at("type") == "c2f");
  assert(block.find("filters") != block.end());
  assert(block.find("pad") != block.end());
  assert(block.find("size") != block.end());
  assert(block.find("stride") != block.end());

  int filters = std::stoi(block.at("filters"));
  int padding = std::stoi(block.at("pad"));
  int kernelSize = std::stoi(block.at("size"));
  int stride = std::stoi(block.at("stride"));
  std::string activation = block.at("activation");
  // `bias` doubles as the element count of the conv bias weights (0 = none).
  int bias = filters;

  bool batchNormalize = false;
  if (block.find("batch_normalize") != block.end()) {
    // BN layers normally absorb the conv bias, so default it off here.
    bias = 0;
    batchNormalize = (block.at("batch_normalize") == "1");
  }

  // An explicit "bias" key can re-enable (1 -> filters) or override the count.
  if (block.find("bias") != block.end()) {
    bias = std::stoi(block.at("bias"));
    if (bias == 1)
      bias = filters;
  }

  int groups = 1;
  if (block.find("groups") != block.end())
    groups = std::stoi(block.at("groups"));

  // darknet "pad=1" means SAME-style padding for odd kernel sizes.
  int pad;
  if (padding)
    pad = (kernelSize - 1) / 2;
  else
    pad = 0;

  // Kernel weight count for a (possibly grouped) convolution.
  int size = filters * inputChannels * kernelSize * kernelSize / groups;
  std::vector<float> bnBiases;
  std::vector<float> bnWeights;
  std::vector<float> bnRunningMean;
  std::vector<float> bnRunningVar;
  nvinfer1::Weights convWt {nvinfer1::DataType::kFLOAT, nullptr, size};
  nvinfer1::Weights convBias {nvinfer1::DataType::kFLOAT, nullptr, bias};

  if (batchNormalize == false) {
    // Plain conv: biases first, then kernel weights.
    float* val;
    if (bias != 0) {
      val = new float[filters];
      for (int i = 0; i < filters; ++i) {
        val[i] = weights[weightPtr];
        ++weightPtr;
      }
      convBias.values = val;
      trtWeights.push_back(convBias);
    }
    val = new float[size];
    for (int i = 0; i < size; ++i) {
      val[i] = weights[weightPtr];
      ++weightPtr;
    }
    convWt.values = val;
    trtWeights.push_back(convWt);
  }
  else {
    // BN conv: the four per-channel BN vectors precede the kernel weights.
    for (int i = 0; i < filters; ++i) {
      bnBiases.push_back(weights[weightPtr]);
      ++weightPtr;
    }
    for (int i = 0; i < filters; ++i) {
      bnWeights.push_back(weights[weightPtr]);
      ++weightPtr;
    }
    for (int i = 0; i < filters; ++i) {
      bnRunningMean.push_back(weights[weightPtr]);
      ++weightPtr;
    }
    for (int i = 0; i < filters; ++i) {
      // Pre-compute sqrt(var + eps) so the fold below is a plain divide.
      bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
      ++weightPtr;
    }
    float* val;
    if (bias != 0) {
      val = new float[filters];
      for (int i = 0; i < filters; ++i) {
        val[i] = weights[weightPtr];
        ++weightPtr;
      }
      convBias.values = val;
    }
    val = new float[size];
    for (int i = 0; i < size; ++i) {
      val[i] = weights[weightPtr];
      ++weightPtr;
    }
    convWt.values = val;
    trtWeights.push_back(convWt);
    if (bias != 0)
      trtWeights.push_back(convBias);
  }

  nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(*input, filters, nvinfer1::Dims{2, {kernelSize, kernelSize}},
      convWt, convBias);
  assert(conv != nullptr);
  std::string convLayerName = "conv_" + layerName + std::to_string(layerIdx);
  conv->setName(convLayerName.c_str());
  conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}});
  conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}});

  if (block.find("groups") != block.end())
    conv->setNbGroups(groups);

  output = conv->getOutput(0);

  if (batchNormalize == true) {
    // Fold BN into an IScaleLayer: y = (x * scale + shift) ^ power, per channel.
    size = filters;
    nvinfer1::Weights shift {nvinfer1::DataType::kFLOAT, nullptr, size};
    nvinfer1::Weights scale {nvinfer1::DataType::kFLOAT, nullptr, size};
    nvinfer1::Weights power {nvinfer1::DataType::kFLOAT, nullptr, size};
    float* shiftWt = new float[size];
    for (int i = 0; i < size; ++i)
      shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
    shift.values = shiftWt;
    float* scaleWt = new float[size];
    for (int i = 0; i < size; ++i)
      scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
    scale.values = scaleWt;
    float* powerWt = new float[size];
    for (int i = 0; i < size; ++i)
      powerWt[i] = 1.0;
    power.values = powerWt;
    trtWeights.push_back(shift);
    trtWeights.push_back(scale);
    trtWeights.push_back(power);

    nvinfer1::IScaleLayer* batchnorm = network->addScale(*output, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
    assert(batchnorm != nullptr);
    std::string batchnormLayerName = "batchnorm_" + layerName + std::to_string(layerIdx);
    batchnorm->setName(batchnormLayerName.c_str());
    output = batchnorm->getOutput(0);
  }

  output = activationLayer(layerIdx, activation, output, network, layerName);
  assert(output != nullptr);

  return output;
}
--------------------------------------------------------------------------------
/src/nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Edited by Marcos Luciano
 * https://www.github.com/marcoslucianops
 */

#include <algorithm>

#include "nvdsinfer_custom_impl.h"
#include "nvdsinfer_context.h"

#include "yolo.h"

// 1: build the engine through NvDsInferYoloCudaEngineGet (cuda-engine API);
// 0: expose the IModelParser path via NvDsInferCreateModelParser instead.
#define USE_CUDA_ENGINE_GET_API 1

/*
 * Populates `networkInfo` from the DeepStream context init params.
 *
 * Chooses ONNX vs darknet (an ONNX path wins when present), derives a
 * lowercase model name from the weight file path, copies the relevant init
 * parameters and verifies that the referenced model files exist on disk.
 *
 * Returns false (after logging to stderr) when a required file is missing.
 */
static bool
getYoloNetworkInfo(NetworkInfo& networkInfo, const NvDsInferContextInitParams* initParams)
{
  std::string onnxWtsFilePath = initParams->onnxFilePath;
  std::string darknetWtsFilePath = initParams->modelFilePath;
  std::string darknetCfgFilePath = initParams->customNetworkConfigFilePath;

  std::string yoloType = onnxWtsFilePath != "" ? "onnx" : "darknet";
  // Model name = file name without directory and without its extension.
  // (The inner substr strips the extension; the outer one strips the directory
  // — safe because any "/" precedes the extension in the path.)
  std::string modelName = yoloType == "onnx" ?
      onnxWtsFilePath.substr(0, onnxWtsFilePath.find(".onnx")).substr(onnxWtsFilePath.rfind("/") + 1) :
      darknetWtsFilePath.substr(0, darknetWtsFilePath.find(".weights")).substr(darknetWtsFilePath.rfind("/") + 1);

  std::transform(modelName.begin(), modelName.end(), modelName.begin(), [] (uint8_t c) {
    return std::tolower(c);
  });

  networkInfo.inputBlobName = "input";
  networkInfo.networkType = yoloType;
  networkInfo.modelName = modelName;
  networkInfo.onnxWtsFilePath = onnxWtsFilePath;
  networkInfo.darknetWtsFilePath = darknetWtsFilePath;
  networkInfo.darknetCfgFilePath = darknetCfgFilePath;
  networkInfo.batchSize = initParams->maxBatchSize;
  networkInfo.implicitBatch = initParams->forceImplicitBatchDimension;
  networkInfo.int8CalibPath = initParams->int8CalibrationFilePath;
  networkInfo.deviceType = initParams->useDLA ? "kDLA" : "kGPU";
  networkInfo.numDetectedClasses = initParams->numDetectedClasses;
  networkInfo.clusterMode = initParams->clusterMode;
  networkInfo.scaleFactor = initParams->networkScaleFactor;
  networkInfo.offsets = initParams->offsets;
  networkInfo.workspaceSize = initParams->workspaceSize;

  if (initParams->networkMode == NvDsInferNetworkMode_FP32)
    networkInfo.networkMode = "FP32";
  else if (initParams->networkMode == NvDsInferNetworkMode_INT8)
    networkInfo.networkMode = "INT8";
  else if (initParams->networkMode == NvDsInferNetworkMode_FP16)
    networkInfo.networkMode = "FP16";

  if (yoloType == "onnx") {
    if (!fileExists(networkInfo.onnxWtsFilePath)) {
      std::cerr << "ONNX model file does not exist\n" << std::endl;
      return false;
    }
  }
  else {
    if (!fileExists(networkInfo.darknetWtsFilePath)) {
      std::cerr << "Darknet weights file does not exist\n" << std::endl;
      return false;
    }
    else if (!fileExists(networkInfo.darknetCfgFilePath)) {
      std::cerr << "Darknet cfg file does not exist\n" << std::endl;
      return false;
    }
  }

  return true;
}

#if !USE_CUDA_ENGINE_GET_API
// IModelParser factory path (compiled out while USE_CUDA_ENGINE_GET_API is 1).
IModelParser*
NvDsInferCreateModelParser(const NvDsInferContextInitParams* initParams)
{
  NetworkInfo networkInfo;
  if (!getYoloNetworkInfo(networkInfo, initParams))
    return nullptr;

  return new Yolo(networkInfo);
}
#else

// Entry point DeepStream calls to build the TensorRT engine for this model.
// TensorRT >= 8 passes an IBuilderConfig in addition to the IBuilder.
#if NV_TENSORRT_MAJOR >= 8
extern "C" bool
NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder* const builder, nvinfer1::IBuilderConfig* const builderConfig,
    const NvDsInferContextInitParams* const initParams, nvinfer1::DataType dataType, nvinfer1::ICudaEngine*& cudaEngine);

extern "C" bool
NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder* const builder, nvinfer1::IBuilderConfig* const builderConfig,
    const NvDsInferContextInitParams* const initParams, nvinfer1::DataType dataType, nvinfer1::ICudaEngine*& cudaEngine)
#else
extern "C" bool
NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder* const builder, const NvDsInferContextInitParams* const initParams,
    nvinfer1::DataType dataType, nvinfer1::ICudaEngine*& cudaEngine);

extern "C" bool
NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder* const builder, const NvDsInferContextInitParams* const initParams,
    nvinfer1::DataType dataType, nvinfer1::ICudaEngine*& cudaEngine)
#endif

{
  NetworkInfo networkInfo;
  if (!getYoloNetworkInfo(networkInfo, initParams))
    return false;

  Yolo yolo(networkInfo);

#if NV_TENSORRT_MAJOR >= 8
  cudaEngine = yolo.createEngine(builder, builderConfig);
#else
  cudaEngine = yolo.createEngine(builder);
#endif

  if (cudaEngine == nullptr) {
    std::cerr << "Failed to build CUDA engine" << std::endl;
    return false;
  }

  return true;
}
#endif
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# DeepStream YOLOv11

[![NVIDIA](https://img.shields.io/badge/NVIDIA_DeepStream-7.0-76B900?logo=nvidia&logoColor=white)](https://developer.nvidia.com/deepstream-sdk)
[![YOLO](https://img.shields.io/badge/YOLOv11-Custom-orange)](https://docs.ultralytics.com/models/yolo11/)
[![CUDA](https://img.shields.io/badge/CUDA-12.2-green?logo=nvidia&logoColor=white)](https://developer.nvidia.com/cuda-toolkit)

A customized DeepStream application integrating YOLOv11 for real-time object detection.

12 | 13 | ## 📌 Introduction 14 | 15 | This repository houses a customized integration of YOLOv11 into the NVIDIA DeepStream SDK for real-time object detection. Based on the [DeepStream YOLO Plugin](https://github.com/marcoslucianops/DeepStream-Yolo.git), this project is adapted to work with the latest YOLOv11 model, providing enhanced detection capabilities with NVIDIA's optimized inference pipeline. 16 | 17 |

## 📦 Main Technologies

- [NVIDIA DeepStream SDK](https://developer.nvidia.com/deepstream-sdk) - A complete streaming analytics toolkit for AI-based multi-sensor processing, video, audio, and image understanding.
- [YOLOv11](https://docs.ultralytics.com/models/yolo11/) - Latest iteration of the real-time object detection system, developed by Ultralytics.
- [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit) - A parallel computing platform and programming model developed by NVIDIA for general computing on graphical processing units (GPUs).


## Acknowledgements

This project is an extension of the [DeepStream YOLO](https://github.com/marcoslucianops/DeepStream-Yolo.git), which provides a foundational framework for real-time object detection with YOLO using NVIDIA's DeepStream SDK. For further insights, enhancements, or additional information, please refer to the [original repository](https://github.com/marcoslucianops/DeepStream-Yolo.git).

We extend our gratitude to the contributors of the original DeepStream YOLO project for their pioneering efforts and for providing a robust platform for further innovation and development.


## 📁 Project

The directory structure of the project looks like this:
```
├── LICENSE
├── README.md
├── requirements.txt
└── src
    ├── config_infer_primary_yoloV11.txt
    ├── config_tracker_NvDCF_perf.yml
    ├── deepstream_app_config.txt
    ├── labels.txt
    ├── models
    │   └── .gitkeep
    ├── nvdsinfer_custom_impl_Yolo
    │   ├── calibrator.cpp
    │   ├── calibrator.h
    │   ├── layers
    │   │   ├── activation_layer.cpp
    │   │   ├── activation_layer.h
    │   │   ├── batchnorm_layer.cpp
    │   │   ├── batchnorm_layer.h
    │   │   ├── channels_layer.cpp
    │   │   ├── channels_layer.h
    │   │   ├── convolutional_layer.cpp
    │   │   ├── convolutional_layer.h
    │   │   ├── deconvolutional_layer.cpp
    │   │   ├── deconvolutional_layer.h
    │   │   ├── implicit_layer.cpp
    │   │   ├── implicit_layer.h
    │   │   ├── pooling_layer.cpp
    │   │   ├── pooling_layer.h
    │   │   ├── reorg_layer.cpp
    │   │   ├── reorg_layer.h
    │   │   ├── route_layer.cpp
    │   │   ├── route_layer.h
    │   │   ├── sam_layer.cpp
    │   │   ├── sam_layer.h
    │   │   ├── shortcut_layer.cpp
    │   │   ├── shortcut_layer.h
    │   │   ├── slice_layer.cpp
    │   │   ├── slice_layer.h
    │   │   ├── upsample_layer.cpp
    │   │   └── upsample_layer.h
    │   ├── Makefile
    │   ├── nvdsinfer_yolo_engine.cpp
    │   ├── nvdsinitinputlayers_Yolo.cpp
    │   ├── nvdsparsebbox_Yolo.cpp
    │   ├── nvdsparsebbox_Yolo_cuda.cu
    │   ├── utils.cpp
    │   ├── utils.h
    │   ├── yolo.cpp
    │   ├── yoloForward.cu
    │   ├── yoloForward_nc.cu
    │   ├── yoloForward_v2.cu
    │   ├── yolo.h
    │   ├── yoloPlugins.cpp
    │   └── yoloPlugins.h
    └── scripts
        └── export_yolo11.py
```


## 🚀 Quickstart
```bash
# clone project
git clone https://github.com/sh-aidev/deepstream-yolo11.git
cd deepstream-yolo11

# Download the model weights
cd src
wget -P models/ https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt

# For other YOLO weights use the below links:
# YOLO11n - https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt
# YOLO11s - https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt
# YOLO11l - https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt
# YOLO11x - https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt

# Convert the model weights to onnx
python3 scripts/export_yolo11.py -w models/yolo11m.pt

# Change the model path in the config_infer_primary_yoloV11.txt file to the onnx file generated as
# onnx-file=models/yolo11m.onnx

# Set the CUDA_VER according to your DeepStream version
export CUDA_VER=12.2

# Compile the YOLO plugin
make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo

# Run Deepstream Application
deepstream-app -c deepstream_app_config.txt
```


## 📝 Docker container usage instructions
**Prerequisites:**
- [Docker](https://docs.docker.com/get-docker/)
- [Visual Studio Code](https://code.visualstudio.com/)
- [Remote - Containers](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) extension

**Steps:**
1. Clone this repository
2. Open the repository in Visual Studio Code
3. Press Ctrl+Shift+P and select "Remote-Containers: Reopen in Container"
4. Wait for the container to build
5. Open a terminal in Visual Studio Code and run the commands from the Quickstart section above

148 | 149 | ## References 150 | - [DeepStream-Yolo](https://github.com/marcoslucianops/DeepStream-Yolo.git) 151 | - [DeepStream SDK Documentation](https://docs.nvidia.com/metropolis/deepstream/dev-guide/index.html) 152 | - [NVIDIA DeepStream GitHub](https://github.com/NVIDIA-AI-IOT/deepstream_reference_apps) 153 | - [YOLOv11 Ultralytics](https://docs.ultralytics.com/models/yolo11/) 154 |
-------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/yoloPlugins.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #ifndef __YOLO_PLUGINS__ 27 | #define __YOLO_PLUGINS__ 28 | 29 | #include 30 | 31 | #include "yolo.h" 32 | 33 | #define CUDA_CHECK(status) { \ 34 | if (status != 0) { \ 35 | std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " << __LINE__ << \ 36 | std::endl; \ 37 | abort(); \ 38 | } \ 39 | } 40 | 41 | namespace { 42 | const char* YOLOLAYER_PLUGIN_VERSION {"1"}; 43 | const char* YOLOLAYER_PLUGIN_NAME {"YoloLayer_TRT"}; 44 | } // namespace 45 | 46 | class YoloLayer : public nvinfer1::IPluginV2DynamicExt { 47 | public: 48 | YoloLayer(const void* data, size_t length); 49 | 50 | YoloLayer(const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords, 51 | const std::vector& yoloTensors, const uint64_t& outputSize); 52 | 53 | nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; 54 | 55 | int initialize() noexcept override { return 0; } 56 | 57 | void terminate() noexcept override {} 58 | 59 | void destroy() noexcept override { delete this; } 60 | 61 | size_t getSerializationSize() const noexcept override; 62 | 63 | void serialize(void* buffer) const noexcept override; 64 | 65 | int getNbOutputs() const noexcept override { return 3; } 66 | 67 | nvinfer1::DimsExprs getOutputDimensions(INT index, const nvinfer1::DimsExprs* inputs, INT nbInputDims, 68 | nvinfer1::IExprBuilder& exprBuilder) noexcept override; 69 | 70 | size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, INT nbInputs, 71 | const nvinfer1::PluginTensorDesc* outputs, INT nbOutputs) const noexcept override { return 0; } 72 | 73 | bool supportsFormatCombination(INT pos, const nvinfer1::PluginTensorDesc* inOut, INT nbInputs, INT nbOutputs) noexcept 74 | override; 75 | 76 | const char* getPluginType() const noexcept override { return YOLOLAYER_PLUGIN_NAME; } 77 | 78 | const char* getPluginVersion() const noexcept 
override { return YOLOLAYER_PLUGIN_VERSION; } 79 | 80 | void setPluginNamespace(const char* pluginNamespace) noexcept override { m_Namespace = pluginNamespace; } 81 | 82 | const char* getPluginNamespace() const noexcept override { return m_Namespace.c_str(); } 83 | 84 | nvinfer1::DataType getOutputDataType(INT index, const nvinfer1::DataType* inputTypes, INT nbInputs) const noexcept 85 | override; 86 | 87 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator) 88 | noexcept override {} 89 | 90 | void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, INT nbInput, 91 | const nvinfer1::DynamicPluginTensorDesc* out, INT nbOutput) noexcept override; 92 | 93 | void detachFromContext() noexcept override {} 94 | 95 | INT enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, 96 | void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; 97 | 98 | private: 99 | std::string m_Namespace {""}; 100 | uint m_NetWidth {0}; 101 | uint m_NetHeight {0}; 102 | uint m_NumClasses {0}; 103 | uint m_NewCoords {0}; 104 | std::vector m_YoloTensors; 105 | uint64_t m_OutputSize {0}; 106 | }; 107 | 108 | class YoloLayerPluginCreator : public nvinfer1::IPluginCreator { 109 | public: 110 | YoloLayerPluginCreator() {} 111 | 112 | ~YoloLayerPluginCreator() {} 113 | 114 | const char* getPluginName() const noexcept override { return YOLOLAYER_PLUGIN_NAME; } 115 | 116 | const char* getPluginVersion() const noexcept override { return YOLOLAYER_PLUGIN_VERSION; } 117 | 118 | const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override { 119 | std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented" << std::endl; 120 | return nullptr; 121 | } 122 | 123 | nvinfer1::IPluginV2DynamicExt* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept 124 | override { 125 | std::cerr<< 
"YoloLayerPluginCreator::getFieldNames is not implemented"; 126 | return nullptr; 127 | } 128 | 129 | nvinfer1::IPluginV2DynamicExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept 130 | override { 131 | std::cout << "Deserialize yoloLayer plugin: " << name << std::endl; 132 | return new YoloLayer(serialData, serialLength); 133 | } 134 | 135 | void setPluginNamespace(const char* libNamespace) noexcept override { m_Namespace = libNamespace; } 136 | 137 | const char* getPluginNamespace() const noexcept override { return m_Namespace.c_str(); } 138 | 139 | private: 140 | std::string m_Namespace {""}; 141 | }; 142 | 143 | #endif // __YOLO_PLUGINS__ 144 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Edited by Marcos Luciano
 * https://www.github.com/marcoslucianops
 */

#include "nvdsinfer_custom_impl.h"

#include "utils.h"

extern "C" bool
NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);

extern "C" bool
NvDsInferParseYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList);

// Clamps an (x1, y1, x2, y2) box to the network input resolution and converts
// it to the left/top/width/height form DeepStream expects.
static NvDsInferParseObjectInfo
convertBBox(const float& bx1, const float& by1, const float& bx2, const float& by2, const uint& netW, const uint& netH)
{
  NvDsInferParseObjectInfo b;

  float x1 = bx1;
  float y1 = by1;
  float x2 = bx2;
  float y2 = by2;

  x1 = clamp(x1, 0, netW);
  y1 = clamp(y1, 0, netH);
  x2 = clamp(x2, 0, netW);
  y2 = clamp(y2, 0, netH);

  b.left = x1;
  b.width = clamp(x2 - x1, 0, netW);
  b.top = y1;
  b.height = clamp(y2 - y1, 0, netH);

  return b;
}

// Converts one corner-form box and appends it to `binfo`, discarding
// degenerate boxes (width/height below one pixel after clamping).
static void
addBBoxProposal(const float bx1, const float by1, const float bx2, const float by2, const uint& netW, const uint& netH,
    const int maxIndex, const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo)
{
  NvDsInferParseObjectInfo bbi = convertBBox(bx1, by1, bx2, by2, netW, netH);

  if (bbi.width < 1 || bbi.height < 1) {
    return;
  }

  bbi.detectionConfidence = maxProb;
  bbi.classId = maxIndex;
  binfo.push_back(bbi);
}

// Decodes YOLO output where boxes are (center-x, center-y, width, height).
// scores[b] is the best-class confidence and classes[b] the best class index
// for candidate b (already reduced by the YoloLayer plugin / ONNX graph).
// NOTE(review): assumes classes[b] is within the configured class count —
// an out-of-range index would read past preclusterThreshold; confirm upstream.
static std::vector<NvDsInferParseObjectInfo>
decodeTensorYolo(const float* boxes, const float* scores, const float* classes, const uint& outputSize, const uint& netW,
    const uint& netH, const std::vector<float>& preclusterThreshold)
{
  std::vector<NvDsInferParseObjectInfo> binfo;

  for (uint b = 0; b < outputSize; ++b) {
    float maxProb = scores[b];
    int maxIndex = (int) classes[b];

    // Per-class pre-cluster threshold from the DeepStream config.
    if (maxProb < preclusterThreshold[maxIndex]) {
      continue;
    }

    float bxc = boxes[b * 4 + 0];
    float byc = boxes[b * 4 + 1];
    float bw = boxes[b * 4 + 2];
    float bh = boxes[b * 4 + 3];

    // Center/size -> corner form.
    float bx1 = bxc - bw / 2;
    float by1 = byc - bh / 2;
    float bx2 = bx1 + bw;
    float by2 = by1 + bh;

    addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
  }

  return binfo;
}

// Same as decodeTensorYolo, but the boxes are already in corner form
// (x1, y1, x2, y2) — the YOLO-E / end-to-end export layout.
static std::vector<NvDsInferParseObjectInfo>
decodeTensorYoloE(const float* boxes, const float* scores, const float* classes, const uint& outputSize, const uint& netW,
    const uint& netH, const std::vector<float>& preclusterThreshold)
{
  std::vector<NvDsInferParseObjectInfo> binfo;

  for (uint b = 0; b < outputSize; ++b) {
    float maxProb = scores[b];
    int maxIndex = (int) classes[b];

    if (maxProb < preclusterThreshold[maxIndex]) {
      continue;
    }

    float bx1 = boxes[b * 4 + 0];
    float by1 = boxes[b * 4 + 1];
    float bx2 = boxes[b * 4 + 2];
    float by2 = boxes[b * 4 + 3];

    addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
  }

  return binfo;
}

// Parser body for the standard YOLO output layout. Expects exactly three
// output layers in order: [0] boxes, [1] scores, [2] classes; the number of
// candidates is taken from the boxes tensor's leading dimension.
static bool
NvDsInferParseCustomYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
  if (outputLayersInfo.empty()) {
    std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
    return false;
  }

  std::vector<NvDsInferParseObjectInfo> objects;

  const NvDsInferLayerInfo& boxes = outputLayersInfo[0];
  const NvDsInferLayerInfo& scores = outputLayersInfo[1];
  const NvDsInferLayerInfo& classes = outputLayersInfo[2];

  const uint outputSize = boxes.inferDims.d[0];

  std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYolo((const float*) (boxes.buffer),
      (const float*) (scores.buffer), (const float*) (classes.buffer), outputSize, networkInfo.width, networkInfo.height,
      detectionParams.perClassPreclusterThreshold);

  objects.insert(objects.end(), outObjs.begin(), outObjs.end());

  objectList = objects;

  return true;
}

// Parser body for the YOLO-E (corner-form) output layout; identical to the
// function above except for the box decoding.
static bool
NvDsInferParseCustomYoloE(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
  if (outputLayersInfo.empty()) {
    std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
    return false;
  }

  std::vector<NvDsInferParseObjectInfo> objects;

  const NvDsInferLayerInfo& boxes = outputLayersInfo[0];
  const NvDsInferLayerInfo& scores = outputLayersInfo[1];
  const NvDsInferLayerInfo& classes = outputLayersInfo[2];

  const uint outputSize = boxes.inferDims.d[0];

  std::vector<NvDsInferParseObjectInfo> outObjs = decodeTensorYoloE((const float*) (boxes.buffer),
      (const float*) (scores.buffer), (const float*) (classes.buffer), outputSize, networkInfo.width, networkInfo.height,
      detectionParams.perClassPreclusterThreshold);

  objects.insert(objects.end(), outObjs.begin(), outObjs.end());

  objectList = objects;

  return true;
}

// C-linkage entry point named in the DeepStream config (parse-bbox-func-name).
extern "C" bool
NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
  return NvDsInferParseCustomYolo(outputLayersInfo, networkInfo, detectionParams, objectList);
}

extern "C" bool
| NvDsInferParseYoloE(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, 196 | NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) 197 | { 198 | return NvDsInferParseCustomYoloE(outputLayersInfo, networkInfo, detectionParams, objectList); 199 | } 200 | 201 | CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo); 202 | CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloE); 203 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo_cuda.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #include "nvdsinfer_custom_impl.h" 31 | 32 | extern "C" bool 33 | NvDsInferParseYoloCuda(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, 34 | NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList); 35 | 36 | extern "C" bool 37 | NvDsInferParseYoloECuda(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, 38 | NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList); 39 | 40 | __global__ void decodeTensorYoloCuda(NvDsInferParseObjectInfo *binfo, float* boxes, float* scores, float* classes, 41 | int outputSize, int netW, int netH, float minPreclusterThreshold) 42 | { 43 | int x_id = blockIdx.x * blockDim.x + threadIdx.x; 44 | 45 | if (x_id >= outputSize) { 46 | return; 47 | } 48 | 49 | float maxProb = scores[x_id]; 50 | int maxIndex = (int) classes[x_id]; 51 | 52 | if (maxProb < minPreclusterThreshold) { 53 | binfo[x_id].detectionConfidence = 0.0; 54 | return; 55 | } 56 | 57 | float bxc = boxes[x_id * 4 + 0]; 58 | float byc = boxes[x_id * 4 + 1]; 59 | float bw = boxes[x_id * 4 + 2]; 60 | float bh = boxes[x_id * 4 + 3]; 61 | 62 | float x0 = bxc - bw / 2; 63 | float y0 = byc - bh / 2; 64 | float x1 = x0 + bw; 65 | float y1 = y0 + bh; 66 | 67 | x0 = fminf(float(netW), fmaxf(float(0.0), x0)); 68 | y0 = fminf(float(netH), fmaxf(float(0.0), y0)); 69 | x1 = fminf(float(netW), fmaxf(float(0.0), x1)); 70 | y1 = fminf(float(netH), fmaxf(float(0.0), y1)); 71 | 72 | binfo[x_id].left = x0; 73 | binfo[x_id].top = y0; 74 | binfo[x_id].width = fminf(float(netW), fmaxf(float(0.0), x1 - x0)); 75 | binfo[x_id].height = fminf(float(netH), fmaxf(float(0.0), y1 - y0)); 76 | binfo[x_id].detectionConfidence = maxProb; 77 | binfo[x_id].classId = maxIndex; 78 | } 79 | 80 | __global__ void decodeTensorYoloECuda(NvDsInferParseObjectInfo *binfo, 
float* boxes, float* scores, float* classes, 81 | int outputSize, int netW, int netH, float minPreclusterThreshold) 82 | { 83 | int x_id = blockIdx.x * blockDim.x + threadIdx.x; 84 | 85 | if (x_id >= outputSize) { 86 | return; 87 | } 88 | 89 | float maxProb = scores[x_id]; 90 | int maxIndex = (int) classes[x_id]; 91 | 92 | if (maxProb < minPreclusterThreshold) { 93 | binfo[x_id].detectionConfidence = 0.0; 94 | return; 95 | } 96 | 97 | float x0 = boxes[x_id * 4 + 0]; 98 | float y0 = boxes[x_id * 4 + 1]; 99 | float x1 = boxes[x_id * 4 + 2]; 100 | float y1 = boxes[x_id * 4 + 3]; 101 | 102 | x0 = fminf(float(netW), fmaxf(float(0.0), x0)); 103 | y0 = fminf(float(netH), fmaxf(float(0.0), y0)); 104 | x1 = fminf(float(netW), fmaxf(float(0.0), x1)); 105 | y1 = fminf(float(netH), fmaxf(float(0.0), y1)); 106 | 107 | binfo[x_id].left = x0; 108 | binfo[x_id].top = y0; 109 | binfo[x_id].width = fminf(float(netW), fmaxf(float(0.0), x1 - x0)); 110 | binfo[x_id].height = fminf(float(netH), fmaxf(float(0.0), y1 - y0)); 111 | binfo[x_id].detectionConfidence = maxProb; 112 | binfo[x_id].classId = maxIndex; 113 | } 114 | 115 | static bool NvDsInferParseCustomYoloCuda(std::vector const& outputLayersInfo, 116 | NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, 117 | std::vector& objectList) 118 | { 119 | if (outputLayersInfo.empty()) { 120 | std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl; 121 | return false; 122 | } 123 | 124 | const NvDsInferLayerInfo& boxes = outputLayersInfo[0]; 125 | const NvDsInferLayerInfo& scores = outputLayersInfo[1]; 126 | const NvDsInferLayerInfo& classes = outputLayersInfo[2]; 127 | 128 | const int outputSize = boxes.inferDims.d[0]; 129 | 130 | thrust::device_vector objects(outputSize); 131 | 132 | float minPreclusterThreshold = *(std::min_element(detectionParams.perClassPreclusterThreshold.begin(), 133 | detectionParams.perClassPreclusterThreshold.end())); 134 | 135 | int 
threads_per_block = 1024; 136 | int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1; 137 | 138 | decodeTensorYoloCuda<<>>( 139 | thrust::raw_pointer_cast(objects.data()), (float*) (boxes.buffer), (float*) (scores.buffer), 140 | (float*) (classes.buffer), outputSize, networkInfo.width, networkInfo.height, minPreclusterThreshold); 141 | 142 | objectList.resize(outputSize); 143 | thrust::copy(objects.begin(), objects.end(), objectList.begin()); 144 | 145 | return true; 146 | } 147 | 148 | static bool NvDsInferParseCustomYoloECuda(std::vector const& outputLayersInfo, 149 | NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, 150 | std::vector& objectList) 151 | { 152 | if (outputLayersInfo.empty()) { 153 | std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl; 154 | return false; 155 | } 156 | 157 | const NvDsInferLayerInfo& boxes = outputLayersInfo[0]; 158 | const NvDsInferLayerInfo& scores = outputLayersInfo[1]; 159 | const NvDsInferLayerInfo& classes = outputLayersInfo[2]; 160 | 161 | const int outputSize = boxes.inferDims.d[0]; 162 | 163 | thrust::device_vector objects(outputSize); 164 | 165 | float minPreclusterThreshold = *(std::min_element(detectionParams.perClassPreclusterThreshold.begin(), 166 | detectionParams.perClassPreclusterThreshold.end())); 167 | 168 | int threads_per_block = 1024; 169 | int number_of_blocks = ((outputSize - 1) / threads_per_block) + 1; 170 | 171 | decodeTensorYoloECuda<<>>( 172 | thrust::raw_pointer_cast(objects.data()), (float*) (boxes.buffer), (float*) (scores.buffer), 173 | (float*) (classes.buffer), outputSize, networkInfo.width, networkInfo.height, minPreclusterThreshold); 174 | 175 | objectList.resize(outputSize); 176 | thrust::copy(objects.begin(), objects.end(), objectList.begin()); 177 | 178 | return true; 179 | } 180 | 181 | extern "C" bool 182 | NvDsInferParseYoloCuda(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, 
183 | NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) 184 | { 185 | return NvDsInferParseCustomYoloCuda(outputLayersInfo, networkInfo, detectionParams, objectList); 186 | } 187 | 188 | extern "C" bool 189 | NvDsInferParseYoloECuda(std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, 190 | NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) 191 | { 192 | return NvDsInferParseCustomYoloECuda(outputLayersInfo, networkInfo, detectionParams, objectList); 193 | } 194 | 195 | CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloCuda); 196 | CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloECuda); 197 | -------------------------------------------------------------------------------- /src/nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "yoloPlugins.h" 27 | 28 | namespace { 29 | template 30 | void write(char*& buffer, const T& val) { 31 | *reinterpret_cast(buffer) = val; 32 | buffer += sizeof(T); 33 | } 34 | template 35 | void read(const char*& buffer, T& val) { 36 | val = *reinterpret_cast(buffer); 37 | buffer += sizeof(T); 38 | } 39 | } 40 | 41 | cudaError_t cudaYoloLayer_nc(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize, 42 | const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, 43 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 44 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 45 | 46 | cudaError_t cudaYoloLayer(const void* input, void* boxes, void* scores, void* classes, const uint& batchSize, 47 | const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, const uint& netWidth, 48 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 49 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 50 | 51 | cudaError_t cudaRegionLayer(const void* input, void* softmax, void* boxes, void* scores, void* classes, 52 | const uint& batchSize, const uint64_t& inputSize, const uint64_t& outputSize, const uint64_t& lastInputSize, 53 | const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 54 | const uint& numBBoxes, const void* 
anchors, cudaStream_t stream); 55 | 56 | YoloLayer::YoloLayer(const void* data, size_t length) { 57 | const char* d = static_cast(data); 58 | 59 | read(d, m_NetWidth); 60 | read(d, m_NetHeight); 61 | read(d, m_NumClasses); 62 | read(d, m_NewCoords); 63 | read(d, m_OutputSize); 64 | 65 | uint yoloTensorsSize; 66 | read(d, yoloTensorsSize); 67 | for (uint i = 0; i < yoloTensorsSize; ++i) { 68 | TensorInfo curYoloTensor; 69 | read(d, curYoloTensor.gridSizeX); 70 | read(d, curYoloTensor.gridSizeY); 71 | read(d, curYoloTensor.numBBoxes); 72 | read(d, curYoloTensor.scaleXY); 73 | 74 | uint anchorsSize; 75 | read(d, anchorsSize); 76 | for (uint j = 0; j < anchorsSize; ++j) { 77 | float result; 78 | read(d, result); 79 | curYoloTensor.anchors.push_back(result); 80 | } 81 | 82 | uint maskSize; 83 | read(d, maskSize); 84 | for (uint j = 0; j < maskSize; ++j) { 85 | int result; 86 | read(d, result); 87 | curYoloTensor.mask.push_back(result); 88 | } 89 | 90 | m_YoloTensors.push_back(curYoloTensor); 91 | } 92 | }; 93 | 94 | YoloLayer::YoloLayer(const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords, 95 | const std::vector& yoloTensors, const uint64_t& outputSize) : m_NetWidth(netWidth), 96 | m_NetHeight(netHeight), m_NumClasses(numClasses), m_NewCoords(newCoords), m_YoloTensors(yoloTensors), 97 | m_OutputSize(outputSize) 98 | { 99 | assert(m_NetWidth > 0); 100 | assert(m_NetHeight > 0); 101 | }; 102 | 103 | nvinfer1::IPluginV2DynamicExt* 104 | YoloLayer::clone() const noexcept 105 | { 106 | return new YoloLayer(m_NetWidth, m_NetHeight, m_NumClasses, m_NewCoords, m_YoloTensors, m_OutputSize); 107 | } 108 | 109 | size_t 110 | YoloLayer::getSerializationSize() const noexcept 111 | { 112 | size_t totalSize = 0; 113 | 114 | totalSize += sizeof(m_NetWidth); 115 | totalSize += sizeof(m_NetHeight); 116 | totalSize += sizeof(m_NumClasses); 117 | totalSize += sizeof(m_NewCoords); 118 | totalSize += sizeof(m_OutputSize); 119 | 120 | uint yoloTensorsSize 
= m_YoloTensors.size(); 121 | totalSize += sizeof(yoloTensorsSize); 122 | 123 | for (uint i = 0; i < yoloTensorsSize; ++i) { 124 | const TensorInfo& curYoloTensor = m_YoloTensors.at(i); 125 | totalSize += sizeof(curYoloTensor.gridSizeX); 126 | totalSize += sizeof(curYoloTensor.gridSizeY); 127 | totalSize += sizeof(curYoloTensor.numBBoxes); 128 | totalSize += sizeof(curYoloTensor.scaleXY); 129 | totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size(); 130 | totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * curYoloTensor.mask.size(); 131 | } 132 | 133 | return totalSize; 134 | } 135 | 136 | void 137 | YoloLayer::serialize(void* buffer) const noexcept 138 | { 139 | char* d = static_cast(buffer); 140 | 141 | write(d, m_NetWidth); 142 | write(d, m_NetHeight); 143 | write(d, m_NumClasses); 144 | write(d, m_NewCoords); 145 | write(d, m_OutputSize); 146 | 147 | uint yoloTensorsSize = m_YoloTensors.size(); 148 | write(d, yoloTensorsSize); 149 | for (uint i = 0; i < yoloTensorsSize; ++i) { 150 | const TensorInfo& curYoloTensor = m_YoloTensors.at(i); 151 | write(d, curYoloTensor.gridSizeX); 152 | write(d, curYoloTensor.gridSizeY); 153 | write(d, curYoloTensor.numBBoxes); 154 | write(d, curYoloTensor.scaleXY); 155 | 156 | uint anchorsSize = curYoloTensor.anchors.size(); 157 | write(d, anchorsSize); 158 | for (uint j = 0; j < anchorsSize; ++j) 159 | write(d, curYoloTensor.anchors[j]); 160 | 161 | uint maskSize = curYoloTensor.mask.size(); 162 | write(d, maskSize); 163 | for (uint j = 0; j < maskSize; ++j) 164 | write(d, curYoloTensor.mask[j]); 165 | } 166 | } 167 | 168 | nvinfer1::DimsExprs 169 | YoloLayer::getOutputDimensions(INT index, const nvinfer1::DimsExprs* inputs, INT nbInputDims, 170 | nvinfer1::IExprBuilder& exprBuilder)noexcept 171 | { 172 | assert(index < 3); 173 | if (index == 0) { 174 | return nvinfer1::DimsExprs{3, {inputs->d[0], exprBuilder.constant(static_cast(m_OutputSize)), 175 | exprBuilder.constant(4)}}; 176 | } 
177 | return nvinfer1::DimsExprs{3, {inputs->d[0], exprBuilder.constant(static_cast(m_OutputSize)), 178 | exprBuilder.constant(1)}}; 179 | } 180 | 181 | bool 182 | YoloLayer::supportsFormatCombination(INT pos, const nvinfer1::PluginTensorDesc* inOut, INT nbInputs, INT nbOutputs) noexcept 183 | { 184 | return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && inOut[pos].type == nvinfer1::DataType::kFLOAT; 185 | } 186 | 187 | nvinfer1::DataType 188 | YoloLayer::getOutputDataType(INT index, const nvinfer1::DataType* inputTypes, INT nbInputs) const noexcept 189 | { 190 | assert(index < 3); 191 | return nvinfer1::DataType::kFLOAT; 192 | } 193 | 194 | void 195 | YoloLayer::configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, INT nbInput, 196 | const nvinfer1::DynamicPluginTensorDesc* out, INT nbOutput) noexcept 197 | { 198 | assert(nbInput > 0); 199 | assert(in->desc.format == nvinfer1::PluginFormat::kLINEAR); 200 | assert(in->desc.dims.d != nullptr); 201 | } 202 | 203 | INT 204 | YoloLayer::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, 205 | void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept 206 | { 207 | INT batchSize = inputDesc[0].dims.d[0]; 208 | 209 | void* boxes = outputs[0]; 210 | void* scores = outputs[1]; 211 | void* classes = outputs[2]; 212 | 213 | uint64_t lastInputSize = 0; 214 | 215 | uint yoloTensorsSize = m_YoloTensors.size(); 216 | for (uint i = 0; i < yoloTensorsSize; ++i) { 217 | TensorInfo& curYoloTensor = m_YoloTensors.at(i); 218 | 219 | const uint numBBoxes = curYoloTensor.numBBoxes; 220 | const float scaleXY = curYoloTensor.scaleXY; 221 | const uint gridSizeX = curYoloTensor.gridSizeX; 222 | const uint gridSizeY = curYoloTensor.gridSizeY; 223 | const std::vector anchors = curYoloTensor.anchors; 224 | const std::vector mask = curYoloTensor.mask; 225 | 226 | void* v_anchors; 227 | void* v_mask; 228 | if (anchors.size() > 0) { 229 | 
CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size())); 230 | CUDA_CHECK(cudaMemcpyAsync(v_anchors, anchors.data(), sizeof(float) * anchors.size(), cudaMemcpyHostToDevice, stream)); 231 | } 232 | if (mask.size() > 0) { 233 | CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size())); 234 | CUDA_CHECK(cudaMemcpyAsync(v_mask, mask.data(), sizeof(int) * mask.size(), cudaMemcpyHostToDevice, stream)); 235 | } 236 | 237 | const uint64_t inputSize = (numBBoxes * (4 + 1 + m_NumClasses)) * gridSizeY * gridSizeX; 238 | 239 | if (mask.size() > 0) { 240 | if (m_NewCoords) { 241 | CUDA_CHECK(cudaYoloLayer_nc(inputs[i], boxes, scores, classes, batchSize, inputSize, m_OutputSize, lastInputSize, 242 | m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream)); 243 | } 244 | else { 245 | CUDA_CHECK(cudaYoloLayer(inputs[i], boxes, scores, classes, batchSize, inputSize, m_OutputSize, lastInputSize, 246 | m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream)); 247 | } 248 | } 249 | else { 250 | void* softmax; 251 | CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize)); 252 | CUDA_CHECK(cudaMemsetAsync((float*) softmax, 0, sizeof(float) * inputSize * batchSize, stream)); 253 | 254 | CUDA_CHECK(cudaRegionLayer(inputs[i], softmax, boxes, scores, classes, batchSize, inputSize, m_OutputSize, 255 | lastInputSize, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, v_anchors, stream)); 256 | 257 | CUDA_CHECK(cudaFree(softmax)); 258 | } 259 | 260 | if (anchors.size() > 0) { 261 | CUDA_CHECK(cudaFree(v_anchors)); 262 | } 263 | if (mask.size() > 0) { 264 | CUDA_CHECK(cudaFree(v_mask)); 265 | } 266 | 267 | lastInputSize += numBBoxes * gridSizeY * gridSizeX; 268 | } 269 | 270 | return 0; 271 | } 272 | 273 | REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator); 274 | -------------------------------------------------------------------------------- 
/src/nvdsinfer_custom_impl_Yolo/yolo.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "NvOnnxParser.h" 27 | 28 | #include "yolo.h" 29 | #include "yoloPlugins.h" 30 | 31 | #ifdef OPENCV 32 | #include "calibrator.h" 33 | #endif 34 | 35 | Yolo::Yolo(const NetworkInfo& networkInfo) : m_InputBlobName(networkInfo.inputBlobName), 36 | m_NetworkType(networkInfo.networkType), m_ModelName(networkInfo.modelName), 37 | m_OnnxWtsFilePath(networkInfo.onnxWtsFilePath), m_DarknetWtsFilePath(networkInfo.darknetWtsFilePath), 38 | m_DarknetCfgFilePath(networkInfo.darknetCfgFilePath), m_BatchSize(networkInfo.batchSize), 39 | m_ImplicitBatch(networkInfo.implicitBatch), m_Int8CalibPath(networkInfo.int8CalibPath), 40 | m_DeviceType(networkInfo.deviceType), m_NumDetectedClasses(networkInfo.numDetectedClasses), 41 | m_ClusterMode(networkInfo.clusterMode), m_NetworkMode(networkInfo.networkMode), m_ScaleFactor(networkInfo.scaleFactor), 42 | m_Offsets(networkInfo.offsets), m_WorkspaceSize(networkInfo.workspaceSize), m_InputC(0), m_InputH(0), m_InputW(0), 43 | m_InputSize(0), m_NumClasses(0), m_LetterBox(0), m_NewCoords(0), m_YoloCount(0) 44 | { 45 | } 46 | 47 | Yolo::~Yolo() 48 | { 49 | destroyNetworkUtils(); 50 | } 51 | 52 | nvinfer1::ICudaEngine* 53 | #if NV_TENSORRT_MAJOR >= 8 54 | Yolo::createEngine(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config) 55 | #else 56 | Yolo::createEngine(nvinfer1::IBuilder* builder) 57 | #endif 58 | 59 | { 60 | assert(builder); 61 | 62 | #if NV_TENSORRT_MAJOR < 8 63 | nvinfer1::IBuilderConfig* config = builder->createBuilderConfig(); 64 | if (m_WorkspaceSize > 0) { 65 | config->setMaxWorkspaceSize((size_t) m_WorkspaceSize * 1024 * 1024); 66 | } 67 | #endif 68 | 69 | nvinfer1::NetworkDefinitionCreationFlags flags = 70 | 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); 71 | 72 | nvinfer1::INetworkDefinition* network = builder->createNetworkV2(flags); 73 | assert(network); 74 | 75 | 
nvonnxparser::IParser* parser; 76 | 77 | if (m_NetworkType == "onnx") { 78 | 79 | #if NV_TENSORRT_MAJOR >= 8 && NV_TENSORRT_MINOR > 0 80 | parser = nvonnxparser::createParser(*network, *builder->getLogger()); 81 | #else 82 | parser = nvonnxparser::createParser(*network, logger); 83 | #endif 84 | 85 | if (!parser->parseFromFile(m_OnnxWtsFilePath.c_str(), static_cast(nvinfer1::ILogger::Severity::kWARNING))) { 86 | std::cerr << "\nCould not parse the ONNX model\n" << std::endl; 87 | 88 | #if NV_TENSORRT_MAJOR >= 8 89 | delete parser; 90 | delete network; 91 | #else 92 | parser->destroy(); 93 | config->destroy(); 94 | network->destroy(); 95 | #endif 96 | 97 | return nullptr; 98 | } 99 | m_InputC = network->getInput(0)->getDimensions().d[1]; 100 | m_InputH = network->getInput(0)->getDimensions().d[2]; 101 | m_InputW = network->getInput(0)->getDimensions().d[3]; 102 | } 103 | else { 104 | m_ConfigBlocks = parseConfigFile(m_DarknetCfgFilePath); 105 | parseConfigBlocks(); 106 | if (parseModel(*network) != NVDSINFER_SUCCESS) { 107 | 108 | #if NV_TENSORRT_MAJOR >= 8 109 | delete network; 110 | #else 111 | config->destroy(); 112 | network->destroy(); 113 | #endif 114 | 115 | return nullptr; 116 | } 117 | } 118 | 119 | if ((m_NetworkType == "darknet" && !m_ImplicitBatch) || network->getInput(0)->getDimensions().d[0] == -1) { 120 | nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile(); 121 | assert(profile); 122 | for (INT i = 0; i < network->getNbInputs(); ++i) { 123 | nvinfer1::ITensor* input = network->getInput(i); 124 | nvinfer1::Dims inputDims = input->getDimensions(); 125 | nvinfer1::Dims dims = inputDims; 126 | dims.d[0] = 1; 127 | profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMIN, dims); 128 | dims.d[0] = m_BatchSize; 129 | profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT, dims); 130 | dims.d[0] = m_BatchSize; 131 | profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMAX, 
dims); 132 | } 133 | config->addOptimizationProfile(profile); 134 | } 135 | 136 | std::cout << "\nBuilding the TensorRT Engine\n" << std::endl; 137 | 138 | if (m_NetworkType == "darknet") { 139 | if (m_NumClasses != m_NumDetectedClasses) { 140 | std::cout << "NOTE: Number of classes mismatch, make sure to set num-detected-classes=" << m_NumClasses 141 | << " in config_infer file\n" << std::endl; 142 | } 143 | if (m_LetterBox == 1) { 144 | std::cout << "NOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file" 145 | << " to get better accuracy\n" << std::endl; 146 | } 147 | } 148 | if (m_ClusterMode != 2) { 149 | std::cout << "NOTE: Wrong cluster-mode is set, make sure to set cluster-mode=2 in config_infer file\n" << std::endl; 150 | } 151 | 152 | if (m_NetworkMode == "FP16") { 153 | assert(builder->platformHasFastFp16()); 154 | config->setFlag(nvinfer1::BuilderFlag::kFP16); 155 | } 156 | else if (m_NetworkMode == "INT8") { 157 | assert(builder->platformHasFastInt8()); 158 | config->setFlag(nvinfer1::BuilderFlag::kINT8); 159 | if (m_Int8CalibPath != "" && !fileExists(m_Int8CalibPath)) { 160 | 161 | #ifdef OPENCV 162 | std::string calib_image_list; 163 | int calib_batch_size; 164 | if (getenv("INT8_CALIB_IMG_PATH")) { 165 | calib_image_list = getenv("INT8_CALIB_IMG_PATH"); 166 | } 167 | else { 168 | std::cerr << "INT8_CALIB_IMG_PATH not set" << std::endl; 169 | assert(0); 170 | } 171 | if (getenv("INT8_CALIB_BATCH_SIZE")) { 172 | calib_batch_size = std::stoi(getenv("INT8_CALIB_BATCH_SIZE")); 173 | } 174 | else { 175 | std::cerr << "INT8_CALIB_BATCH_SIZE not set" << std::endl; 176 | assert(0); 177 | } 178 | nvinfer1::IInt8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(calib_batch_size, m_InputC, m_InputH, 179 | m_InputW, m_ScaleFactor, m_Offsets, calib_image_list, m_Int8CalibPath); 180 | config->setInt8Calibrator(calibrator); 181 | #else 182 | std::cerr << "OpenCV is required to run INT8 calibrator\n" << 
std::endl; 183 | 184 | #if NV_TENSORRT_MAJOR >= 8 185 | if (m_NetworkType == "onnx") { 186 | delete parser; 187 | } 188 | delete network; 189 | #else 190 | if (m_NetworkType == "onnx") { 191 | parser->destroy(); 192 | } 193 | config->destroy(); 194 | network->destroy(); 195 | #endif 196 | 197 | return nullptr; 198 | #endif 199 | 200 | } 201 | } 202 | 203 | #ifdef GRAPH 204 | config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED); 205 | #endif 206 | 207 | nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config); 208 | if (engine) { 209 | std::cout << "Building complete\n" << std::endl; 210 | } 211 | else { 212 | std::cerr << "Building engine failed\n" << std::endl; 213 | } 214 | 215 | #ifdef GRAPH 216 | nvinfer1::IExecutionContext *context = engine->createExecutionContext(); 217 | nvinfer1::IEngineInspector *inpector = engine->createEngineInspector(); 218 | inpector->setExecutionContext(context); 219 | std::ofstream graph; 220 | graph.open("graph.json"); 221 | graph << inpector->getEngineInformation(nvinfer1::LayerInformationFormat::kJSON); 222 | graph.close(); 223 | std::cout << "Network graph saved to graph.json\n" << std::endl; 224 | 225 | #if NV_TENSORRT_MAJOR >= 8 226 | delete inpector; 227 | delete context; 228 | #else 229 | inpector->destroy(); 230 | context->destroy(); 231 | #endif 232 | 233 | #endif 234 | 235 | #if NV_TENSORRT_MAJOR >= 8 236 | if (m_NetworkType == "onnx") { 237 | delete parser; 238 | } 239 | delete network; 240 | #else 241 | if (m_NetworkType == "onnx") { 242 | parser->destroy(); 243 | } 244 | config->destroy(); 245 | network->destroy(); 246 | #endif 247 | 248 | return engine; 249 | } 250 | 251 | NvDsInferStatus 252 | Yolo::parseModel(nvinfer1::INetworkDefinition& network) { 253 | destroyNetworkUtils(); 254 | 255 | std::vector weights = loadWeights(m_DarknetWtsFilePath, m_ModelName); 256 | std::cout << "Building YOLO network\n" << std::endl; 257 | NvDsInferStatus status = buildYoloNetwork(weights, 
network); 258 | 259 | if (status == NVDSINFER_SUCCESS) { 260 | std::cout << "Building YOLO network complete" << std::endl; 261 | } 262 | else { 263 | std::cerr << "Building YOLO network failed" << std::endl; 264 | } 265 | 266 | return status; 267 | } 268 | 269 | NvDsInferStatus 270 | Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition& network) 271 | { 272 | int weightPtr = 0; 273 | 274 | uint batchSize = m_ImplicitBatch ? m_BatchSize : -1; 275 | 276 | nvinfer1::ITensor* data = network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, 277 | nvinfer1::Dims{4, {static_cast(batchSize), static_cast(m_InputC), static_cast(m_InputH), 278 | static_cast(m_InputW)}}); 279 | assert(data != nullptr && data->getDimensions().nbDims > 0); 280 | 281 | nvinfer1::ITensor* previous = data; 282 | std::vector tensorOutputs; 283 | 284 | nvinfer1::ITensor* yoloTensorInputs[m_YoloCount]; 285 | uint yoloCountInputs = 0; 286 | 287 | for (uint i = 0; i < m_ConfigBlocks.size(); ++i) { 288 | std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")"; 289 | 290 | if (m_ConfigBlocks.at(i).at("type") == "net") 291 | printLayerInfo("", "Layer", "Input Shape", "Output Shape", "WeightPtr"); 292 | else if (m_ConfigBlocks.at(i).at("type") == "conv" || m_ConfigBlocks.at(i).at("type") == "convolutional") { 293 | int channels = getNumChannels(previous); 294 | std::string inputVol = dimsToString(previous->getDimensions()); 295 | previous = convolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous, &network); 296 | assert(previous != nullptr); 297 | std::string outputVol = dimsToString(previous->getDimensions()); 298 | tensorOutputs.push_back(previous); 299 | std::string layerName = "conv_" + m_ConfigBlocks.at(i).at("activation"); 300 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); 301 | } 302 | else if (m_ConfigBlocks.at(i).at("type") == "deconvolutional") { 303 | int channels 
= getNumChannels(previous); 304 | std::string inputVol = dimsToString(previous->getDimensions()); 305 | previous = deconvolutionalLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, channels, previous, 306 | &network); 307 | assert(previous != nullptr); 308 | std::string outputVol = dimsToString(previous->getDimensions()); 309 | tensorOutputs.push_back(previous); 310 | std::string layerName = "deconv"; 311 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); 312 | } 313 | else if (m_ConfigBlocks.at(i).at("type") == "batchnorm") { 314 | std::string inputVol = dimsToString(previous->getDimensions()); 315 | previous = batchnormLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, previous, &network); 316 | assert(previous != nullptr); 317 | std::string outputVol = dimsToString(previous->getDimensions()); 318 | tensorOutputs.push_back(previous); 319 | std::string layerName = "batchnorm_" + m_ConfigBlocks.at(i).at("activation"); 320 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); 321 | } 322 | else if (m_ConfigBlocks.at(i).at("type") == "implicit" || m_ConfigBlocks.at(i).at("type") == "implicit_add" || 323 | m_ConfigBlocks.at(i).at("type") == "implicit_mul") { 324 | previous = implicitLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, &network); 325 | assert(previous != nullptr); 326 | std::string outputVol = dimsToString(previous->getDimensions()); 327 | tensorOutputs.push_back(previous); 328 | std::string layerName = "implicit"; 329 | printLayerInfo(layerIndex, layerName, "-", outputVol, std::to_string(weightPtr)); 330 | } 331 | else if (m_ConfigBlocks.at(i).at("type") == "shift_channels" || m_ConfigBlocks.at(i).at("type") == "control_channels") { 332 | assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end()); 333 | int from = stoi(m_ConfigBlocks.at(i).at("from")); 334 | if (from > 0) 335 | from = from - i + 1; 336 | assert((i - 2 >= 0) && 
(i - 2 < tensorOutputs.size())); 337 | assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); 338 | assert(i + from - 1 < i - 2); 339 | 340 | std::string inputVol = dimsToString(previous->getDimensions()); 341 | previous = channelsLayer(i, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], &network); 342 | assert(previous != nullptr); 343 | std::string outputVol = dimsToString(previous->getDimensions()); 344 | tensorOutputs.push_back(previous); 345 | std::string layerName = m_ConfigBlocks.at(i).at("type") + ": " + std::to_string(i + from - 1); 346 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 347 | } 348 | else if (m_ConfigBlocks.at(i).at("type") == "shortcut") { 349 | assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end()); 350 | int from = stoi(m_ConfigBlocks.at(i).at("from")); 351 | if (from > 0) 352 | from = from - i + 1; 353 | assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); 354 | assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); 355 | assert(i + from - 1 < i - 2); 356 | 357 | std::string activation = "linear"; 358 | if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end()) 359 | activation = m_ConfigBlocks.at(i).at("activation"); 360 | 361 | std::string inputVol = dimsToString(previous->getDimensions()); 362 | std::string shortcutVol = dimsToString(tensorOutputs[i + from - 1]->getDimensions()); 363 | previous = shortcutLayer(i, activation, inputVol, shortcutVol, m_ConfigBlocks.at(i), previous, 364 | tensorOutputs[i + from - 1], &network, m_BatchSize); 365 | assert(previous != nullptr); 366 | std::string outputVol = dimsToString(previous->getDimensions()); 367 | tensorOutputs.push_back(previous); 368 | std::string layerName = "shortcut_" + activation + ": " + std::to_string(i + from - 1); 369 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 370 | 371 | if (inputVol != shortcutVol) 372 | std::cout << inputVol << " +" << shortcutVol << 
std::endl; 373 | } 374 | else if (m_ConfigBlocks.at(i).at("type") == "sam") { 375 | assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end()); 376 | int from = stoi(m_ConfigBlocks.at(i).at("from")); 377 | if (from > 0) 378 | from = from - i + 1; 379 | assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); 380 | assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); 381 | assert(i + from - 1 < i - 2); 382 | 383 | std::string activation = "linear"; 384 | if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end()) 385 | activation = m_ConfigBlocks.at(i).at("activation"); 386 | 387 | std::string inputVol = dimsToString(previous->getDimensions()); 388 | previous = samLayer(i, activation, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], &network); 389 | assert(previous != nullptr); 390 | std::string outputVol = dimsToString(previous->getDimensions()); 391 | tensorOutputs.push_back(previous); 392 | std::string layerName = "sam_" + activation + ": " + std::to_string(i + from - 1); 393 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 394 | } 395 | else if (m_ConfigBlocks.at(i).at("type") == "route") { 396 | std::string layers; 397 | previous = routeLayer(i, layers, m_ConfigBlocks.at(i), tensorOutputs, &network, m_BatchSize); 398 | assert(previous != nullptr); 399 | std::string outputVol = dimsToString(previous->getDimensions()); 400 | tensorOutputs.push_back(previous); 401 | std::string layerName = "route: " + layers; 402 | printLayerInfo(layerIndex, layerName, "-", outputVol, "-"); 403 | } 404 | else if (m_ConfigBlocks.at(i).at("type") == "upsample") { 405 | std::string inputVol = dimsToString(previous->getDimensions()); 406 | previous = upsampleLayer(i, m_ConfigBlocks[i], previous, &network); 407 | assert(previous != nullptr); 408 | std::string outputVol = dimsToString(previous->getDimensions()); 409 | tensorOutputs.push_back(previous); 410 | std::string layerName = "upsample"; 411 | 
printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 412 | } 413 | else if (m_ConfigBlocks.at(i).at("type") == "max" || m_ConfigBlocks.at(i).at("type") == "maxpool" || 414 | m_ConfigBlocks.at(i).at("type") == "avg" || m_ConfigBlocks.at(i).at("type") == "avgpool") { 415 | std::string inputVol = dimsToString(previous->getDimensions()); 416 | previous = poolingLayer(i, m_ConfigBlocks.at(i), previous, &network); 417 | assert(previous != nullptr); 418 | std::string outputVol = dimsToString(previous->getDimensions()); 419 | tensorOutputs.push_back(previous); 420 | std::string layerName = m_ConfigBlocks.at(i).at("type"); 421 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 422 | } 423 | else if (m_ConfigBlocks.at(i).at("type") == "reorg" || m_ConfigBlocks.at(i).at("type") == "reorg3d") { 424 | std::string inputVol = dimsToString(previous->getDimensions()); 425 | previous = reorgLayer(i, m_ConfigBlocks.at(i), previous, &network, m_BatchSize); 426 | assert(previous != nullptr); 427 | std::string outputVol = dimsToString(previous->getDimensions()); 428 | tensorOutputs.push_back(previous); 429 | std::string layerName = m_ConfigBlocks.at(i).at("type"); 430 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 431 | } 432 | else if (m_ConfigBlocks.at(i).at("type") == "yolo" || m_ConfigBlocks.at(i).at("type") == "region") { 433 | std::string blobName = m_ConfigBlocks.at(i).at("type") == "yolo" ? 
"yolo_" + std::to_string(i) : 434 | "region_" + std::to_string(i); 435 | nvinfer1::Dims prevTensorDims = previous->getDimensions(); 436 | TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); 437 | curYoloTensor.blobName = blobName; 438 | curYoloTensor.gridSizeY = prevTensorDims.d[2]; 439 | curYoloTensor.gridSizeX = prevTensorDims.d[3]; 440 | std::string inputVol = dimsToString(previous->getDimensions()); 441 | tensorOutputs.push_back(previous); 442 | yoloTensorInputs[yoloCountInputs] = previous; 443 | ++yoloCountInputs; 444 | std::string layerName = m_ConfigBlocks.at(i).at("type") == "yolo" ? "yolo" : "region"; 445 | printLayerInfo(layerIndex, layerName, inputVol, "-", "-"); 446 | } 447 | else if (m_ConfigBlocks.at(i).at("type") == "dropout") { 448 | // pass 449 | } 450 | else { 451 | std::cerr << "\nUnsupported layer type --> \"" << m_ConfigBlocks.at(i).at("type") << "\"" << std::endl; 452 | assert(0); 453 | } 454 | } 455 | 456 | if ((int) weights.size() != weightPtr) { 457 | std::cerr << "\nNumber of unused weights left: " << weights.size() - weightPtr << std::endl; 458 | assert(0); 459 | } 460 | 461 | if (m_YoloCount == yoloCountInputs) { 462 | uint64_t outputSize = 0; 463 | for (uint j = 0; j < yoloCountInputs; ++j) { 464 | TensorInfo& curYoloTensor = m_YoloTensors.at(j); 465 | outputSize += curYoloTensor.numBBoxes * curYoloTensor.gridSizeY * curYoloTensor.gridSizeX; 466 | } 467 | 468 | nvinfer1::IPluginV2DynamicExt* yoloPlugin = new YoloLayer(m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors, 469 | outputSize); 470 | assert(yoloPlugin != nullptr); 471 | nvinfer1::IPluginV2Layer* yolo = network.addPluginV2(yoloTensorInputs, m_YoloCount, *yoloPlugin); 472 | assert(yolo != nullptr); 473 | std::string yoloLayerName = "yolo"; 474 | yolo->setName(yoloLayerName.c_str()); 475 | 476 | std::string outputlayerName; 477 | nvinfer1::ITensor* detection_boxes = yolo->getOutput(0); 478 | outputlayerName = "boxes"; 479 | 
detection_boxes->setName(outputlayerName.c_str()); 480 | nvinfer1::ITensor* detection_scores = yolo->getOutput(1); 481 | outputlayerName = "scores"; 482 | detection_scores->setName(outputlayerName.c_str()); 483 | nvinfer1::ITensor* detection_classes = yolo->getOutput(2); 484 | outputlayerName = "classes"; 485 | detection_classes->setName(outputlayerName.c_str()); 486 | network.markOutput(*detection_boxes); 487 | network.markOutput(*detection_scores); 488 | network.markOutput(*detection_classes); 489 | } 490 | else { 491 | std::cerr << "\nError in yolo cfg file" << std::endl; 492 | assert(0); 493 | } 494 | 495 | std::cout << "\nOutput YOLO blob names: " << std::endl; 496 | for (auto& tensor : m_YoloTensors) 497 | std::cout << tensor.blobName << std::endl; 498 | 499 | int nbLayers = network.getNbLayers(); 500 | std::cout << "\nTotal number of YOLO layers: " << nbLayers << "\n" << std::endl; 501 | 502 | return NVDSINFER_SUCCESS; 503 | } 504 | 505 | std::vector> 506 | Yolo::parseConfigFile(const std::string cfgFilePath) 507 | { 508 | assert(fileExists(cfgFilePath)); 509 | std::ifstream file(cfgFilePath); 510 | assert(file.good()); 511 | std::string line; 512 | std::vector> blocks; 513 | std::map block; 514 | 515 | while (getline(file, line)) { 516 | if (line.size() == 0 || line.front() == ' ' || line.front() == '#') 517 | continue; 518 | 519 | line = trim(line); 520 | if (line.front() == '[') { 521 | if (block.size() > 0) { 522 | blocks.push_back(block); 523 | block.clear(); 524 | } 525 | std::string key = "type"; 526 | std::string value = trim(line.substr(1, line.size() - 2)); 527 | block.insert(std::pair(key, value)); 528 | } 529 | else { 530 | int cpos = line.find('='); 531 | std::string key = trim(line.substr(0, cpos)); 532 | std::string value = trim(line.substr(cpos + 1)); 533 | block.insert(std::pair(key, value)); 534 | } 535 | } 536 | 537 | blocks.push_back(block); 538 | return blocks; 539 | } 540 | 541 | void 542 | Yolo::parseConfigBlocks() 543 | { 544 | for 
(auto block : m_ConfigBlocks) { 545 | if (block.at("type") == "net") { 546 | assert((block.find("height") != block.end()) && "Missing 'height' param in network cfg"); 547 | assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg"); 548 | assert((block.find("channels") != block.end()) && "Missing 'channels' param in network cfg"); 549 | 550 | m_InputH = std::stoul(block.at("height")); 551 | m_InputW = std::stoul(block.at("width")); 552 | m_InputC = std::stoul(block.at("channels")); 553 | m_InputSize = m_InputC * m_InputH * m_InputW; 554 | 555 | if (block.find("letter_box") != block.end()) 556 | m_LetterBox = std::stoul(block.at("letter_box")); 557 | } 558 | else if ((block.at("type") == "region") || (block.at("type") == "yolo")) 559 | { 560 | assert((block.find("num") != block.end()) && 561 | std::string("Missing 'num' param in " + block.at("type") + " layer").c_str()); 562 | assert((block.find("classes") != block.end()) && 563 | std::string("Missing 'classes' param in " + block.at("type") + " layer").c_str()); 564 | assert((block.find("anchors") != block.end()) && 565 | std::string("Missing 'anchors' param in " + block.at("type") + " layer").c_str()); 566 | 567 | ++m_YoloCount; 568 | 569 | m_NumClasses = std::stoul(block.at("classes")); 570 | 571 | if (block.find("new_coords") != block.end()) 572 | m_NewCoords = std::stoul(block.at("new_coords")); 573 | 574 | TensorInfo outputTensor; 575 | 576 | std::string anchorString = block.at("anchors"); 577 | while (!anchorString.empty()) { 578 | int npos = anchorString.find_first_of(','); 579 | if (npos != -1) { 580 | float anchor = std::stof(trim(anchorString.substr(0, npos))); 581 | outputTensor.anchors.push_back(anchor); 582 | anchorString.erase(0, npos + 1); 583 | } 584 | else { 585 | float anchor = std::stof(trim(anchorString)); 586 | outputTensor.anchors.push_back(anchor); 587 | break; 588 | } 589 | } 590 | 591 | if (block.find("mask") != block.end()) { 592 | std::string maskString = 
block.at("mask"); 593 | while (!maskString.empty()) { 594 | int npos = maskString.find_first_of(','); 595 | if (npos != -1) { 596 | int mask = std::stoul(trim(maskString.substr(0, npos))); 597 | outputTensor.mask.push_back(mask); 598 | maskString.erase(0, npos + 1); 599 | } 600 | else { 601 | int mask = std::stoul(trim(maskString)); 602 | outputTensor.mask.push_back(mask); 603 | break; 604 | } 605 | } 606 | } 607 | 608 | if (block.find("scale_x_y") != block.end()) 609 | outputTensor.scaleXY = std::stof(block.at("scale_x_y")); 610 | else 611 | outputTensor.scaleXY = 1.0; 612 | 613 | outputTensor.numBBoxes = outputTensor.mask.size() > 0 ? outputTensor.mask.size() : std::stoul(trim(block.at("num"))); 614 | 615 | m_YoloTensors.push_back(outputTensor); 616 | } 617 | } 618 | } 619 | 620 | void 621 | Yolo::destroyNetworkUtils() 622 | { 623 | for (uint i = 0; i < m_TrtWeights.size(); ++i) 624 | if (m_TrtWeights[i].count > 0) 625 | free(const_cast(m_TrtWeights[i].values)); 626 | m_TrtWeights.clear(); 627 | } 628 | --------------------------------------------------------------------------------