├── .github └── FUNDING.yml ├── .gitignore ├── README.md ├── config_infer_primary_yoloV8.txt ├── deepstream_app_config.txt ├── labels.txt └── nvdsinfer_custom_impl_Yolo ├── Makefile ├── calibrator.cpp ├── calibrator.h ├── layers ├── activation_layer.cpp ├── activation_layer.h ├── batchnorm_layer.cpp ├── batchnorm_layer.h ├── channels_layer.cpp ├── channels_layer.h ├── cls_layer.cpp ├── cls_layer.h ├── convolutional_layer.cpp ├── convolutional_layer.h ├── implicit_layer.cpp ├── implicit_layer.h ├── pooling_layer.cpp ├── pooling_layer.h ├── reduce_layer.cpp ├── reduce_layer.h ├── reg_layer.cpp ├── reg_layer.h ├── reorg_layer.cpp ├── reorg_layer.h ├── route_layer.cpp ├── route_layer.h ├── shortcut_layer.cpp ├── shortcut_layer.h ├── shuffle_layer.cpp ├── shuffle_layer.h ├── softmax_layer.cpp ├── softmax_layer.h ├── upsample_layer.cpp └── upsample_layer.h ├── nvdsinfer_yolo_engine.cpp ├── nvdsparsebbox_Yolo.cpp ├── utils.cpp ├── utils.h ├── yolo.cpp ├── yolo.h ├── yoloForward.cu ├── yoloForward_e.cu ├── yoloForward_nc.cu ├── yoloForward_r.cu ├── yoloForward_v2.cu ├── yoloPlugins.cpp └── yoloPlugins.h /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | #github: [marcoslucianops] 2 | custom: ['https://www.buymeacoffee.com/marcoslucianops'] 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.pyc 4 | *.onnx 5 | *.engine 6 | *.pt 7 | *.trt 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YoloV8_Deepstream 2 | `YOLOv8` running on NVIDIA DeepStream 3 | 4 | # Prepare the environment 5 | 1. Install TensorRT by following the [`TensorRT install`](https://developer.nvidia.com/nvidia-tensorrt-8x-download) guide 6 | 2. Install DeepStream by following the [`deepstream official`](https://docs.nvidia.com/metropolis/deepstream/dev-guide/text/DS_Quickstart.html) quickstart 7 | 8 | # Download model 9 | All YOLOv8 ONNX models are available at [`yolov8 models`](https://drive.google.com/drive/folders/1LgsOK1ufRrVNoILJxLUd2s2wPd6jHs2a?usp=sharing) 10 | 11 | # Custom video and models 12 | To use your own videos and models, follow the DeepStream configuration guide at [`deepstream config`](https://docs.nvidia.com/metropolis/deepstream/dev-guide/text/DS_ref_app_deepstream.html) 13 | 14 | # References 15 | 1. [`yolov8 official`](https://github.com/ultralytics/ultralytics) 16 | 2. 
[`yolo deepstream`](https://github.com/marcoslucianops/DeepStream-Yolo) 17 | 18 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV8.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=./weights/yolov8n_nms.onnx 6 | model-engine-file = ./weights/yolov8n_nms.onnx_b1_gpu0_fp32.engine 7 | labelfile-path=labels.txt 8 | batch-size=1 9 | network-mode=0 10 | num-detected-classes=80 11 | interval=0 12 | gie-unique-id=1 13 | process-mode=1 14 | network-type=0 15 | cluster-mode=2 16 | maintain-aspect-ratio=1 17 | parse-bbox-func-name=NvDsInferParseYolo 18 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 19 | 20 | 21 | [class-attrs-all] 22 | nms-iou-threshold=0.45 23 | pre-cluster-threshold=0.25 24 | topk=300 25 | -------------------------------------------------------------------------------- /deepstream_app_config.txt: -------------------------------------------------------------------------------- 1 | [application] 2 | enable-perf-measurement=1 3 | perf-measurement-interval-sec=5 4 | 5 | [tiled-display] 6 | enable=1 7 | rows=1 8 | columns=1 9 | width=1280 10 | height=720 11 | gpu-id=0 12 | nvbuf-memory-type=0 13 | 14 | [source0] 15 | enable=1 16 | type=3 17 | uri=file:///home/haobk/video.mp4 18 | num-sources=1 19 | gpu-id=0 20 | cudadec-memtype=0 21 | 22 | [sink0] 23 | enable=1 24 | type=2 25 | sync=0 26 | gpu-id=0 27 | nvbuf-memory-type=0 28 | 29 | [osd] 30 | enable=1 31 | gpu-id=0 32 | border-width=5 33 | text-size=15 34 | text-color=1;1;1;1; 35 | text-bg-color=0.3;0.3;0.3;1 36 | font=Serif 37 | show-clock=0 38 | clock-x-offset=800 39 | clock-y-offset=820 40 | clock-text-size=12 41 | clock-color=1;0;0;0 42 | nvbuf-memory-type=0 43 | 44 | [streammux] 45 | gpu-id=0 46 | live-source=0 47 | batch-size=1 48 | batched-push-timeout=40000 49 | width=1920 50 | height=1080 51 | enable-padding=0 52 | nvbuf-memory-type=0 53 | 54 | [primary-gie] 55 | enable=1 56 | gpu-id=0 57 | gie-unique-id=1 58 | nvbuf-memory-type=0 59 | config-file=config_infer_primary_yoloV8.txt 60 | 61 | [tests] 62 | file-loop=0 63 | -------------------------------------------------------------------------------- /labels.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- 
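
Note on how the configuration above and the sources below fit together: `custom-lib-path` in config_infer_primary_yoloV8.txt points at the shared library built from the nvdsinfer_custom_impl_Yolo sources (the Makefile below requires CUDA_VER to be set, e.g. `make -C nvdsinfer_custom_impl_Yolo CUDA_VER=11.4`, where 11.4 is only an example version), and `parse-bbox-func-name=NvDsInferParseYolo` names the bounding-box parser that library must export. The sketch below shows the expected shape of that symbol, assuming the standard `NvDsInferParseCustomFunc` prototype declared in DeepStream's `nvdsinfer_custom_impl.h`; the actual definition lives in nvdsparsebbox_Yolo.cpp (listed in the tree above).

#include <vector>
#include "nvdsinfer_custom_impl.h"  // ships with the DeepStream SDK

// The parser receives the engine's output layers plus network/detection
// parameters and fills objectList with decoded boxes for nvinfer clustering.
extern "C" bool NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList);

// Lets nvinfer verify at load time that the exported symbol matches the
// expected NvDsInferParseCustomFunc signature.
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo);
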
/nvdsinfer_custom_impl_Yolo/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a 5 | # copy of this software and associated documentation files (the "Software"), 6 | # to deal in the Software without restriction, including without limitation 7 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | # and/or sell copies of the Software, and to permit persons to whom the 9 | # Software is furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | # DEALINGS IN THE SOFTWARE. 21 | # 22 | # Edited by Marcos Luciano 23 | # https://www.github.com/marcoslucianops 24 | ################################################################################ 25 | 26 | CUDA_VER?= 27 | ifeq ($(CUDA_VER),) 28 | $(error "CUDA_VER is not set") 29 | endif 30 | 31 | OPENCV?= 32 | ifeq ($(OPENCV),) 33 | OPENCV=0 34 | endif 35 | 36 | CC:= g++ 37 | NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc 38 | 39 | CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations 40 | CFLAGS+= -I/opt/nvidia/deepstream/deepstream/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include 41 | 42 | ifeq ($(OPENCV), 1) 43 | COMMON= -DOPENCV 44 | CFLAGS+= $(shell pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv) 45 | LIBS+= $(shell pkg-config --libs opencv4 2> /dev/null || pkg-config --libs opencv) 46 | endif 47 | 48 | LIBS+= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs 49 | LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group 50 | 51 | INCS:= $(wildcard *.h) 52 | 53 | SRCFILES:= $(filter-out calibrator.cpp, $(wildcard *.cpp)) 54 | 55 | ifeq ($(OPENCV), 1) 56 | SRCFILES+= calibrator.cpp 57 | endif 58 | 59 | SRCFILES+= $(wildcard layers/*.cpp) 60 | SRCFILES+= $(wildcard *.cu) 61 | 62 | TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so 63 | 64 | TARGET_OBJS:= $(SRCFILES:.cpp=.o) 65 | TARGET_OBJS:= $(TARGET_OBJS:.cu=.o) 66 | 67 | all: $(TARGET_LIB) 68 | 69 | %.o: %.cpp $(INCS) Makefile 70 | $(CC) -c $(COMMON) -o $@ $(CFLAGS) $< 71 | 72 | %.o: %.cu $(INCS) Makefile 73 | $(NVCC) -c -o $@ --compiler-options '-fPIC' $< 74 | 75 | $(TARGET_LIB) : $(TARGET_OBJS) 76 | $(CC) -o $@ $(TARGET_OBJS) $(LFLAGS) 77 | 78 | clean: 79 | rm -rf $(TARGET_LIB) 80 | rm -rf $(TARGET_OBJS) 81 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/calibrator.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "calibrator.h" 7 | #include 8 | #include 9 | 10 | namespace 
nvinfer1 11 | { 12 | Int8EntropyCalibrator2::Int8EntropyCalibrator2(const int &batchsize, const int &channels, const int &height, const int &width, const int &letterbox, const std::string &imgPath, 13 | const std::string &calibTablePath):batchSize(batchsize), inputC(channels), inputH(height), inputW(width), letterBox(letterbox), calibTablePath(calibTablePath), imageIndex(0), readCache(true) /* initialize readCache so readCalibrationCache() is well-defined */ 14 | { 15 | inputCount = batchsize * channels * height * width; 16 | std::fstream f(imgPath); 17 | if (f.is_open()) 18 | { 19 | std::string temp; 20 | while (std::getline(f, temp)) imgPaths.push_back(temp); 21 | } 22 | batchData = new float[inputCount]; 23 | CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float))); 24 | } 25 | 26 | Int8EntropyCalibrator2::~Int8EntropyCalibrator2() 27 | { 28 | CUDA_CHECK(cudaFree(deviceInput)); 29 | if (batchData) 30 | delete[] batchData; 31 | } 32 | 33 | int Int8EntropyCalibrator2::getBatchSize() const noexcept 34 | { 35 | return batchSize; 36 | } 37 | 38 | bool Int8EntropyCalibrator2::getBatch(void **bindings, const char **names, int nbBindings) noexcept 39 | { 40 | if (imageIndex + batchSize > uint(imgPaths.size())) 41 | return false; 42 | 43 | float* ptr = batchData; 44 | for (size_t j = imageIndex; j < imageIndex + batchSize; ++j) 45 | { 46 | cv::Mat img = cv::imread(imgPaths[j], cv::IMREAD_COLOR); 47 | std::vector<float> inputData = prepareImage(img, inputC, inputH, inputW, letterBox); 48 | 49 | int len = (int)(inputData.size()); 50 | memcpy(ptr, inputData.data(), len * sizeof(float)); 51 | 52 | ptr += inputData.size(); 53 | std::cout << "Load image: " << imgPaths[j] << std::endl; 54 | std::cout << "Progress: " << (j + 1)*100. / imgPaths.size() << "%" << std::endl; 55 | } 56 | imageIndex += batchSize; 57 | CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice)); 58 | bindings[0] = deviceInput; 59 | return true; 60 | } 61 | 62 | const void* Int8EntropyCalibrator2::readCalibrationCache(std::size_t &length) noexcept 63 | { 64 | calibrationCache.clear(); 65 | std::ifstream input(calibTablePath, std::ios::binary); 66 | input >> std::noskipws; 67 | if (readCache && input.good()) 68 | { 69 | std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), 70 | std::back_inserter(calibrationCache)); 71 | } 72 | length = calibrationCache.size(); 73 | return length ? 
calibrationCache.data() : nullptr; 74 | } 75 | 76 | void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, std::size_t length) noexcept 77 | { 78 | std::ofstream output(calibTablePath, std::ios::binary); 79 | output.write(reinterpret_cast<const char*>(cache), length); 80 | } 81 | } 82 | 83 | std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box) 84 | { 85 | cv::Mat out; 86 | int image_w = img.cols; 87 | int image_h = img.rows; 88 | if (image_w != input_w || image_h != input_h) 89 | { 90 | if (letter_box == 1) 91 | { 92 | float ratio_w = (float)image_w / (float)input_w; 93 | float ratio_h = (float)image_h / (float)input_h; 94 | if (ratio_w > ratio_h) 95 | { 96 | int new_width = input_w * ratio_h; 97 | int x = (image_w - new_width) / 2; 98 | cv::Rect roi(abs(x), 0, new_width, image_h); 99 | out = img(roi); 100 | } 101 | else if (ratio_w < ratio_h) 102 | { 103 | int new_height = input_h * ratio_w; 104 | int y = (image_h - new_height) / 2; 105 | cv::Rect roi(0, abs(y), image_w, new_height); 106 | out = img(roi); 107 | } 108 | else { 109 | out = img; 110 | } 111 | cv::resize(out, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC); 112 | } 113 | else 114 | { 115 | cv::resize(img, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC); 116 | } 117 | cv::cvtColor(out, out, cv::COLOR_BGR2RGB); 118 | } 119 | else 120 | { 121 | cv::cvtColor(img, out, cv::COLOR_BGR2RGB); 122 | } 123 | if (input_c == 3) 124 | { 125 | out.convertTo(out, CV_32FC3, 1.0 / 255.0); 126 | } 127 | else 128 | { 129 | out.convertTo(out, CV_32FC1, 1.0 / 255.0); 130 | } 131 | std::vector<cv::Mat> input_channels(input_c); 132 | cv::split(out, input_channels); 133 | std::vector<float> result(input_h * input_w * input_c); 134 | auto data = result.data(); 135 | int channelLength = input_h * input_w; 136 | for (int i = 0; i < input_c; ++i) 137 | { 138 | memcpy(data, input_channels[i].data, channelLength * sizeof(float)); 139 | data += channelLength; 140 | } 141 | return result; 142 | } 143 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/calibrator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef CALIBRATOR_H 7 | #define CALIBRATOR_H 8 | 9 | #include "opencv2/opencv.hpp" 10 | #include "cuda_runtime.h" 11 | #include "NvInfer.h" 12 | #include <string> 13 | #include <vector> 14 | 15 | #ifndef CUDA_CHECK 16 | #define CUDA_CHECK(callstr) \ 17 | { \ 18 | cudaError_t error_code = callstr; \ 19 | if (error_code != cudaSuccess) { \ 20 | std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \ 21 | assert(0); \ 22 | } \ 23 | } 24 | #endif 25 | 26 | namespace nvinfer1 { 27 | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 { 28 | public: 29 | Int8EntropyCalibrator2(const int &batchsize, 30 | const int &channels, 31 | const int &height, 32 | const int &width, 33 | const int &letterbox, 34 | const std::string &imgPath, 35 | const std::string &calibTablePath); 36 | 37 | virtual ~Int8EntropyCalibrator2(); 38 | int getBatchSize() const noexcept override; 39 | bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override; 40 | const void* readCalibrationCache(std::size_t& length) noexcept override; 41 | void writeCalibrationCache(const void* cache, size_t length) noexcept override; 42 | 43 | private: 44 | int batchSize; 45 | int inputC; 46 | int inputH; 47 | int 
inputW; 48 | int letterBox; 49 | std::string calibTablePath; 50 | size_t imageIndex; 51 | size_t inputCount; 52 | std::vector imgPaths; 53 | float *batchData{ nullptr }; 54 | void *deviceInput{ nullptr }; 55 | bool readCache; 56 | std::vector calibrationCache; 57 | }; 58 | } 59 | 60 | std::vector prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box); 61 | 62 | #endif //CALIBRATOR_H 63 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "activation_layer.h" 7 | 8 | nvinfer1::ITensor* activationLayer( 9 | int layerIdx, 10 | std::string activation, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | if (activation == "linear") 17 | { 18 | output = input; 19 | } 20 | else if (activation == "relu") 21 | { 22 | nvinfer1::IActivationLayer* relu = network->addActivation(*input, nvinfer1::ActivationType::kRELU); 23 | assert(relu != nullptr); 24 | std::string reluLayerName = "relu_" + std::to_string(layerIdx); 25 | relu->setName(reluLayerName.c_str()); 26 | output = relu->getOutput(0); 27 | } 28 | else if (activation == "sigmoid" || activation == "logistic") 29 | { 30 | nvinfer1::IActivationLayer* sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID); 31 | assert(sigmoid != nullptr); 32 | std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx); 33 | sigmoid->setName(sigmoidLayerName.c_str()); 34 | output = sigmoid->getOutput(0); 35 | } 36 | else if (activation == "tanh") 37 | { 38 | nvinfer1::IActivationLayer* tanh = network->addActivation(*input, nvinfer1::ActivationType::kTANH); 39 | assert(tanh != nullptr); 40 | std::string tanhLayerName = "tanh_" + std::to_string(layerIdx); 41 | tanh->setName(tanhLayerName.c_str()); 42 | output = tanh->getOutput(0); 43 | } 44 | else if (activation == "leaky") 45 | { 46 | nvinfer1::IActivationLayer* leaky = network->addActivation(*input, nvinfer1::ActivationType::kLEAKY_RELU); 47 | assert(leaky != nullptr); 48 | std::string leakyLayerName = "leaky_" + std::to_string(layerIdx); 49 | leaky->setName(leakyLayerName.c_str()); 50 | leaky->setAlpha(0.1); 51 | output = leaky->getOutput(0); 52 | } 53 | else if (activation == "softplus") 54 | { 55 | nvinfer1::IActivationLayer* softplus = network->addActivation(*input, nvinfer1::ActivationType::kSOFTPLUS); 56 | assert(softplus != nullptr); 57 | std::string softplusLayerName = "softplus_" + std::to_string(layerIdx); 58 | softplus->setName(softplusLayerName.c_str()); 59 | output = softplus->getOutput(0); 60 | } 61 | else if (activation == "mish") 62 | { 63 | nvinfer1::IActivationLayer* softplus = network->addActivation(*input, nvinfer1::ActivationType::kSOFTPLUS); 64 | assert(softplus != nullptr); 65 | std::string softplusLayerName = "softplus_" + std::to_string(layerIdx); 66 | softplus->setName(softplusLayerName.c_str()); 67 | nvinfer1::IActivationLayer* tanh = network->addActivation(*softplus->getOutput(0), nvinfer1::ActivationType::kTANH); 68 | assert(tanh != nullptr); 69 | std::string tanhLayerName = "tanh_" + std::to_string(layerIdx); 70 | tanh->setName(tanhLayerName.c_str()); 71 | nvinfer1::IElementWiseLayer* mish 72 | = network->addElementWise(*input, *tanh->getOutput(0), nvinfer1::ElementWiseOperation::kPROD); 73 
| assert(mish != nullptr); 74 | std::string mishLayerName = "mish_" + std::to_string(layerIdx); 75 | mish->setName(mishLayerName.c_str()); 76 | output = mish->getOutput(0); 77 | } 78 | else if (activation == "silu" || activation == "swish") 79 | { 80 | nvinfer1::IActivationLayer* sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID); 81 | assert(sigmoid != nullptr); 82 | std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx); 83 | sigmoid->setName(sigmoidLayerName.c_str()); 84 | nvinfer1::IElementWiseLayer* silu 85 | = network->addElementWise(*input, *sigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD); 86 | assert(silu != nullptr); 87 | std::string siluLayerName = "silu_" + std::to_string(layerIdx); 88 | silu->setName(siluLayerName.c_str()); 89 | output = silu->getOutput(0); 90 | } 91 | else if (activation == "hardsigmoid") 92 | { 93 | nvinfer1::IActivationLayer* hardsigmoid = network->addActivation(*input, nvinfer1::ActivationType::kHARD_SIGMOID); 94 | assert(hardsigmoid != nullptr); 95 | std::string hardsigmoidLayerName = "hardsigmoid_" + std::to_string(layerIdx); 96 | hardsigmoid->setName(hardsigmoidLayerName.c_str()); 97 | hardsigmoid->setAlpha(1.0 / 6.0); 98 | hardsigmoid->setBeta(0.5); 99 | output = hardsigmoid->getOutput(0); 100 | } 101 | else if (activation == "hardswish") 102 | { 103 | nvinfer1::IActivationLayer* hardsigmoid = network->addActivation(*input, nvinfer1::ActivationType::kHARD_SIGMOID); 104 | assert(hardsigmoid != nullptr); 105 | std::string hardsigmoidLayerName = "hardsigmoid_" + std::to_string(layerIdx); 106 | hardsigmoid->setName(hardsigmoidLayerName.c_str()); 107 | hardsigmoid->setAlpha(1.0 / 6.0); 108 | hardsigmoid->setBeta(0.5); 109 | nvinfer1::IElementWiseLayer* hardswish 110 | = network->addElementWise(*input, *hardsigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD); 111 | assert(hardswish != nullptr); 112 | std::string hardswishLayerName = "hardswish_" + std::to_string(layerIdx); 113 | hardswish->setName(hardswishLayerName.c_str()); 114 | output = hardswish->getOutput(0); 115 | } 116 | else 117 | { 118 | std::cerr << "Activation not supported: " << activation << std::endl; 119 | std::abort(); 120 | } 121 | return output; 122 | } 123 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/activation_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __ACTIVATION_LAYER_H__ 7 | #define __ACTIVATION_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* activationLayer( 15 | int layerIdx, 16 | std::string activation, 17 | nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | #include "batchnorm_layer.h" 8 | 9 | nvinfer1::ITensor* batchnormLayer( 10 | int layerIdx, 11 | std::map& block, 12 | std::vector& weights, 13 | std::vector& trtWeights, 14 | int& weightPtr, 15 | std::string weightsType, 16 | float eps, 17 | nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network) 19 | { 20 | 
nvinfer1::ITensor* output; 21 | 22 | assert(block.at("type") == "batchnorm"); 23 | assert(block.find("filters") != block.end()); 24 | 25 | int filters = std::stoi(block.at("filters")); 26 | std::string activation = block.at("activation"); 27 | 28 | std::vector bnBiases; 29 | std::vector bnWeights; 30 | std::vector bnRunningMean; 31 | std::vector bnRunningVar; 32 | 33 | if (weightsType == "weights") 34 | { 35 | for (int i = 0; i < filters; ++i) 36 | { 37 | bnBiases.push_back(weights[weightPtr]); 38 | weightPtr++; 39 | } 40 | for (int i = 0; i < filters; ++i) 41 | { 42 | bnWeights.push_back(weights[weightPtr]); 43 | weightPtr++; 44 | } 45 | for (int i = 0; i < filters; ++i) 46 | { 47 | bnRunningMean.push_back(weights[weightPtr]); 48 | weightPtr++; 49 | } 50 | for (int i = 0; i < filters; ++i) 51 | { 52 | bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5)); 53 | weightPtr++; 54 | } 55 | } 56 | else 57 | { 58 | for (int i = 0; i < filters; ++i) 59 | { 60 | bnWeights.push_back(weights[weightPtr]); 61 | weightPtr++; 62 | } 63 | for (int i = 0; i < filters; ++i) 64 | { 65 | bnBiases.push_back(weights[weightPtr]); 66 | weightPtr++; 67 | } 68 | for (int i = 0; i < filters; ++i) 69 | { 70 | bnRunningMean.push_back(weights[weightPtr]); 71 | weightPtr++; 72 | } 73 | for (int i = 0; i < filters; ++i) 74 | { 75 | bnRunningVar.push_back(sqrt(weights[weightPtr] + eps)); 76 | weightPtr++; 77 | } 78 | } 79 | 80 | int size = filters; 81 | nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size}; 82 | nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size}; 83 | nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size}; 84 | float* shiftWt = new float[size]; 85 | for (int i = 0; i < size; ++i) 86 | shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); 87 | shift.values = shiftWt; 88 | float* scaleWt = new float[size]; 89 | for (int i = 0; i < size; ++i) 90 | scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; 91 | scale.values = scaleWt; 92 | float* powerWt = new float[size]; 93 | for (int i = 0; i < size; ++i) 94 | powerWt[i] = 1.0; 95 | power.values = powerWt; 96 | trtWeights.push_back(shift); 97 | trtWeights.push_back(scale); 98 | trtWeights.push_back(power); 99 | 100 | nvinfer1::IScaleLayer* batchnorm = network->addScale(*input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); 101 | assert(batchnorm != nullptr); 102 | std::string batchnormLayerName = "batchnorm_" + std::to_string(layerIdx); 103 | batchnorm->setName(batchnormLayerName.c_str()); 104 | output = batchnorm->getOutput(0); 105 | 106 | output = activationLayer(layerIdx, activation, output, network); 107 | assert(output != nullptr); 108 | 109 | return output; 110 | } 111 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __BATCHNORM_LAYER_H__ 7 | #define __BATCHNORM_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* batchnormLayer( 17 | int layerIdx, 18 | std::map& block, 19 | std::vector& weights, 20 | std::vector& trtWeights, 21 | int& weightPtr, 22 | std::string weightsType, 23 | float eps, 24 | nvinfer1::ITensor* input, 25 | nvinfer1::INetworkDefinition* network); 26 | 27 | #endif 28 | 
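
A quick numeric check of the batch-norm folding above may be useful: bnRunningVar stores sqrt(var + eps), and the per-channel IScaleLayer computes out = (in * scale + shift)^power, so with scale = gamma / sqrt(var + eps), shift = beta - mean * scale and power = 1 it reproduces batch-norm inference exactly. A minimal standalone sketch, with all numeric values made up for illustration:

#include <cmath>
#include <cstdio>

int main() {
    // Hypothetical per-channel BN parameters and one input value.
    float x = 2.0f, gamma = 0.5f, beta = 0.1f, mean = 1.0f, var = 4.0f, eps = 1.0e-5f;
    float denom = std::sqrt(var + eps);                    // what bnRunningVar holds above
    float scale = gamma / denom;                           // scaleWt[i]
    float shift = beta - mean * gamma / denom;             // shiftWt[i]
    float folded = x * scale + shift;                      // IScaleLayer with power = 1
    float reference = gamma * (x - mean) / denom + beta;   // textbook BN inference
    std::printf("folded=%f reference=%f\n", folded, reference);  // prints the same value twice
    return 0;
}

Folding the normalization into a single per-channel scale is why batchnormLayer (and the conv + batch_normalize path in convolutional_layer.cpp) never adds a separate normalization op to the TensorRT graph.
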
-------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "channels_layer.h" 7 | 8 | nvinfer1::ITensor* channelsLayer( 9 | int layerIdx, 10 | std::map& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::ITensor* implicitTensor, 13 | nvinfer1::INetworkDefinition* network) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "shift_channels" || block.at("type") == "control_channels"); 18 | 19 | if (block.at("type") == "shift_channels") { 20 | nvinfer1::IElementWiseLayer* shift 21 | = network->addElementWise(*input, *implicitTensor, nvinfer1::ElementWiseOperation::kSUM); 22 | assert(shift != nullptr); 23 | std::string shiftLayerName = "shift_channels_" + std::to_string(layerIdx); 24 | shift->setName(shiftLayerName.c_str()); 25 | output = shift->getOutput(0); 26 | } 27 | else if (block.at("type") == "control_channels") { 28 | nvinfer1::IElementWiseLayer* control 29 | = network->addElementWise(*input, *implicitTensor, nvinfer1::ElementWiseOperation::kPROD); 30 | assert(control != nullptr); 31 | std::string controlLayerName = "control_channels_" + std::to_string(layerIdx); 32 | control->setName(controlLayerName.c_str()); 33 | output = control->getOutput(0); 34 | } 35 | 36 | return output; 37 | } 38 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/channels_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __CHANNELS_LAYER_H__ 7 | #define __CHANNELS_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* channelsLayer( 15 | int layerIdx, 16 | std::map& block, 17 | nvinfer1::ITensor* input, 18 | nvinfer1::ITensor* implicitTensor, 19 | nvinfer1::INetworkDefinition* network); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/cls_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "cls_layer.h" 7 | 8 | nvinfer1::ITensor* clsLayer( 9 | int layerIdx, 10 | std::map& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "cls"); 17 | 18 | nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input); 19 | assert(shuffle != nullptr); 20 | std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx); 21 | shuffle->setName(shuffleLayerName.c_str()); 22 | nvinfer1::Permutation permutation; 23 | permutation.order[0] = 1; 24 | permutation.order[1] = 0; 25 | shuffle->setFirstTranspose(permutation); 26 | output = shuffle->getOutput(0); 27 | 28 | return output; 29 | } 30 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/cls_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __CLS_LAYER_H__ 7 | #define __CLS_LAYER_H__ 8 | 
9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* clsLayer( 15 | int layerIdx, 16 | std::map& block, 17 | nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | #include "convolutional_layer.h" 8 | 9 | nvinfer1::ITensor* convolutionalLayer( 10 | int layerIdx, 11 | std::map& block, 12 | std::vector& weights, 13 | std::vector& trtWeights, 14 | int& weightPtr, 15 | std::string weightsType, 16 | int& inputChannels, 17 | float eps, 18 | nvinfer1::ITensor* input, 19 | nvinfer1::INetworkDefinition* network) 20 | { 21 | nvinfer1::ITensor* output; 22 | 23 | assert(block.at("type") == "convolutional"); 24 | assert(block.find("filters") != block.end()); 25 | assert(block.find("pad") != block.end()); 26 | assert(block.find("size") != block.end()); 27 | assert(block.find("stride") != block.end()); 28 | 29 | int filters = std::stoi(block.at("filters")); 30 | int padding = std::stoi(block.at("pad")); 31 | int kernelSize = std::stoi(block.at("size")); 32 | int stride = std::stoi(block.at("stride")); 33 | std::string activation = block.at("activation"); 34 | int bias = filters; 35 | 36 | bool batchNormalize = false; 37 | if (block.find("batch_normalize") != block.end()) 38 | { 39 | bias = 0; 40 | batchNormalize = (block.at("batch_normalize") == "1"); 41 | } 42 | 43 | int groups = 1; 44 | if (block.find("groups") != block.end()) 45 | groups = std::stoi(block.at("groups")); 46 | 47 | if (block.find("bias") != block.end()) 48 | bias = std::stoi(block.at("bias")); 49 | 50 | int pad; 51 | if (padding) 52 | pad = (kernelSize - 1) / 2; 53 | else 54 | pad = 0; 55 | 56 | int size = filters * inputChannels * kernelSize * kernelSize / groups; 57 | std::vector bnBiases; 58 | std::vector bnWeights; 59 | std::vector bnRunningMean; 60 | std::vector bnRunningVar; 61 | nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; 62 | nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, bias}; 63 | 64 | if (weightsType == "weights") 65 | { 66 | if (batchNormalize == false) 67 | { 68 | float* val; 69 | if (bias != 0) { 70 | val = new float[filters]; 71 | for (int i = 0; i < filters; ++i) 72 | { 73 | val[i] = weights[weightPtr]; 74 | weightPtr++; 75 | } 76 | convBias.values = val; 77 | trtWeights.push_back(convBias); 78 | } 79 | val = new float[size]; 80 | for (int i = 0; i < size; ++i) 81 | { 82 | val[i] = weights[weightPtr]; 83 | weightPtr++; 84 | } 85 | convWt.values = val; 86 | trtWeights.push_back(convWt); 87 | } 88 | else 89 | { 90 | for (int i = 0; i < filters; ++i) 91 | { 92 | bnBiases.push_back(weights[weightPtr]); 93 | weightPtr++; 94 | } 95 | for (int i = 0; i < filters; ++i) 96 | { 97 | bnWeights.push_back(weights[weightPtr]); 98 | weightPtr++; 99 | } 100 | for (int i = 0; i < filters; ++i) 101 | { 102 | bnRunningMean.push_back(weights[weightPtr]); 103 | weightPtr++; 104 | } 105 | for (int i = 0; i < filters; ++i) 106 | { 107 | bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5)); 108 | weightPtr++; 109 | } 110 | float* val = new float[size]; 111 | for (int i = 0; i < size; ++i) 112 | { 113 | val[i] = weights[weightPtr]; 114 | weightPtr++; 115 | } 116 | convWt.values = val; 117 | 
trtWeights.push_back(convWt); 118 | if (bias != 0) 119 | trtWeights.push_back(convBias); 120 | } 121 | } 122 | else 123 | { 124 | if (batchNormalize == false) 125 | { 126 | float* val = new float[size]; 127 | for (int i = 0; i < size; ++i) 128 | { 129 | val[i] = weights[weightPtr]; 130 | weightPtr++; 131 | } 132 | convWt.values = val; 133 | trtWeights.push_back(convWt); 134 | if (bias != 0) { 135 | val = new float[filters]; 136 | for (int i = 0; i < filters; ++i) 137 | { 138 | val[i] = weights[weightPtr]; 139 | weightPtr++; 140 | } 141 | convBias.values = val; 142 | trtWeights.push_back(convBias); 143 | } 144 | } 145 | else 146 | { 147 | float* val = new float[size]; 148 | for (int i = 0; i < size; ++i) 149 | { 150 | val[i] = weights[weightPtr]; 151 | weightPtr++; 152 | } 153 | convWt.values = val; 154 | for (int i = 0; i < filters; ++i) 155 | { 156 | bnWeights.push_back(weights[weightPtr]); 157 | weightPtr++; 158 | } 159 | for (int i = 0; i < filters; ++i) 160 | { 161 | bnBiases.push_back(weights[weightPtr]); 162 | weightPtr++; 163 | } 164 | for (int i = 0; i < filters; ++i) 165 | { 166 | bnRunningMean.push_back(weights[weightPtr]); 167 | weightPtr++; 168 | } 169 | for (int i = 0; i < filters; ++i) 170 | { 171 | bnRunningVar.push_back(sqrt(weights[weightPtr] + eps)); 172 | weightPtr++; 173 | } 174 | trtWeights.push_back(convWt); 175 | if (bias != 0) 176 | trtWeights.push_back(convBias); 177 | } 178 | } 179 | 180 | nvinfer1::IConvolutionLayer* conv 181 | = network->addConvolutionNd(*input, filters, nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias); 182 | assert(conv != nullptr); 183 | std::string convLayerName = "conv_" + std::to_string(layerIdx); 184 | conv->setName(convLayerName.c_str()); 185 | conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}}); 186 | conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}}); 187 | 188 | if (block.find("groups") != block.end()) 189 | conv->setNbGroups(groups); 190 | 191 | output = conv->getOutput(0); 192 | 193 | if (batchNormalize == true) 194 | { 195 | size = filters; 196 | nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size}; 197 | nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size}; 198 | nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size}; 199 | float* shiftWt = new float[size]; 200 | for (int i = 0; i < size; ++i) 201 | shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); 202 | shift.values = shiftWt; 203 | float* scaleWt = new float[size]; 204 | for (int i = 0; i < size; ++i) 205 | scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; 206 | scale.values = scaleWt; 207 | float* powerWt = new float[size]; 208 | for (int i = 0; i < size; ++i) 209 | powerWt[i] = 1.0; 210 | power.values = powerWt; 211 | trtWeights.push_back(shift); 212 | trtWeights.push_back(scale); 213 | trtWeights.push_back(power); 214 | 215 | nvinfer1::IScaleLayer* batchnorm = network->addScale(*output, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); 216 | assert(batchnorm != nullptr); 217 | std::string batchnormLayerName = "batchnorm_" + std::to_string(layerIdx); 218 | batchnorm->setName(batchnormLayerName.c_str()); 219 | output = batchnorm->getOutput(0); 220 | } 221 | 222 | output = activationLayer(layerIdx, activation, output, network); 223 | assert(output != nullptr); 224 | 225 | return output; 226 | } 227 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __CONVOLUTIONAL_LAYER_H__ 7 | #define __CONVOLUTIONAL_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* convolutionalLayer( 17 | int layerIdx, 18 | std::map& block, 19 | std::vector& weights, 20 | std::vector& trtWeights, 21 | int& weightPtr, 22 | std::string weightsType, 23 | int& inputChannels, 24 | float eps, 25 | nvinfer1::ITensor* input, 26 | nvinfer1::INetworkDefinition* network); 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "implicit_layer.h" 7 | 8 | nvinfer1::ITensor* implicitLayer( 9 | int layerIdx, 10 | std::map& block, 11 | std::vector& weights, 12 | std::vector& trtWeights, 13 | int& weightPtr, 14 | nvinfer1::INetworkDefinition* network) 15 | { 16 | nvinfer1::ITensor* output; 17 | 18 | assert(block.at("type") == "implicit_add" || block.at("type") == "implicit_mul"); 19 | assert(block.find("filters") != block.end()); 20 | 21 | int filters = std::stoi(block.at("filters")); 22 | 23 | nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, filters}; 24 | 25 | float* val = new float[filters]; 26 | for (int i = 0; i < filters; ++i) 27 | { 28 | val[i] = weights[weightPtr]; 29 | weightPtr++; 30 | } 31 | convWt.values = val; 32 | trtWeights.push_back(convWt); 33 | 34 | nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims{3, {filters, 1, 1}}, convWt); 35 | assert(implicit != nullptr); 36 | std::string implicitLayerName = block.at("type") + "_" + std::to_string(layerIdx); 37 | implicit->setName(implicitLayerName.c_str()); 38 | output = implicit->getOutput(0); 39 | 40 | return output; 41 | } 42 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/implicit_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __IMPLICIT_LAYER_H__ 7 | #define __IMPLICIT_LAYER_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "NvInfer.h" 14 | 15 | nvinfer1::ITensor* implicitLayer( 16 | int layerIdx, 17 | std::map& block, 18 | std::vector& weights, 19 | std::vector& trtWeights, 20 | int& weightPtr, 21 | nvinfer1::INetworkDefinition* network); 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "pooling_layer.h" 7 | 8 | nvinfer1::ITensor* poolingLayer( 9 | int layerIdx, 10 | std::map& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "maxpool" || block.at("type") == "avgpool"); 17 | 18 | if (block.at("type") == "maxpool") 19 | { 20 | assert(block.find("size") != block.end()); 21 | 
assert(block.find("stride") != block.end()); 22 | 23 | int size = std::stoi(block.at("size")); 24 | int stride = std::stoi(block.at("stride")); 25 | 26 | nvinfer1::IPoolingLayer* maxpool 27 | = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::Dims{2, {size, size}}); 28 | assert(maxpool != nullptr); 29 | std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx); 30 | maxpool->setName(maxpoolLayerName.c_str()); 31 | maxpool->setStrideNd(nvinfer1::Dims{2, {stride, stride}}); 32 | maxpool->setPaddingNd(nvinfer1::Dims{2, {(size - 1) / 2, (size - 1) / 2}}); 33 | if (size == 2 && stride == 1) 34 | { 35 | maxpool->setPrePadding(nvinfer1::Dims{2, {0, 0}}); 36 | maxpool->setPostPadding(nvinfer1::Dims{2, {1, 1}}); 37 | } 38 | output = maxpool->getOutput(0); 39 | } 40 | else if (block.at("type") == "avgpool") 41 | { 42 | nvinfer1::Dims inputDims = input->getDimensions(); 43 | nvinfer1::IPoolingLayer* avgpool = network->addPoolingNd( 44 | *input, nvinfer1::PoolingType::kAVERAGE, nvinfer1::Dims{2, {inputDims.d[1], inputDims.d[2]}}); 45 | assert(avgpool != nullptr); 46 | std::string avgpoolLayerName = "avgpool_" + std::to_string(layerIdx); 47 | avgpool->setName(avgpoolLayerName.c_str()); 48 | output = avgpool->getOutput(0); 49 | } 50 | else 51 | { 52 | std::cerr << "Pooling not supported: " << block.at("type") << std::endl; 53 | std::abort(); 54 | } 55 | 56 | return output; 57 | } 58 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/pooling_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __POOLING_LAYER_H__ 7 | #define __POOLING_LAYER_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "NvInfer.h" 14 | 15 | nvinfer1::ITensor* poolingLayer( 16 | int layerIdx, 17 | std::map& block, 18 | nvinfer1::ITensor* input, 19 | nvinfer1::INetworkDefinition* network); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reduce_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "reduce_layer.h" 7 | 8 | nvinfer1::ITensor* reduceLayer( 9 | int layerIdx, 10 | std::map& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "reduce"); 17 | assert(block.find("mode") != block.end()); 18 | assert(block.find("axes") != block.end()); 19 | 20 | std::string mode = block.at("mode"); 21 | 22 | nvinfer1::ReduceOperation operation; 23 | if (mode == "mean") 24 | operation = nvinfer1::ReduceOperation::kAVG; 25 | 26 | std::string strAxes = block.at("axes"); 27 | std::vector axes; 28 | size_t lastPos = 0, pos = 0; 29 | while ((pos = strAxes.find(',', lastPos)) != std::string::npos) 30 | { 31 | int vL = std::stoi(trim(strAxes.substr(lastPos, pos - lastPos))); 32 | axes.push_back(vL); 33 | lastPos = pos + 1; 34 | } 35 | if (lastPos < strAxes.length()) 36 | { 37 | std::string lastV = trim(strAxes.substr(lastPos)); 38 | if (!lastV.empty()) 39 | axes.push_back(std::stoi(lastV)); 40 | } 41 | assert(!axes.empty()); 42 | 43 | uint32_t axisMask = 0; 44 | for (int axis : axes) 45 | axisMask |= 1 << axis; 46 | 47 | bool keepDims = false; 48 | if 
(block.find("keep") != block.end()) 49 | keepDims = std::stoi(block.at("keep")) == 1 ? true : false; 50 | 51 | nvinfer1::IReduceLayer* reduce = network->addReduce(*input, operation, axisMask, keepDims); 52 | assert(reduce != nullptr); 53 | std::string reduceLayerName = "reduce_" + std::to_string(layerIdx); 54 | reduce->setName(reduceLayerName.c_str()); 55 | output = reduce->getOutput(0); 56 | 57 | return output; 58 | } 59 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reduce_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __REDUCE_LAYER_H__ 7 | #define __REDUCE_LAYER_H__ 8 | 9 | #include "NvInfer.h" 10 | #include "../utils.h" 11 | 12 | nvinfer1::ITensor* reduceLayer( 13 | int layerIdx, 14 | std::map& block, 15 | nvinfer1::ITensor* input, 16 | nvinfer1::INetworkDefinition* network); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reg_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "reg_layer.h" 7 | 8 | nvinfer1::ITensor* regLayer( 9 | int layerIdx, 10 | std::map& block, 11 | std::vector& weights, 12 | std::vector& trtWeights, 13 | int& weightPtr, 14 | nvinfer1::ITensor* input, 15 | nvinfer1::INetworkDefinition* network) 16 | { 17 | nvinfer1::ITensor* output; 18 | 19 | assert(block.at("type") == "reg"); 20 | 21 | nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input); 22 | assert(shuffle != nullptr); 23 | std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx); 24 | shuffle->setName(shuffleLayerName.c_str()); 25 | nvinfer1::Permutation permutation; 26 | permutation.order[0] = 1; 27 | permutation.order[1] = 0; 28 | shuffle->setFirstTranspose(permutation); 29 | output = shuffle->getOutput(0); 30 | nvinfer1::Dims shuffleDims = output->getDimensions(); 31 | 32 | nvinfer1::ISliceLayer* sliceLt = network->addSlice( 33 | *output, nvinfer1::Dims{2, {0, 0}}, nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, nvinfer1::Dims{2, {1, 1}}); 34 | assert(sliceLt != nullptr); 35 | std::string sliceLtLayerName = "slice_lt_" + std::to_string(layerIdx); 36 | sliceLt->setName(sliceLtLayerName.c_str()); 37 | nvinfer1::ITensor* lt = sliceLt->getOutput(0); 38 | 39 | nvinfer1::ISliceLayer* sliceRb = network->addSlice( 40 | *output, nvinfer1::Dims{2, {0, 2}}, nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, nvinfer1::Dims{2, {1, 1}}); 41 | assert(sliceRb != nullptr); 42 | std::string sliceRbLayerName = "slice_rb_" + std::to_string(layerIdx); 43 | sliceRb->setName(sliceRbLayerName.c_str()); 44 | nvinfer1::ITensor* rb = sliceRb->getOutput(0); 45 | 46 | int channels = shuffleDims.d[0] * 2; 47 | nvinfer1::Weights anchorPointsWt{nvinfer1::DataType::kFLOAT, nullptr, channels}; 48 | float* val = new float[channels]; 49 | for (int i = 0; i < channels; ++i) 50 | { 51 | val[i] = weights[weightPtr]; 52 | weightPtr++; 53 | } 54 | anchorPointsWt.values = val; 55 | trtWeights.push_back(anchorPointsWt); 56 | 57 | nvinfer1::IConstantLayer* anchorPoints = network->addConstant(nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, anchorPointsWt); 58 | assert(anchorPoints != nullptr); 59 | std::string anchorPointsLayerName = "anchor_points_" + std::to_string(layerIdx); 60 | 
anchorPoints->setName(anchorPointsLayerName.c_str()); 61 | nvinfer1::ITensor* anchorPointsTensor = anchorPoints->getOutput(0); 62 | 63 | nvinfer1::IElementWiseLayer* x1y1 64 | = network->addElementWise(*anchorPointsTensor, *lt, nvinfer1::ElementWiseOperation::kSUB); 65 | assert(x1y1 != nullptr); 66 | std::string x1y1LayerName = "x1y1_" + std::to_string(layerIdx); 67 | x1y1->setName(x1y1LayerName.c_str()); 68 | nvinfer1::ITensor* x1y1Tensor = x1y1->getOutput(0); 69 | 70 | nvinfer1::IElementWiseLayer* x2y2 71 | = network->addElementWise(*rb, *anchorPointsTensor, nvinfer1::ElementWiseOperation::kSUM); 72 | assert(x2y2 != nullptr); 73 | std::string x2y2LayerName = "x2y2_" + std::to_string(layerIdx); 74 | x2y2->setName(x2y2LayerName.c_str()); 75 | nvinfer1::ITensor* x2y2Tensor = x2y2->getOutput(0); 76 | 77 | std::vector concatInputs; 78 | concatInputs.push_back(x1y1Tensor); 79 | concatInputs.push_back(x2y2Tensor); 80 | 81 | nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); 82 | assert(concat != nullptr); 83 | std::string concatLayerName = "concat_" + std::to_string(layerIdx); 84 | concat->setName(concatLayerName.c_str()); 85 | concat->setAxis(1); 86 | output = concat->getOutput(0); 87 | 88 | channels = shuffleDims.d[0]; 89 | nvinfer1::Weights stridePointsWt{nvinfer1::DataType::kFLOAT, nullptr, channels}; 90 | val = new float[channels]; 91 | for (int i = 0; i < channels; ++i) 92 | { 93 | val[i] = weights[weightPtr]; 94 | weightPtr++; 95 | } 96 | stridePointsWt.values = val; 97 | trtWeights.push_back(stridePointsWt); 98 | 99 | nvinfer1::IConstantLayer* stridePoints = network->addConstant(nvinfer1::Dims{2, {shuffleDims.d[0], 1}}, stridePointsWt); 100 | assert(stridePoints != nullptr); 101 | std::string stridePointsLayerName = "stride_points_" + std::to_string(layerIdx); 102 | stridePoints->setName(stridePointsLayerName.c_str()); 103 | nvinfer1::ITensor* stridePointsTensor = stridePoints->getOutput(0); 104 | 105 | nvinfer1::IElementWiseLayer* pred 106 | = network->addElementWise(*output, *stridePointsTensor, nvinfer1::ElementWiseOperation::kPROD); 107 | assert(pred != nullptr); 108 | std::string predLayerName = "pred_" + std::to_string(layerIdx); 109 | pred->setName(predLayerName.c_str()); 110 | output = pred->getOutput(0); 111 | 112 | return output; 113 | } 114 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reg_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __REG_LAYER_H__ 7 | #define __REG_LAYER_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "NvInfer.h" 14 | 15 | nvinfer1::ITensor* regLayer( 16 | int layerIdx, 17 | std::map& block, 18 | std::vector& weights, 19 | std::vector& trtWeights, 20 | int& weightPtr, 21 | nvinfer1::ITensor* input, 22 | nvinfer1::INetworkDefinition* network); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "reorg_layer.h" 7 | 8 | nvinfer1::ITensor* reorgLayer( 9 | int layerIdx, 10 | std::map& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | 
nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "reorg"); 17 | 18 | nvinfer1::Dims inputDims = input->getDimensions(); 19 | 20 | nvinfer1::ISliceLayer *slice1 = network->addSlice( 21 | *input, nvinfer1::Dims{3, {0, 0, 0}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, 22 | nvinfer1::Dims{3, {1, 2, 2}}); 23 | assert(slice1 != nullptr); 24 | std::string slice1LayerName = "slice1_" + std::to_string(layerIdx); 25 | slice1->setName(slice1LayerName.c_str()); 26 | 27 | nvinfer1::ISliceLayer *slice2 = network->addSlice( 28 | *input, nvinfer1::Dims{3, {0, 1, 0}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, 29 | nvinfer1::Dims{3, {1, 2, 2}}); 30 | assert(slice2 != nullptr); 31 | std::string slice2LayerName = "slice2_" + std::to_string(layerIdx); 32 | slice2->setName(slice2LayerName.c_str()); 33 | 34 | nvinfer1::ISliceLayer *slice3 = network->addSlice( 35 | *input, nvinfer1::Dims{3, {0, 0, 1}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, 36 | nvinfer1::Dims{3, {1, 2, 2}}); 37 | assert(slice3 != nullptr); 38 | std::string slice3LayerName = "slice3_" + std::to_string(layerIdx); 39 | slice3->setName(slice3LayerName.c_str()); 40 | 41 | nvinfer1::ISliceLayer *slice4 = network->addSlice( 42 | *input, nvinfer1::Dims{3, {0, 1, 1}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, 43 | nvinfer1::Dims{3, {1, 2, 2}}); 44 | assert(slice4 != nullptr); 45 | std::string slice4LayerName = "slice4_" + std::to_string(layerIdx); 46 | slice4->setName(slice4LayerName.c_str()); 47 | 48 | std::vector concatInputs; 49 | concatInputs.push_back(slice1->getOutput(0)); 50 | concatInputs.push_back(slice2->getOutput(0)); 51 | concatInputs.push_back(slice3->getOutput(0)); 52 | concatInputs.push_back(slice4->getOutput(0)); 53 | 54 | nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); 55 | assert(concat != nullptr); 56 | std::string concatLayerName = "concat_" + std::to_string(layerIdx); 57 | concat->setName(concatLayerName.c_str()); 58 | concat->setAxis(0); 59 | output = concat->getOutput(0); 60 | 61 | return output; 62 | } 63 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reorg_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __REORGV5_LAYER_H__ 7 | #define __REORGV5_LAYER_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "NvInfer.h" 14 | 15 | nvinfer1::ITensor* reorgLayer( 16 | int layerIdx, 17 | std::map& block, 18 | nvinfer1::ITensor* input, 19 | nvinfer1::INetworkDefinition* network); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "route_layer.h" 7 | 8 | nvinfer1::ITensor* routeLayer( 9 | int layerIdx, 10 | std::string& layers, 11 | std::map& block, 12 | std::vector tensorOutputs, 13 | nvinfer1::INetworkDefinition* network) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "route"); 18 | assert(block.find("layers") != block.end()); 19 | 20 | std::string strLayers = 
block.at("layers"); 21 | std::vector idxLayers; 22 | size_t lastPos = 0, pos = 0; 23 | while ((pos = strLayers.find(',', lastPos)) != std::string::npos) 24 | { 25 | int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos))); 26 | idxLayers.push_back(vL); 27 | lastPos = pos + 1; 28 | } 29 | if (lastPos < strLayers.length()) 30 | { 31 | std::string lastV = trim(strLayers.substr(lastPos)); 32 | if (!lastV.empty()) 33 | idxLayers.push_back(std::stoi(lastV)); 34 | } 35 | assert (!idxLayers.empty()); 36 | std::vector concatInputs; 37 | for (uint i = 0; i < idxLayers.size(); ++i) 38 | { 39 | if (idxLayers[i] < 0) 40 | idxLayers[i] = tensorOutputs.size() + idxLayers[i]; 41 | assert (idxLayers[i] >= 0 && idxLayers[i] < (int)tensorOutputs.size()); 42 | concatInputs.push_back(tensorOutputs[idxLayers[i]]); 43 | if (i < idxLayers.size() - 1) 44 | layers += std::to_string(idxLayers[i]) + ", "; 45 | } 46 | layers += std::to_string(idxLayers[idxLayers.size() - 1]); 47 | 48 | if (concatInputs.size() == 1) 49 | output = concatInputs[0]; 50 | else { 51 | int axis = 0; 52 | if (block.find("axis") != block.end()) 53 | axis = std::stoi(block.at("axis")); 54 | if (axis < 0) 55 | axis = concatInputs[0]->getDimensions().nbDims + axis; 56 | 57 | nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); 58 | assert(concat != nullptr); 59 | std::string concatLayerName = "route_" + std::to_string(layerIdx); 60 | concat->setName(concatLayerName.c_str()); 61 | concat->setAxis(axis); 62 | output = concat->getOutput(0); 63 | } 64 | 65 | if (block.find("groups") != block.end()) 66 | { 67 | nvinfer1::Dims prevTensorDims = output->getDimensions(); 68 | int groups = stoi(block.at("groups")); 69 | int group_id = stoi(block.at("group_id")); 70 | int startSlice = (prevTensorDims.d[0] / groups) * group_id; 71 | int channelSlice = (prevTensorDims.d[0] / groups); 72 | nvinfer1::ISliceLayer* slice = network->addSlice( 73 | *output, nvinfer1::Dims{3, {startSlice, 0, 0}}, 74 | nvinfer1::Dims{3, {channelSlice, prevTensorDims.d[1], prevTensorDims.d[2]}}, nvinfer1::Dims{3, {1, 1, 1}}); 75 | assert(slice != nullptr); 76 | std::string sliceLayerName = "slice_" + std::to_string(layerIdx); 77 | slice->setName(sliceLayerName.c_str()); 78 | output = slice->getOutput(0); 79 | } 80 | 81 | return output; 82 | } 83 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/route_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __ROUTE_LAYER_H__ 7 | #define __ROUTE_LAYER_H__ 8 | 9 | #include "NvInfer.h" 10 | #include "../utils.h" 11 | 12 | nvinfer1::ITensor* routeLayer( 13 | int layerIdx, 14 | std::string& layers, 15 | std::map& block, 16 | std::vector tensorOutputs, 17 | nvinfer1::INetworkDefinition* network); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "shortcut_layer.h" 7 | 8 | nvinfer1::ITensor* shortcutLayer( 9 | int layerIdx, 10 | std::string mode, 11 | std::string activation, 12 | std::string inputVol, 13 | std::string shortcutVol, 14 | std::map& block, 15 | nvinfer1::ITensor* input, 
16 | nvinfer1::ITensor* shortcutInput, 17 | nvinfer1::INetworkDefinition* network) 18 | { 19 | nvinfer1::ITensor* output; 20 | 21 | assert(block.at("type") == "shortcut"); 22 | 23 | nvinfer1::ElementWiseOperation operation = nvinfer1::ElementWiseOperation::kSUM; 24 | 25 | if (mode == "mul") 26 | operation = nvinfer1::ElementWiseOperation::kPROD; 27 | 28 | if (mode == "add" && inputVol != shortcutVol) 29 | { 30 | nvinfer1::ISliceLayer* slice = network->addSlice( 31 | *shortcutInput, nvinfer1::Dims{3, {0, 0, 0}}, input->getDimensions(), nvinfer1::Dims{3, {1, 1, 1}}); 32 | assert(slice != nullptr); 33 | std::string sliceLayerName = "slice_" + std::to_string(layerIdx); 34 | slice->setName(sliceLayerName.c_str()); 35 | output = slice->getOutput(0); 36 | } 37 | else 38 | { 39 | output = shortcutInput; 40 | } 41 | 42 | nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output, operation); 43 | assert(shortcut != nullptr); 44 | std::string shortcutLayerName = "shortcut_" + std::to_string(layerIdx); 45 | shortcut->setName(shortcutLayerName.c_str()); 46 | output = shortcut->getOutput(0); 47 | 48 | output = activationLayer(layerIdx, activation, output, network); 49 | assert(output != nullptr); 50 | 51 | return output; 52 | } 53 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SHORTCUT_LAYER_H__ 7 | #define __SHORTCUT_LAYER_H__ 8 | 9 | #include 10 | 11 | #include "NvInfer.h" 12 | 13 | #include "activation_layer.h" 14 | 15 | nvinfer1::ITensor* shortcutLayer( 16 | int layerIdx, 17 | std::string mode, 18 | std::string activation, 19 | std::string inputVol, 20 | std::string shortcutVol, 21 | std::map& block, 22 | nvinfer1::ITensor* input, 23 | nvinfer1::ITensor* shortcut, 24 | nvinfer1::INetworkDefinition* network); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "shuffle_layer.h" 7 | 8 | nvinfer1::ITensor* shuffleLayer( 9 | int layerIdx, 10 | std::string& layer, 11 | std::map& block, 12 | nvinfer1::ITensor* input, 13 | std::vector tensorOutputs, 14 | nvinfer1::INetworkDefinition* network) 15 | { 16 | nvinfer1::ITensor* output; 17 | 18 | assert(block.at("type") == "shuffle"); 19 | 20 | nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input); 21 | assert(shuffle != nullptr); 22 | std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx); 23 | shuffle->setName(shuffleLayerName.c_str()); 24 | 25 | if (block.find("reshape") != block.end()) 26 | { 27 | std::string strReshape = block.at("reshape"); 28 | std::vector reshape; 29 | size_t lastPos = 0, pos = 0; 30 | while ((pos = strReshape.find(',', lastPos)) != std::string::npos) 31 | { 32 | int vL = std::stoi(trim(strReshape.substr(lastPos, pos - lastPos))); 33 | reshape.push_back(vL); 34 | lastPos = pos + 1; 35 | } 36 | if (lastPos < strReshape.length()) 37 | { 38 | std::string lastV = trim(strReshape.substr(lastPos)); 39 | if (!lastV.empty()) 40 | reshape.push_back(std::stoi(lastV)); 41 | } 42 | assert(!reshape.empty()); 43 | 44 | int from = -1; 45 | if 
(block.find("from") != block.end()) 46 | from = std::stoi(block.at("from")); 47 | 48 | if (from < 0) 49 | from = tensorOutputs.size() + from; 50 | 51 | layer = std::to_string(from); 52 | 53 | nvinfer1::Dims inputTensorDims = tensorOutputs[from]->getDimensions(); 54 | int32_t l = inputTensorDims.d[1] * inputTensorDims.d[2]; 55 | 56 | nvinfer1::Dims reshapeDims; 57 | reshapeDims.nbDims = reshape.size(); 58 | 59 | for (uint i = 0; i < reshape.size(); ++i) 60 | if (reshape[i] == 0) 61 | reshapeDims.d[i] = l; 62 | else 63 | reshapeDims.d[i] = reshape[i]; 64 | 65 | shuffle->setReshapeDimensions(reshapeDims); 66 | } 67 | 68 | if (block.find("transpose1") != block.end()) 69 | { 70 | std::string strTranspose1 = block.at("transpose1"); 71 | std::vector transpose1; 72 | size_t lastPos = 0, pos = 0; 73 | while ((pos = strTranspose1.find(',', lastPos)) != std::string::npos) 74 | { 75 | int vL = std::stoi(trim(strTranspose1.substr(lastPos, pos - lastPos))); 76 | transpose1.push_back(vL); 77 | lastPos = pos + 1; 78 | } 79 | if (lastPos < strTranspose1.length()) 80 | { 81 | std::string lastV = trim(strTranspose1.substr(lastPos)); 82 | if (!lastV.empty()) 83 | transpose1.push_back(std::stoi(lastV)); 84 | } 85 | assert(!transpose1.empty()); 86 | 87 | nvinfer1::Permutation permutation1; 88 | for (uint i = 0; i < transpose1.size(); ++i) 89 | permutation1.order[i] = transpose1[i]; 90 | 91 | shuffle->setFirstTranspose(permutation1); 92 | } 93 | 94 | if (block.find("transpose2") != block.end()) 95 | { 96 | std::string strTranspose2 = block.at("transpose2"); 97 | std::vector transpose2; 98 | size_t lastPos = 0, pos = 0; 99 | while ((pos = strTranspose2.find(',', lastPos)) != std::string::npos) 100 | { 101 | int vL = std::stoi(trim(strTranspose2.substr(lastPos, pos - lastPos))); 102 | transpose2.push_back(vL); 103 | lastPos = pos + 1; 104 | } 105 | if (lastPos < strTranspose2.length()) 106 | { 107 | std::string lastV = trim(strTranspose2.substr(lastPos)); 108 | if (!lastV.empty()) 109 | transpose2.push_back(std::stoi(lastV)); 110 | } 111 | assert(!transpose2.empty()); 112 | 113 | nvinfer1::Permutation permutation2; 114 | for (uint i = 0; i < transpose2.size(); ++i) 115 | permutation2.order[i] = transpose2[i]; 116 | 117 | shuffle->setSecondTranspose(permutation2); 118 | } 119 | 120 | output = shuffle->getOutput(0); 121 | 122 | return output; 123 | } 124 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SHUFFLE_LAYER_H__ 7 | #define __SHUFFLE_LAYER_H__ 8 | 9 | #include "NvInfer.h" 10 | #include "../utils.h" 11 | 12 | nvinfer1::ITensor* shuffleLayer( 13 | int layerIdx, 14 | std::string& layer, 15 | std::map& block, 16 | nvinfer1::ITensor* input, 17 | std::vector tensorOutputs, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/softmax_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "softmax_layer.h" 7 | 8 | nvinfer1::ITensor* softmaxLayer( 9 | int layerIdx, 10 | std::map& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | 
nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "softmax"); 17 | assert(block.find("axes") != block.end()); 18 | 19 | int axes = std::stoi(block.at("axes")); 20 | 21 | nvinfer1::ISoftMaxLayer* softmax = network->addSoftMax(*input); 22 | assert(softmax != nullptr); 23 | std::string softmaxLayerName = "softmax_" + std::to_string(layerIdx); 24 | softmax->setName(softmaxLayerName.c_str()); 25 | softmax->setAxes(1 << axes); 26 | output = softmax->getOutput(0); 27 | 28 | return output; 29 | } 30 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/softmax_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SOFTMAX_LAYER_H__ 7 | #define __SOFTMAX_LAYER_H__ 8 | 9 | #include <map> 10 | #include <string> 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* softmaxLayer( 15 | int layerIdx, 16 | std::map<std::string, std::string>& block, 17 | nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "upsample_layer.h" 7 | 8 | nvinfer1::ITensor* upsampleLayer( 9 | int layerIdx, 10 | std::map<std::string, std::string>& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "upsample"); 17 | assert(block.find("stride") != block.end()); 18 | 19 | int stride = std::stoi(block.at("stride")); 20 | 21 | float scale[3] = {1, static_cast<float>(stride), static_cast<float>(stride)}; 22 | 23 | nvinfer1::IResizeLayer* resize = network->addResize(*input); 24 | assert(resize != nullptr); 25 | std::string resizeLayerName = "upsample_" + std::to_string(layerIdx); 26 | resize->setName(resizeLayerName.c_str()); 27 | resize->setResizeMode(nvinfer1::ResizeMode::kNEAREST); 28 | resize->setScales(scale, 3); 29 | output = resize->getOutput(0); 30 | 31 | return output; 32 | } 33 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __UPSAMPLE_LAYER_H__ 7 | #define __UPSAMPLE_LAYER_H__ 8 | 9 | #include <map> 10 | #include <string> 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* upsampleLayer( 15 | int layerIdx, 16 | std::map<std::string, std::string>& block, 17 | nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "nvdsinfer_custom_impl.h" 27 | #include "nvdsinfer_context.h" 28 | #include "yoloPlugins.h" 29 | #include "yolo.h" 30 | 31 | #include 32 | 33 | #define USE_CUDA_ENGINE_GET_API 1 34 | 35 | static bool getYoloNetworkInfo(NetworkInfo &networkInfo, const NvDsInferContextInitParams* initParams) 36 | { 37 | std::string yoloCfg = initParams->customNetworkConfigFilePath; 38 | std::string yoloType; 39 | 40 | std::transform(yoloCfg.begin(), yoloCfg.end(), yoloCfg.begin(), [] (uint8_t c) { 41 | return std::tolower(c); 42 | }); 43 | 44 | yoloType = yoloCfg.substr(0, yoloCfg.find(".cfg")); 45 | 46 | networkInfo.inputBlobName = "data"; 47 | networkInfo.networkType = yoloType; 48 | networkInfo.configFilePath = initParams->customNetworkConfigFilePath; 49 | networkInfo.wtsFilePath = initParams->modelFilePath; 50 | networkInfo.int8CalibPath = initParams->int8CalibrationFilePath; 51 | networkInfo.deviceType = (initParams->useDLA ? 
"kDLA" : "kGPU"); 52 | networkInfo.numDetectedClasses = initParams->numDetectedClasses; 53 | networkInfo.clusterMode = initParams->clusterMode; 54 | networkInfo.scoreThreshold = initParams->perClassDetectionParams->preClusterThreshold; 55 | 56 | if (initParams->networkMode == 0) 57 | networkInfo.networkMode = "FP32"; 58 | else if (initParams->networkMode == 1) 59 | networkInfo.networkMode = "INT8"; 60 | else if (initParams->networkMode == 2) 61 | networkInfo.networkMode = "FP16"; 62 | 63 | if (networkInfo.configFilePath.empty() || networkInfo.wtsFilePath.empty()) 64 | { 65 | std::cerr << "YOLO config file or weights file is not specified\n" << std::endl; 66 | return false; 67 | } 68 | 69 | if (!fileExists(networkInfo.configFilePath) || !fileExists(networkInfo.wtsFilePath)) 70 | { 71 | std::cerr << "YOLO config file or weights file is not exist\n" << std::endl; 72 | return false; 73 | } 74 | 75 | return true; 76 | } 77 | 78 | #if !USE_CUDA_ENGINE_GET_API 79 | IModelParser* NvDsInferCreateModelParser( 80 | const NvDsInferContextInitParams* initParams) { 81 | NetworkInfo networkInfo; 82 | if (!getYoloNetworkInfo(networkInfo, initParams)) 83 | return nullptr; 84 | 85 | return new Yolo(networkInfo); 86 | } 87 | #else 88 | extern "C" 89 | bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder, 90 | nvinfer1::IBuilderConfig * const builderConfig, 91 | const NvDsInferContextInitParams * const initParams, 92 | nvinfer1::DataType dataType, 93 | nvinfer1::ICudaEngine *& cudaEngine); 94 | 95 | extern "C" 96 | bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder, 97 | nvinfer1::IBuilderConfig * const builderConfig, 98 | const NvDsInferContextInitParams * const initParams, 99 | nvinfer1::DataType dataType, 100 | nvinfer1::ICudaEngine *& cudaEngine) 101 | { 102 | NetworkInfo networkInfo; 103 | if (!getYoloNetworkInfo(networkInfo, initParams)) 104 | return false; 105 | 106 | Yolo yolo(networkInfo); 107 | cudaEngine = yolo.createEngine (builder, builderConfig); 108 | if (cudaEngine == nullptr) 109 | { 110 | std::cerr << "Failed to build CUDA engine on " << networkInfo.configFilePath << std::endl; 111 | return false; 112 | } 113 | 114 | return true; 115 | } 116 | #endif 117 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include 27 | #include 28 | #include 29 | #include "nvdsinfer_custom_impl.h" 30 | #include "utils.h" 31 | 32 | #include "yoloPlugins.h" 33 | 34 | extern "C" bool NvDsInferParseYolo( 35 | std::vector const &outputLayersInfo, NvDsInferNetworkInfo const &networkInfo, 36 | NvDsInferParseDetectionParams const &detectionParams, std::vector &objectList); 37 | 38 | static NvDsInferParseObjectInfo convertBBox( 39 | const float &bx1, const float &by1, const float &bx2, const float &by2, const uint &netW, const uint &netH) 40 | { 41 | NvDsInferParseObjectInfo b; 42 | 43 | float x1 = bx1; 44 | float y1 = by1; 45 | float x2 = bx2; 46 | float y2 = by2; 47 | 48 | x1 = clamp(x1, 0, netW); 49 | y1 = clamp(y1, 0, netH); 50 | x2 = clamp(x2, 0, netW); 51 | y2 = clamp(y2, 0, netH); 52 | 53 | b.left = x1; 54 | b.width = clamp(x2 - x1, 0, netW); 55 | b.top = y1; 56 | b.height = clamp(y2 - y1, 0, netH); 57 | 58 | return b; 59 | } 60 | 61 | static void addBBoxProposal( 62 | const float bx1, const float by1, const float bx2, const float by2, const uint &netW, const uint &netH, 63 | const int maxIndex, const float maxProb, std::vector &binfo) 64 | { 65 | NvDsInferParseObjectInfo bbi = convertBBox(bx1, by1, bx2, by2, netW, netH); 66 | if (bbi.width < 1 || bbi.height < 1) 67 | return; 68 | 69 | bbi.detectionConfidence = maxProb; 70 | bbi.classId = maxIndex; 71 | binfo.push_back(bbi); 72 | } 73 | 74 | static std::vector decodeYoloTensor( 75 | const int *counts, const float *boxes, const float *scores, const int *classes, const uint &netW, const uint &netH) 76 | { 77 | std::vector binfo; 78 | 79 | uint numBoxes = counts[0]; 80 | for (uint b = 0; b < numBoxes; ++b) 81 | { 82 | float bx1 = boxes[b * 4 + 0]; 83 | float by1 = boxes[b * 4 + 1]; 84 | float bx2 = boxes[b * 4 + 2]; 85 | float by2 = boxes[b * 4 + 3]; 86 | 87 | float maxProb = scores[b]; 88 | int maxIndex = classes[b]; 89 | 90 | addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo); 91 | } 92 | return binfo; 93 | } 94 | 95 | static bool NvDsInferParseCustomYolo( 96 | std::vector const &outputLayersInfo, NvDsInferNetworkInfo const &networkInfo, 97 | NvDsInferParseDetectionParams const &detectionParams, std::vector &objectList, 98 | const uint &numClasses) 99 | { 100 | if (outputLayersInfo.empty()) 101 | { 102 | std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl; 103 | return false; 104 | } 105 | 106 | std::vector objects; 107 | 108 | const NvDsInferLayerInfo &counts = outputLayersInfo[0]; 109 | const NvDsInferLayerInfo &boxes = outputLayersInfo[1]; 110 | const NvDsInferLayerInfo &scores = outputLayersInfo[2]; 111 | const NvDsInferLayerInfo &classes = outputLayersInfo[3]; 112 | 113 | std::vector outObjs = 114 | decodeYoloTensor( 115 | (const int *)(counts.buffer), (const float *)(boxes.buffer), (const float *)(scores.buffer), 116 | (const int *)(classes.buffer), networkInfo.width, networkInfo.height); 117 | 118 | objects.insert(objects.end(), outObjs.begin(), outObjs.end()); 119 | 120 | objectList = objects; 121 | 122 | return true; 123 | } 124 | 125 | extern "C" bool NvDsInferParseYolo( 126 | std::vector const &outputLayersInfo, 
NvDsInferNetworkInfo const &networkInfo, 127 | NvDsInferParseDetectionParams const &detectionParams, std::vector &objectList) 128 | { 129 | int num_classes = kNUM_CLASSES; 130 | 131 | return NvDsInferParseCustomYolo( 132 | outputLayersInfo, networkInfo, detectionParams, objectList, num_classes); 133 | } 134 | 135 | CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo); 136 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/utils.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "utils.h" 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | static void leftTrim(std::string& s) 34 | { 35 | s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); 36 | } 37 | 38 | static void rightTrim(std::string& s) 39 | { 40 | s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); 41 | } 42 | 43 | std::string trim(std::string s) 44 | { 45 | leftTrim(s); 46 | rightTrim(s); 47 | return s; 48 | } 49 | 50 | float clamp(const float val, const float minVal, const float maxVal) 51 | { 52 | assert(minVal <= maxVal); 53 | return std::min(maxVal, std::max(minVal, val)); 54 | } 55 | 56 | bool fileExists(const std::string fileName, bool verbose) 57 | { 58 | if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName))) 59 | { 60 | if (verbose) std::cout << "\nFile does not exist: " << fileName << std::endl; 61 | return false; 62 | } 63 | return true; 64 | } 65 | 66 | std::vector loadWeights(const std::string weightsFilePath, const std::string& networkType) 67 | { 68 | assert(fileExists(weightsFilePath)); 69 | std::cout << "\nLoading pre-trained weights" << std::endl; 70 | 71 | std::vector weights; 72 | 73 | if (weightsFilePath.find(".weights") != std::string::npos) { 74 | std::ifstream file(weightsFilePath, std::ios_base::binary); 75 | assert(file.good()); 76 | std::string line; 77 | 78 | if (networkType.find("yolov2") != std::string::npos && networkType.find("yolov2-tiny") == std::string::npos) 79 | { 80 | // Remove 4 int32 bytes of data from the stream belonging to the header 81 | file.ignore(4 * 4); 82 | } 83 | else 84 | { 85 | // Remove 5 int32 bytes of data from the stream belonging to the header 86 | file.ignore(4 * 5); 87 | } 88 | 89 | char floatWeight[4]; 90 | while (!file.eof()) 91 | { 92 | file.read(floatWeight, 4); 93 | assert(file.gcount() == 4); 94 | weights.push_back(*reinterpret_cast(floatWeight)); 95 | if (file.peek() == std::istream::traits_type::eof()) break; 96 | } 97 | } 98 | 99 | else if (weightsFilePath.find(".wts") != std::string::npos) { 100 | std::ifstream file(weightsFilePath); 101 | assert(file.good()); 102 | int32_t count; 103 | file >> count; 104 | assert(count > 0 && "\nInvalid .wts file."); 105 | 106 | uint32_t floatWeight; 107 | std::string name; 108 | uint32_t size; 109 | 110 | while (count--) { 111 | file >> name >> std::dec >> size; 112 | for (uint32_t x = 0, y = size; x < y; ++x) 113 | { 114 | file >> std::hex >> floatWeight; 115 | weights.push_back(*reinterpret_cast(&floatWeight)); 116 | }; 117 | } 118 | } 119 | 120 | else { 121 | std::cerr << "\nFile " << weightsFilePath << " is not supported" << std::endl; 122 | std::abort(); 123 | } 124 | 125 | std::cout << "Loading weights of " << networkType << " complete" 126 | << std::endl; 127 | std::cout << "Total weights read: " << weights.size() << std::endl; 128 | return weights; 129 | } 130 | 131 | std::string dimsToString(const nvinfer1::Dims d) 132 | { 133 | std::stringstream s; 134 | assert(d.nbDims >= 1); 135 | s << "["; 136 | for (int i = 0; i < d.nbDims - 1; ++i) 137 | s << d.d[i] << ", "; 138 | s << d.d[d.nbDims - 1] << "]"; 139 | 140 | return s.str(); 141 | } 142 | 143 | int getNumChannels(nvinfer1::ITensor* t) 144 | { 145 | nvinfer1::Dims d = t->getDimensions(); 146 | assert(d.nbDims == 3); 147 | 148 | return d.d[0]; 149 | } 150 | 151 | void printLayerInfo( 152 | 
std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr) 153 | { 154 | std::cout << std::setw(8) << std::left << layerIndex << std::setw(30) << std::left << layerName; 155 | std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left << layerOutput; 156 | std::cout << weightPtr << std::endl; 157 | } 158 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | 27 | #ifndef __UTILS_H__ 28 | #define __UTILS_H__ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "NvInfer.h" 37 | 38 | std::string trim(std::string s); 39 | float clamp(const float val, const float minVal, const float maxVal); 40 | bool fileExists(const std::string fileName, bool verbose = true); 41 | std::vector loadWeights(const std::string weightsFilePath, const std::string& networkType); 42 | std::string dimsToString(const nvinfer1::Dims d); 43 | int getNumChannels(nvinfer1::ITensor* t); 44 | void printLayerInfo( 45 | std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr); 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yolo.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "yolo.h" 27 | #include "yoloPlugins.h" 28 | #include 29 | 30 | #ifdef OPENCV 31 | #include "calibrator.h" 32 | #endif 33 | 34 | Yolo::Yolo(const NetworkInfo& networkInfo) 35 | : m_InputBlobName(networkInfo.inputBlobName), 36 | m_NetworkType(networkInfo.networkType), 37 | m_ConfigFilePath(networkInfo.configFilePath), 38 | m_WtsFilePath(networkInfo.wtsFilePath), 39 | m_Int8CalibPath(networkInfo.int8CalibPath), 40 | m_DeviceType(networkInfo.deviceType), 41 | m_NumDetectedClasses(networkInfo.numDetectedClasses), 42 | m_ClusterMode(networkInfo.clusterMode), 43 | m_NetworkMode(networkInfo.networkMode), 44 | m_ScoreThreshold(networkInfo.scoreThreshold), 45 | m_InputH(0), 46 | m_InputW(0), 47 | m_InputC(0), 48 | m_InputSize(0), 49 | m_NumClasses(0), 50 | m_LetterBox(0), 51 | m_NewCoords(0), 52 | m_YoloCount(0) 53 | {} 54 | 55 | Yolo::~Yolo() 56 | { 57 | destroyNetworkUtils(); 58 | } 59 | 60 | nvinfer1::ICudaEngine *Yolo::createEngine(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config) 61 | { 62 | assert (builder); 63 | 64 | m_ConfigBlocks = parseConfigFile(m_ConfigFilePath); 65 | parseConfigBlocks(); 66 | 67 | nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0); 68 | if (parseModel(*network) != NVDSINFER_SUCCESS) 69 | { 70 | delete network; 71 | return nullptr; 72 | } 73 | 74 | std::cout << "Building the TensorRT Engine\n" << std::endl; 75 | 76 | if (m_NumClasses != m_NumDetectedClasses) 77 | { 78 | std::cout << "NOTE: Number of classes mismatch, make sure to set num-detected-classes=" << m_NumClasses 79 | << " in config_infer file\n" << std::endl; 80 | } 81 | if (m_LetterBox == 1) 82 | { 83 | std::cout << "NOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file" 84 | << " to get better accuracy\n" << std::endl; 85 | } 86 | if (m_ClusterMode != 2) 87 | { 88 | std::cout << "NOTE: Wrong cluster-mode is set, make sure to set cluster-mode=2 in config_infer file\n" 89 | << std::endl; 90 | } 91 | 92 | if (m_NetworkMode == "INT8" && !fileExists(m_Int8CalibPath)) 93 | { 94 | assert(builder->platformHasFastInt8()); 95 | #ifdef OPENCV 96 | std::string calib_image_list; 97 | int calib_batch_size; 98 | if (getenv("INT8_CALIB_IMG_PATH")) 99 | calib_image_list = getenv("INT8_CALIB_IMG_PATH"); 100 | else 101 | { 102 | std::cerr << "INT8_CALIB_IMG_PATH not set" << std::endl; 103 | std::abort(); 104 | } 105 | if (getenv("INT8_CALIB_BATCH_SIZE")) 106 | calib_batch_size = std::stoi(getenv("INT8_CALIB_BATCH_SIZE")); 107 | else 108 | { 109 | std::cerr << "INT8_CALIB_BATCH_SIZE not set" << std::endl; 110 | std::abort(); 111 | } 112 | nvinfer1::Int8EntropyCalibrator2 *calibrator = new nvinfer1::Int8EntropyCalibrator2( 113 | calib_batch_size, m_InputC, m_InputH, m_InputW, m_LetterBox, calib_image_list, m_Int8CalibPath); 114 | config->setFlag(nvinfer1::BuilderFlag::kINT8); 115 | config->setInt8Calibrator(calibrator); 116 | #else 117 | 
std::cerr << "OpenCV is required to run INT8 calibrator\n" << std::endl; 118 | assert(0); 119 | #endif 120 | } 121 | 122 | nvinfer1::ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config); 123 | if (engine) 124 | std::cout << "Building complete\n" << std::endl; 125 | else 126 | std::cerr << "Building engine failed\n" << std::endl; 127 | 128 | delete network; 129 | return engine; 130 | } 131 | 132 | NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) { 133 | destroyNetworkUtils(); 134 | 135 | std::vector weights = loadWeights(m_WtsFilePath, m_NetworkType); 136 | std::cout << "Building YOLO network\n" << std::endl; 137 | NvDsInferStatus status = buildYoloNetwork(weights, network); 138 | 139 | if (status == NVDSINFER_SUCCESS) 140 | std::cout << "Building YOLO network complete" << std::endl; 141 | else 142 | std::cerr << "Building YOLO network failed" << std::endl; 143 | 144 | return status; 145 | } 146 | 147 | NvDsInferStatus Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition& network) 148 | { 149 | int weightPtr = 0; 150 | 151 | std::string weightsType; 152 | if (m_WtsFilePath.find(".weights") != std::string::npos) 153 | weightsType = "weights"; 154 | else 155 | weightsType = "wts"; 156 | 157 | float eps = 1.0e-5; 158 | if (m_NetworkType.find("yolov5") != std::string::npos || m_NetworkType.find("yolov7") != std::string::npos) 159 | eps = 1.0e-3; 160 | else if (m_NetworkType.find("yolor") != std::string::npos) 161 | eps = 1.0e-4; 162 | 163 | nvinfer1::ITensor* data = network.addInput( 164 | m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, 165 | nvinfer1::Dims{3, {static_cast(m_InputC), static_cast(m_InputH), static_cast(m_InputW)}}); 166 | assert(data != nullptr && data->getDimensions().nbDims > 0); 167 | 168 | nvinfer1::ITensor* previous = data; 169 | std::vector tensorOutputs; 170 | 171 | nvinfer1::ITensor* yoloTensorInputs[m_YoloCount]; 172 | uint yoloCountInputs = 0; 173 | 174 | int modelType = -1; 175 | 176 | for (uint i = 0; i < m_ConfigBlocks.size(); ++i) 177 | { 178 | std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")"; 179 | 180 | if (m_ConfigBlocks.at(i).at("type") == "net") 181 | printLayerInfo("", "Layer", "Input Shape", "Output Shape", "WeightPtr"); 182 | 183 | else if (m_ConfigBlocks.at(i).at("type") == "convolutional") 184 | { 185 | int channels = getNumChannels(previous); 186 | std::string inputVol = dimsToString(previous->getDimensions()); 187 | previous = convolutionalLayer( 188 | i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, channels, eps, previous, &network); 189 | assert(previous != nullptr); 190 | std::string outputVol = dimsToString(previous->getDimensions()); 191 | tensorOutputs.push_back(previous); 192 | std::string layerName = "conv_" + m_ConfigBlocks.at(i).at("activation"); 193 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); 194 | } 195 | 196 | else if (m_ConfigBlocks.at(i).at("type") == "batchnorm") 197 | { 198 | std::string inputVol = dimsToString(previous->getDimensions()); 199 | previous = batchnormLayer( 200 | i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, eps, previous, &network); 201 | assert(previous != nullptr); 202 | std::string outputVol = dimsToString(previous->getDimensions()); 203 | tensorOutputs.push_back(previous); 204 | std::string layerName = "batchnorm_" + m_ConfigBlocks.at(i).at("activation"); 205 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, 
std::to_string(weightPtr)); 206 | } 207 | 208 | else if (m_ConfigBlocks.at(i).at("type") == "implicit_add" || m_ConfigBlocks.at(i).at("type") == "implicit_mul") 209 | { 210 | previous = implicitLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, &network); 211 | assert(previous != nullptr); 212 | std::string outputVol = dimsToString(previous->getDimensions()); 213 | tensorOutputs.push_back(previous); 214 | std::string layerName = m_ConfigBlocks.at(i).at("type"); 215 | printLayerInfo(layerIndex, layerName, "-", outputVol, std::to_string(weightPtr)); 216 | } 217 | 218 | else if (m_ConfigBlocks.at(i).at("type") == "shift_channels" || 219 | m_ConfigBlocks.at(i).at("type") == "control_channels") 220 | { 221 | assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end()); 222 | int from = stoi(m_ConfigBlocks.at(i).at("from")); 223 | if (from > 0) 224 | from = from - i + 1; 225 | assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); 226 | assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); 227 | assert(i + from - 1 < i - 2); 228 | 229 | std::string inputVol = dimsToString(previous->getDimensions()); 230 | previous = channelsLayer(i, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], &network); 231 | assert(previous != nullptr); 232 | std::string outputVol = dimsToString(previous->getDimensions()); 233 | tensorOutputs.push_back(previous); 234 | std::string layerName = m_ConfigBlocks.at(i).at("type") + ": " + std::to_string(i + from - 1); 235 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 236 | } 237 | 238 | else if (m_ConfigBlocks.at(i).at("type") == "shortcut") 239 | { 240 | assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end()); 241 | int from = stoi(m_ConfigBlocks.at(i).at("from")); 242 | if (from > 0) 243 | from = from - i + 1; 244 | assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); 245 | assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); 246 | assert(i + from - 1 < i - 2); 247 | 248 | std::string mode = "add"; 249 | if (m_ConfigBlocks.at(i).find("mode") != m_ConfigBlocks.at(i).end()) 250 | mode = m_ConfigBlocks.at(i).at("mode"); 251 | 252 | std::string activation = "linear"; 253 | if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end()) 254 | activation = m_ConfigBlocks.at(i).at("activation"); 255 | 256 | std::string inputVol = dimsToString(previous->getDimensions()); 257 | std::string shortcutVol = dimsToString(tensorOutputs[i + from - 1]->getDimensions()); 258 | previous = shortcutLayer( 259 | i, mode, activation, inputVol, shortcutVol, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], 260 | &network); 261 | assert(previous != nullptr); 262 | std::string outputVol = dimsToString(previous->getDimensions()); 263 | tensorOutputs.push_back(previous); 264 | std::string layerName = "shortcut_" + mode + "_" + activation + ": " + std::to_string(i + from - 1); 265 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 266 | 267 | if (mode == "add" && inputVol != shortcutVol) 268 | std::cout << inputVol << " +" << shortcutVol << std::endl; 269 | } 270 | 271 | else if (m_ConfigBlocks.at(i).at("type") == "route") 272 | { 273 | std::string layers; 274 | previous = routeLayer(i, layers, m_ConfigBlocks.at(i), tensorOutputs, &network); 275 | assert(previous != nullptr); 276 | std::string outputVol = dimsToString(previous->getDimensions()); 277 | tensorOutputs.push_back(previous); 278 | std::string layerName = "route: " + layers; 279 | 
printLayerInfo(layerIndex, layerName, "-", outputVol, "-"); 280 | } 281 | 282 | else if (m_ConfigBlocks.at(i).at("type") == "upsample") 283 | { 284 | std::string inputVol = dimsToString(previous->getDimensions()); 285 | previous = upsampleLayer(i, m_ConfigBlocks[i], previous, &network); 286 | assert(previous != nullptr); 287 | std::string outputVol = dimsToString(previous->getDimensions()); 288 | tensorOutputs.push_back(previous); 289 | std::string layerName = "upsample"; 290 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 291 | } 292 | 293 | else if (m_ConfigBlocks.at(i).at("type") == "maxpool" || m_ConfigBlocks.at(i).at("type") == "avgpool") 294 | { 295 | std::string inputVol = dimsToString(previous->getDimensions()); 296 | previous = poolingLayer(i, m_ConfigBlocks.at(i), previous, &network); 297 | assert(previous != nullptr); 298 | std::string outputVol = dimsToString(previous->getDimensions()); 299 | tensorOutputs.push_back(previous); 300 | std::string layerName = m_ConfigBlocks.at(i).at("type"); 301 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 302 | } 303 | 304 | else if (m_ConfigBlocks.at(i).at("type") == "reorg") 305 | { 306 | std::string inputVol = dimsToString(previous->getDimensions()); 307 | if (m_NetworkType.find("yolov2") != std::string::npos) { 308 | nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2); 309 | assert(reorgPlugin != nullptr); 310 | nvinfer1::IPluginV2Layer* reorg = network.addPluginV2(&previous, 1, *reorgPlugin); 311 | assert(reorg != nullptr); 312 | std::string reorglayerName = "reorg_" + std::to_string(i); 313 | reorg->setName(reorglayerName.c_str()); 314 | previous = reorg->getOutput(0); 315 | } 316 | else 317 | previous = reorgLayer(i, m_ConfigBlocks.at(i), previous, &network); 318 | assert(previous != nullptr); 319 | std::string outputVol = dimsToString(previous->getDimensions()); 320 | tensorOutputs.push_back(previous); 321 | std::string layerName = "reorg"; 322 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 323 | } 324 | 325 | else if (m_ConfigBlocks.at(i).at("type") == "reduce") 326 | { 327 | std::string inputVol = dimsToString(previous->getDimensions()); 328 | previous = reduceLayer(i, m_ConfigBlocks.at(i), previous, &network); 329 | assert(previous != nullptr); 330 | std::string outputVol = dimsToString(previous->getDimensions()); 331 | tensorOutputs.push_back(previous); 332 | std::string layerName = "reduce"; 333 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 334 | } 335 | 336 | else if (m_ConfigBlocks.at(i).at("type") == "shuffle") 337 | { 338 | std::string layer; 339 | std::string inputVol = dimsToString(previous->getDimensions()); 340 | previous = shuffleLayer(i, layer, m_ConfigBlocks.at(i), previous, tensorOutputs, &network); 341 | assert(previous != nullptr); 342 | std::string outputVol = dimsToString(previous->getDimensions()); 343 | tensorOutputs.push_back(previous); 344 | std::string layerName = "shuffle: " + layer; 345 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 346 | } 347 | 348 | else if (m_ConfigBlocks.at(i).at("type") == "softmax") 349 | { 350 | std::string inputVol = dimsToString(previous->getDimensions()); 351 | previous = softmaxLayer(i, m_ConfigBlocks.at(i), previous, &network); 352 | assert(previous != nullptr); 353 | std::string outputVol = dimsToString(previous->getDimensions()); 354 | tensorOutputs.push_back(previous); 355 | std::string layerName = "softmax"; 356 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, 
"-"); 357 | } 358 | 359 | else if (m_ConfigBlocks.at(i).at("type") == "yolo" || m_ConfigBlocks.at(i).at("type") == "region") 360 | { 361 | if (m_ConfigBlocks.at(i).at("type") == "yolo") 362 | if (m_NetworkType.find("yolor") != std::string::npos) 363 | modelType = 2; 364 | else 365 | modelType = 1; 366 | else 367 | modelType = 0; 368 | 369 | std::string blobName = modelType != 0 ? "yolo_" + std::to_string(i) : "region_" + std::to_string(i); 370 | nvinfer1::Dims prevTensorDims = previous->getDimensions(); 371 | TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); 372 | curYoloTensor.blobName = blobName; 373 | curYoloTensor.gridSizeX = prevTensorDims.d[2]; 374 | curYoloTensor.gridSizeY = prevTensorDims.d[1]; 375 | 376 | std::string inputVol = dimsToString(previous->getDimensions()); 377 | tensorOutputs.push_back(previous); 378 | yoloTensorInputs[yoloCountInputs] = previous; 379 | ++yoloCountInputs; 380 | std::string layerName = modelType != 0 ? "yolo" : "region"; 381 | printLayerInfo(layerIndex, layerName, inputVol, "-", "-"); 382 | } 383 | 384 | else if (m_ConfigBlocks.at(i).at("type") == "cls") 385 | { 386 | modelType = 3; 387 | 388 | std::string blobName = "cls_" + std::to_string(i); 389 | nvinfer1::Dims prevTensorDims = previous->getDimensions(); 390 | TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); 391 | curYoloTensor.blobName = blobName; 392 | curYoloTensor.numBBoxes = prevTensorDims.d[1]; 393 | m_NumClasses = prevTensorDims.d[0]; 394 | 395 | std::string inputVol = dimsToString(previous->getDimensions()); 396 | previous = clsLayer(i, m_ConfigBlocks.at(i), previous, &network); 397 | assert(previous != nullptr); 398 | std::string outputVol = dimsToString(previous->getDimensions()); 399 | tensorOutputs.push_back(previous); 400 | yoloTensorInputs[yoloCountInputs] = previous; 401 | ++yoloCountInputs; 402 | std::string layerName = "cls"; 403 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 404 | } 405 | 406 | else if (m_ConfigBlocks.at(i).at("type") == "reg") 407 | { 408 | modelType = 3; 409 | 410 | std::string blobName = "reg_" + std::to_string(i); 411 | nvinfer1::Dims prevTensorDims = previous->getDimensions(); 412 | TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); 413 | curYoloTensor.blobName = blobName; 414 | curYoloTensor.numBBoxes = prevTensorDims.d[1]; 415 | 416 | std::string inputVol = dimsToString(previous->getDimensions()); 417 | previous = regLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, previous, &network); 418 | assert(previous != nullptr); 419 | std::string outputVol = dimsToString(previous->getDimensions()); 420 | tensorOutputs.push_back(previous); 421 | yoloTensorInputs[yoloCountInputs] = previous; 422 | ++yoloCountInputs; 423 | std::string layerName = "reg"; 424 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); 425 | } 426 | 427 | else 428 | { 429 | std::cout << "\nUnsupported layer type --> \"" << m_ConfigBlocks.at(i).at("type") << "\"" << std::endl; 430 | assert(0); 431 | } 432 | } 433 | 434 | if ((int)weights.size() != weightPtr) 435 | { 436 | std::cout << "\nNumber of unused weights left: " << weights.size() - weightPtr << std::endl; 437 | assert(0); 438 | } 439 | 440 | if (m_YoloCount == yoloCountInputs) 441 | { 442 | assert((modelType != -1) && "\nCould not determine model type"); 443 | 444 | uint64_t outputSize = 0; 445 | for (uint j = 0; j < yoloCountInputs; ++j) 446 | { 447 | TensorInfo& curYoloTensor = m_YoloTensors.at(j); 448 | if (modelType == 3) 449 
| outputSize = curYoloTensor.numBBoxes; 450 | else 451 | outputSize += curYoloTensor.gridSizeX * curYoloTensor.gridSizeY * curYoloTensor.numBBoxes; 452 | } 453 | 454 | nvinfer1::IPluginV2* yoloPlugin = new YoloLayer( 455 | m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors, outputSize, modelType, m_ScoreThreshold); 456 | assert(yoloPlugin != nullptr); 457 | nvinfer1::IPluginV2Layer* yolo = network.addPluginV2(yoloTensorInputs, m_YoloCount, *yoloPlugin); 458 | assert(yolo != nullptr); 459 | std::string yoloLayerName = "yolo"; 460 | yolo->setName(yoloLayerName.c_str()); 461 | 462 | std::string outputlayerName; 463 | nvinfer1::ITensor* num_detections = yolo->getOutput(0); 464 | outputlayerName = "num_detections"; 465 | num_detections->setName(outputlayerName.c_str()); 466 | nvinfer1::ITensor* detection_boxes = yolo->getOutput(1); 467 | outputlayerName = "detection_boxes"; 468 | detection_boxes->setName(outputlayerName.c_str()); 469 | nvinfer1::ITensor* detection_scores = yolo->getOutput(2); 470 | outputlayerName = "detection_scores"; 471 | detection_scores->setName(outputlayerName.c_str()); 472 | nvinfer1::ITensor* detection_classes = yolo->getOutput(3); 473 | outputlayerName = "detection_classes"; 474 | detection_classes->setName(outputlayerName.c_str()); 475 | network.markOutput(*num_detections); 476 | network.markOutput(*detection_boxes); 477 | network.markOutput(*detection_scores); 478 | network.markOutput(*detection_classes); 479 | } 480 | else { 481 | std::cout << "\nError in yolo cfg file" << std::endl; 482 | assert(0); 483 | } 484 | 485 | std::cout << "\nOutput YOLO blob names: " << std::endl; 486 | for (auto& tensor : m_YoloTensors) 487 | { 488 | std::cout << tensor.blobName << std::endl; 489 | } 490 | 491 | int nbLayers = network.getNbLayers(); 492 | std::cout << "\nTotal number of YOLO layers: " << nbLayers << "\n" << std::endl; 493 | 494 | return NVDSINFER_SUCCESS; 495 | } 496 | 497 | std::vector> 498 | Yolo::parseConfigFile (const std::string cfgFilePath) 499 | { 500 | assert(fileExists(cfgFilePath)); 501 | std::ifstream file(cfgFilePath); 502 | assert(file.good()); 503 | std::string line; 504 | std::vector> blocks; 505 | std::map block; 506 | 507 | while (getline(file, line)) 508 | { 509 | if (line.size() == 0) continue; 510 | if (line.front() == ' ') continue; 511 | if (line.front() == '#') continue; 512 | line = trim(line); 513 | if (line.front() == '[') 514 | { 515 | if (block.size() > 0) 516 | { 517 | blocks.push_back(block); 518 | block.clear(); 519 | } 520 | std::string key = "type"; 521 | std::string value = trim(line.substr(1, line.size() - 2)); 522 | block.insert(std::pair(key, value)); 523 | } 524 | else 525 | { 526 | int cpos = line.find('='); 527 | std::string key = trim(line.substr(0, cpos)); 528 | std::string value = trim(line.substr(cpos + 1)); 529 | block.insert(std::pair(key, value)); 530 | } 531 | } 532 | blocks.push_back(block); 533 | return blocks; 534 | } 535 | 536 | void Yolo::parseConfigBlocks() 537 | { 538 | for (auto block : m_ConfigBlocks) 539 | { 540 | if (block.at("type") == "net") 541 | { 542 | assert((block.find("height") != block.end()) && "Missing 'height' param in network cfg"); 543 | assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg"); 544 | assert((block.find("channels") != block.end()) && "Missing 'channels' param in network cfg"); 545 | 546 | m_InputH = std::stoul(block.at("height")); 547 | m_InputW = std::stoul(block.at("width")); 548 | m_InputC = std::stoul(block.at("channels")); 549 | 
m_InputSize = m_InputC * m_InputH * m_InputW; 550 | 551 | if (block.find("letter_box") != block.end()) 552 | { 553 | m_LetterBox = std::stoul(block.at("letter_box")); 554 | } 555 | } 556 | else if ((block.at("type") == "region") || (block.at("type") == "yolo")) 557 | { 558 | assert((block.find("num") != block.end()) 559 | && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str()); 560 | assert((block.find("classes") != block.end()) 561 | && std::string("Missing 'classes' param in " + block.at("type") + " layer").c_str()); 562 | assert((block.find("anchors") != block.end()) 563 | && std::string("Missing 'anchors' param in " + block.at("type") + " layer").c_str()); 564 | 565 | ++m_YoloCount; 566 | 567 | m_NumClasses = std::stoul(block.at("classes")); 568 | 569 | if (block.find("new_coords") != block.end()) 570 | { 571 | m_NewCoords = std::stoul(block.at("new_coords")); 572 | } 573 | 574 | TensorInfo outputTensor; 575 | 576 | std::string anchorString = block.at("anchors"); 577 | while (!anchorString.empty()) 578 | { 579 | int npos = anchorString.find_first_of(','); 580 | if (npos != -1) 581 | { 582 | float anchor = std::stof(trim(anchorString.substr(0, npos))); 583 | outputTensor.anchors.push_back(anchor); 584 | anchorString.erase(0, npos + 1); 585 | } 586 | else 587 | { 588 | float anchor = std::stof(trim(anchorString)); 589 | outputTensor.anchors.push_back(anchor); 590 | break; 591 | } 592 | } 593 | 594 | if (block.find("mask") != block.end()) 595 | { 596 | std::string maskString = block.at("mask"); 597 | while (!maskString.empty()) 598 | { 599 | int npos = maskString.find_first_of(','); 600 | if (npos != -1) 601 | { 602 | int mask = std::stoul(trim(maskString.substr(0, npos))); 603 | outputTensor.mask.push_back(mask); 604 | maskString.erase(0, npos + 1); 605 | } 606 | else 607 | { 608 | int mask = std::stoul(trim(maskString)); 609 | outputTensor.mask.push_back(mask); 610 | break; 611 | } 612 | } 613 | } 614 | 615 | if (block.find("scale_x_y") != block.end()) 616 | { 617 | outputTensor.scaleXY = std::stof(block.at("scale_x_y")); 618 | } 619 | else 620 | { 621 | outputTensor.scaleXY = 1.0; 622 | } 623 | 624 | outputTensor.numBBoxes 625 | = outputTensor.mask.size() > 0 ? outputTensor.mask.size() : std::stoul(trim(block.at("num"))); 626 | 627 | m_YoloTensors.push_back(outputTensor); 628 | } 629 | else if ((block.at("type") == "cls") || (block.at("type") == "reg")) 630 | { 631 | ++m_YoloCount; 632 | TensorInfo outputTensor; 633 | m_YoloTensors.push_back(outputTensor); 634 | } 635 | } 636 | } 637 | 638 | void Yolo::destroyNetworkUtils() 639 | { 640 | for (uint i = 0; i < m_TrtWeights.size(); ++i) 641 | if (m_TrtWeights[i].count > 0) 642 | free(const_cast(m_TrtWeights[i].values)); 643 | m_TrtWeights.clear(); 644 | } 645 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yolo.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #ifndef _YOLO_H_ 27 | #define _YOLO_H_ 28 | 29 | #include "layers/convolutional_layer.h" 30 | #include "layers/batchnorm_layer.h" 31 | #include "layers/implicit_layer.h" 32 | #include "layers/channels_layer.h" 33 | #include "layers/shortcut_layer.h" 34 | #include "layers/route_layer.h" 35 | #include "layers/upsample_layer.h" 36 | #include "layers/pooling_layer.h" 37 | #include "layers/reorg_layer.h" 38 | #include "layers/reduce_layer.h" 39 | #include "layers/shuffle_layer.h" 40 | #include "layers/softmax_layer.h" 41 | #include "layers/cls_layer.h" 42 | #include "layers/reg_layer.h" 43 | 44 | #include "nvdsinfer_custom_impl.h" 45 | 46 | struct NetworkInfo 47 | { 48 | std::string inputBlobName; 49 | std::string networkType; 50 | std::string configFilePath; 51 | std::string wtsFilePath; 52 | std::string int8CalibPath; 53 | std::string deviceType; 54 | uint numDetectedClasses; 55 | int clusterMode; 56 | float scoreThreshold; 57 | std::string networkMode; 58 | }; 59 | 60 | struct TensorInfo 61 | { 62 | std::string blobName; 63 | uint gridSizeX {0}; 64 | uint gridSizeY {0}; 65 | uint numBBoxes {0}; 66 | float scaleXY; 67 | std::vector anchors; 68 | std::vector mask; 69 | }; 70 | 71 | class Yolo : public IModelParser { 72 | public: 73 | Yolo(const NetworkInfo& networkInfo); 74 | 75 | ~Yolo() override; 76 | 77 | bool hasFullDimsSupported() const override { return false; } 78 | 79 | const char* getModelName() const override { 80 | return m_ConfigFilePath.empty() ? 
m_NetworkType.c_str() : m_ConfigFilePath.c_str(); 81 | } 82 | 83 | NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override; 84 | 85 | nvinfer1::ICudaEngine *createEngine (nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config); 86 | 87 | protected: 88 | const std::string m_InputBlobName; 89 | const std::string m_NetworkType; 90 | const std::string m_ConfigFilePath; 91 | const std::string m_WtsFilePath; 92 | const std::string m_Int8CalibPath; 93 | const std::string m_DeviceType; 94 | const uint m_NumDetectedClasses; 95 | const int m_ClusterMode; 96 | const std::string m_NetworkMode; 97 | const float m_ScoreThreshold; 98 | 99 | uint m_InputH; 100 | uint m_InputW; 101 | uint m_InputC; 102 | uint64_t m_InputSize; 103 | uint m_NumClasses; 104 | uint m_LetterBox; 105 | uint m_NewCoords; 106 | uint m_YoloCount; 107 | 108 | std::vector m_YoloTensors; 109 | std::vector> m_ConfigBlocks; 110 | std::vector m_TrtWeights; 111 | 112 | private: 113 | NvDsInferStatus buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition& network); 114 | 115 | std::vector> parseConfigFile(const std::string cfgFilePath); 116 | 117 | void parseConfigBlocks(); 118 | 119 | void destroyNetworkUtils(); 120 | }; 121 | 122 | #endif // _YOLO_H_ 123 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | 8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 9 | 10 | __global__ void gpuYoloLayer( 11 | const float* input, int* num_detections, float* detection_boxes, float* detection_scores, int* detection_classes, 12 | const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, 13 | const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors, const int* mask) 14 | { 15 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 16 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 17 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 18 | 19 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) 20 | return; 21 | 22 | const int numGridCells = gridSizeX * gridSizeY; 23 | const int bbindex = y_id * gridSizeX + x_id; 24 | 25 | const float objectness 26 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 27 | 28 | if (objectness < scoreThreshold) 29 | return; 30 | 31 | int count = (int)atomicAdd(num_detections, 1); 32 | 33 | const float alpha = scaleXY; 34 | const float beta = -0.5 * (scaleXY - 1); 35 | 36 | float x 37 | = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) 38 | * alpha + beta + x_id) * netWidth / gridSizeX; 39 | 40 | float y 41 | = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) 42 | * alpha + beta + y_id) * netHeight / gridSizeY; 43 | 44 | float w 45 | = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) 46 | * anchors[mask[z_id] * 2]; 47 | 48 | float h 49 | = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) 50 | * anchors[mask[z_id] * 2 + 1]; 51 | 52 | float maxProb = 0.0f; 53 | int maxIndex = -1; 54 | 55 | for (uint i = 0; i < numOutputClasses; ++i) 56 | { 57 | float prob 58 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + 
numOutputClasses) + (5 + i))]); 59 | 60 | if (prob > maxProb) 61 | { 62 | maxProb = prob; 63 | maxIndex = i; 64 | } 65 | } 66 | 67 | detection_boxes[count * 4 + 0] = x - 0.5 * w; 68 | detection_boxes[count * 4 + 1] = y - 0.5 * h; 69 | detection_boxes[count * 4 + 2] = x + 0.5 * w; 70 | detection_boxes[count * 4 + 3] = y + 0.5 * h; 71 | detection_scores[count] = objectness * maxProb; 72 | detection_classes[count] = maxIndex; 73 | } 74 | 75 | cudaError_t cudaYoloLayer( 76 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 77 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 78 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 79 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 80 | 81 | cudaError_t cudaYoloLayer( 82 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 83 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 84 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 85 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) 86 | { 87 | dim3 threads_per_block(16, 16, 4); 88 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, 89 | (gridSizeY / threads_per_block.y) + 1, 90 | (numBBoxes / threads_per_block.z) + 1); 91 | 92 | for (unsigned int batch = 0; batch < batchSize; ++batch) 93 | { 94 | gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>( 95 | reinterpret_cast<const float*>(input) + (batch * inputSize), 96 | reinterpret_cast<int*>(num_detections) + (batch), 97 | reinterpret_cast<float*>(detection_boxes) + (batch * 4 * outputSize), 98 | reinterpret_cast<float*>(detection_scores) + (batch * outputSize), 99 | reinterpret_cast<int*>(detection_classes) + (batch * outputSize), 100 | scoreThreshold, netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, 101 | reinterpret_cast<const float*>(anchors), reinterpret_cast<const int*>(mask)); 102 | } 103 | return cudaGetLastError(); 104 | } 105 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward_e.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include <cstdint> 7 | #include <cuda_runtime.h> 8 | 9 | __global__ void gpuYoloLayer_e( 10 | const float* cls, const float* reg, int* num_detections, float* detection_boxes, float* detection_scores, 11 | int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, 12 | const uint numOutputClasses, const uint64_t outputSize) 13 | { 14 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 15 | 16 | if (x_id >= outputSize) 17 | return; 18 | 19 | float maxProb = 0.0f; 20 | int maxIndex = -1; 21 | 22 | for (uint i = 0; i < numOutputClasses; ++i) 23 | { 24 | float prob 25 | = cls[x_id * numOutputClasses + i]; 26 | 27 | if (prob > maxProb) 28 | { 29 | maxProb = prob; 30 | maxIndex = i; 31 | } 32 | } 33 | 34 | if (maxProb < scoreThreshold) 35 | return; 36 | 37 | int count = (int)atomicAdd(num_detections, 1); 38 | 39 | detection_boxes[count * 4 + 0] = reg[x_id * 4 + 0]; 40 | detection_boxes[count * 4 + 1] = reg[x_id * 4 + 1]; 41 | detection_boxes[count * 4 + 2] = reg[x_id * 4 + 2]; 42 |
detection_boxes[count * 4 + 3] = reg[x_id * 4 + 3]; 43 | detection_scores[count] = maxProb; 44 | detection_classes[count] = maxIndex; 45 | } 46 | 47 | cudaError_t cudaYoloLayer_e( 48 | const void* cls, const void* reg, void* num_detections, void* detection_boxes, void* detection_scores, 49 | void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 50 | const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream); 51 | 52 | cudaError_t cudaYoloLayer_e( 53 | const void* cls, const void* reg, void* num_detections, void* detection_boxes, void* detection_scores, 54 | void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 55 | const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream) 56 | { 57 | int threads_per_block = 16; 58 | int number_of_blocks = (outputSize / threads_per_block) + 1; 59 | 60 | for (unsigned int batch = 0; batch < batchSize; ++batch) 61 | { 62 | gpuYoloLayer_e<<>>( 63 | reinterpret_cast(cls) + (batch * numOutputClasses * outputSize), 64 | reinterpret_cast(reg) + (batch * 4 * outputSize), 65 | reinterpret_cast(num_detections) + (batch), 66 | reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), 67 | reinterpret_cast(detection_scores) + (batch * outputSize), 68 | reinterpret_cast(detection_classes) + (batch * outputSize), 69 | scoreThreshold, netWidth, netHeight, numOutputClasses, outputSize); 70 | } 71 | return cudaGetLastError(); 72 | } 73 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | 8 | __global__ void gpuYoloLayer_nc( 9 | const float* input, int* num_detections, float* detection_boxes, float* detection_scores, int* detection_classes, 10 | const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, 11 | const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors, const int* mask) 12 | { 13 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 14 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 15 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 16 | 17 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) 18 | return; 19 | 20 | const int numGridCells = gridSizeX * gridSizeY; 21 | const int bbindex = y_id * gridSizeX + x_id; 22 | 23 | const float objectness 24 | = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]; 25 | 26 | if (objectness < scoreThreshold) 27 | return; 28 | 29 | int count = (int)atomicAdd(num_detections, 1); 30 | 31 | const float alpha = scaleXY; 32 | const float beta = -0.5 * (scaleXY - 1); 33 | 34 | float x 35 | = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] 36 | * alpha + beta + x_id) * netWidth / gridSizeX; 37 | 38 | float y 39 | = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] 40 | * alpha + beta + y_id) * netHeight / gridSizeY; 41 | 42 | float w 43 | = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) 44 | * anchors[mask[z_id] * 2]; 45 | 46 | float h 47 | = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2) 48 | * anchors[mask[z_id] * 2 + 1]; 49 | 50 | float maxProb = 0.0f; 51 | int 
maxIndex = -1; 52 | 53 | for (uint i = 0; i < numOutputClasses; ++i) 54 | { 55 | float prob 56 | = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 57 | 58 | if (prob > maxProb) 59 | { 60 | maxProb = prob; 61 | maxIndex = i; 62 | } 63 | } 64 | 65 | detection_boxes[count * 4 + 0] = x - 0.5 * w; 66 | detection_boxes[count * 4 + 1] = y - 0.5 * h; 67 | detection_boxes[count * 4 + 2] = x + 0.5 * w; 68 | detection_boxes[count * 4 + 3] = y + 0.5 * h; 69 | detection_scores[count] = objectness * maxProb; 70 | detection_classes[count] = maxIndex; 71 | } 72 | 73 | cudaError_t cudaYoloLayer_nc( 74 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 75 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 76 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 77 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 78 | 79 | cudaError_t cudaYoloLayer_nc( 80 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 81 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 82 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 83 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) 84 | { 85 | dim3 threads_per_block(16, 16, 4); 86 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, 87 | (gridSizeY / threads_per_block.y) + 1, 88 | (numBBoxes / threads_per_block.z) + 1); 89 | 90 | for (unsigned int batch = 0; batch < batchSize; ++batch) 91 | { 92 | gpuYoloLayer_nc<<>>( 93 | reinterpret_cast(input) + (batch * inputSize), 94 | reinterpret_cast(num_detections) + (batch), 95 | reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), 96 | reinterpret_cast(detection_scores) + (batch * outputSize), 97 | reinterpret_cast(detection_classes) + (batch * outputSize), 98 | scoreThreshold, netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, 99 | reinterpret_cast(anchors), reinterpret_cast(mask)); 100 | } 101 | return cudaGetLastError(); 102 | } 103 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward_r.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | 8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 9 | 10 | __global__ void gpuYoloLayer_r( 11 | const float* input, int* num_detections, float* detection_boxes, float* detection_scores, int* detection_classes, 12 | const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, 13 | const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors, const int* mask) 14 | { 15 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 16 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 17 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 18 | 19 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) 20 | return; 21 | 22 | const int numGridCells = gridSizeX * gridSizeY; 23 | const int bbindex = y_id * gridSizeX + 
x_id; 24 | 25 | const float objectness 26 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 27 | 28 | if (objectness < scoreThreshold) 29 | return; 30 | 31 | int count = (int)atomicAdd(num_detections, 1); 32 | 33 | const float alpha = scaleXY; 34 | const float beta = -0.5 * (scaleXY - 1); 35 | 36 | float x 37 | = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) 38 | * alpha + beta + x_id) * netWidth / gridSizeX; 39 | 40 | float y 41 | = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) 42 | * alpha + beta + y_id) * netHeight / gridSizeY; 43 | 44 | float w 45 | = __powf(sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * 2, 2) 46 | * anchors[mask[z_id] * 2]; 47 | 48 | float h 49 | = __powf(sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * 2, 2) 50 | * anchors[mask[z_id] * 2 + 1]; 51 | 52 | float maxProb = 0.0f; 53 | int maxIndex = -1; 54 | 55 | for (uint i = 0; i < numOutputClasses; ++i) 56 | { 57 | float prob 58 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]); 59 | 60 | if (prob > maxProb) 61 | { 62 | maxProb = prob; 63 | maxIndex = i; 64 | } 65 | } 66 | 67 | detection_boxes[count * 4 + 0] = x - 0.5 * w; 68 | detection_boxes[count * 4 + 1] = y - 0.5 * h; 69 | detection_boxes[count * 4 + 2] = x + 0.5 * w; 70 | detection_boxes[count * 4 + 3] = y + 0.5 * h; 71 | detection_scores[count] = objectness * maxProb; 72 | detection_classes[count] = maxIndex; 73 | } 74 | 75 | cudaError_t cudaYoloLayer_r( 76 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 77 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 78 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 79 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 80 | 81 | cudaError_t cudaYoloLayer_r( 82 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 83 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 84 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 85 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) 86 | { 87 | dim3 threads_per_block(16, 16, 4); 88 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, 89 | (gridSizeY / threads_per_block.y) + 1, 90 | (numBBoxes / threads_per_block.z) + 1); 91 | 92 | for (unsigned int batch = 0; batch < batchSize; ++batch) 93 | { 94 | gpuYoloLayer_r<<>>( 95 | reinterpret_cast(input) + (batch * inputSize), 96 | reinterpret_cast(num_detections) + (batch), 97 | reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), 98 | reinterpret_cast(detection_scores) + (batch * outputSize), 99 | reinterpret_cast(detection_classes) + (batch * outputSize), 100 | scoreThreshold, netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, 101 | reinterpret_cast(anchors), reinterpret_cast(mask)); 102 | } 103 | return cudaGetLastError(); 104 | } 105 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | 8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 9 | 10 | __device__ void softmaxGPU( 11 | const float* input, const int bbindex, const int numGridCells, uint z_id, const uint numOutputClasses, float temp, 12 | float* output) 13 | { 14 | int i; 15 | float sum = 0; 16 | float largest = -INFINITY; 17 | for (i = 0; i < numOutputClasses; ++i) { 18 | int val = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 19 | largest = (val>largest) ? val : largest; 20 | } 21 | for (i = 0; i < numOutputClasses; ++i) { 22 | float e = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] / temp - largest / temp); 23 | sum += e; 24 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] = e; 25 | } 26 | for (i = 0; i < numOutputClasses; ++i) { 27 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] /= sum; 28 | } 29 | } 30 | 31 | __global__ void gpuRegionLayer( 32 | const float* input, float* softmax, int* num_detections, float* detection_boxes, float* detection_scores, 33 | int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, 34 | const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float* anchors) 35 | { 36 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 37 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 38 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 39 | 40 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) 41 | return; 42 | 43 | const int numGridCells = gridSizeX * gridSizeY; 44 | const int bbindex = y_id * gridSizeX + x_id; 45 | 46 | const float objectness 47 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 48 | 49 | if (objectness < scoreThreshold) 50 | return; 51 | 52 | int count = (int)atomicAdd(num_detections, 1); 53 | 54 | float x 55 | = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) 56 | + x_id) * netWidth / gridSizeX; 57 | 58 | float y 59 | = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) 60 | + y_id) * netHeight / gridSizeY; 61 | 62 | float w 63 | = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) 64 | * anchors[z_id * 2] * netWidth / gridSizeX; 65 | 66 | float h 67 | = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) 68 | * anchors[z_id * 2 + 1] * netHeight / gridSizeY; 69 | 70 | softmaxGPU(input, bbindex, numGridCells, z_id, numOutputClasses, 1.0, softmax); 71 | 72 | float maxProb = 0.0f; 73 | int maxIndex = -1; 74 | 75 | for (uint i = 0; i < numOutputClasses; ++i) 76 | { 77 | float prob 78 | = softmax[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 79 | 80 | if (prob > maxProb) 81 | { 82 | maxProb = prob; 83 | maxIndex = i; 84 | } 85 | } 86 | 87 | detection_boxes[count * 4 + 0] = x - 0.5 * w; 88 | detection_boxes[count * 4 + 1] = y - 0.5 * h; 89 | detection_boxes[count * 4 + 2] = x + 0.5 * w; 90 | detection_boxes[count * 4 + 3] = y + 0.5 * h; 91 | detection_scores[count] = objectness * maxProb; 92 | detection_classes[count] = maxIndex; 93 | } 94 | 95 | cudaError_t cudaRegionLayer( 96 | const void* input, void* softmax, void* num_detections, void* detection_boxes, 
void* detection_scores, 97 | void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, 98 | const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 99 | const uint& numBBoxes, const void* anchors, cudaStream_t stream); 100 | 101 | cudaError_t cudaRegionLayer( 102 | const void* input, void* softmax, void* num_detections, void* detection_boxes, void* detection_scores, 103 | void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, 104 | const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 105 | const uint& numBBoxes, const void* anchors, cudaStream_t stream) 106 | { 107 | dim3 threads_per_block(16, 16, 4); 108 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, 109 | (gridSizeY / threads_per_block.y) + 1, 110 | (numBBoxes / threads_per_block.z) + 1); 111 | 112 | for (unsigned int batch = 0; batch < batchSize; ++batch) 113 | { 114 | gpuRegionLayer<<>>( 115 | reinterpret_cast(input) + (batch * inputSize), 116 | reinterpret_cast(softmax) + (batch * inputSize), 117 | reinterpret_cast(num_detections) + (batch), 118 | reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), 119 | reinterpret_cast(detection_scores) + (batch * outputSize), 120 | reinterpret_cast(detection_classes) + (batch * outputSize), 121 | scoreThreshold, netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, 122 | reinterpret_cast(anchors)); 123 | } 124 | return cudaGetLastError(); 125 | } 126 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "yoloPlugins.h" 27 | #include "NvInferPlugin.h" 28 | #include <cassert> 29 | #include <cstdint> 30 | #include <iostream> 31 | 32 | uint kNUM_CLASSES; 33 | 34 | namespace { 35 | template <typename T> 36 | void write(char*& buffer, const T& val) 37 | { 38 | *reinterpret_cast<T*>(buffer) = val; 39 | buffer += sizeof(T); 40 | } 41 | 42 | template <typename T> 43 | void read(const char*& buffer, T& val) 44 | { 45 | val = *reinterpret_cast<const T*>(buffer); 46 | buffer += sizeof(T); 47 | } 48 | } 49 | 50 | cudaError_t cudaYoloLayer_e( 51 | const void* cls, const void* reg, void* num_detections, void* detection_boxes, void* detection_scores, 52 | void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 53 | const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream); 54 | 55 | cudaError_t cudaYoloLayer_r( 56 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 57 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 58 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 59 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 60 | 61 | cudaError_t cudaYoloLayer_nc( 62 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 63 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 64 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 65 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 66 | 67 | cudaError_t cudaYoloLayer( 68 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 69 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 70 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 71 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 72 | 73 | cudaError_t cudaRegionLayer( 74 | const void* input, void* softmax, void* num_detections, void* detection_boxes, void* detection_scores, 75 | void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, 76 | const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 77 | const uint& numBBoxes, const void* anchors, cudaStream_t stream); 78 | 79 | YoloLayer::YoloLayer (const void* data, size_t length) 80 | { 81 | const char *d = static_cast<const char*>(data); 82 | 83 | read(d, m_NetWidth); 84 | read(d, m_NetHeight); 85 | read(d, m_NumClasses); 86 | read(d, m_NewCoords); 87 | read(d, m_OutputSize); 88 | read(d, m_Type); 89 | read(d, m_ScoreThreshold); 90 | 91 | if (m_Type != 3) { 92 | uint yoloTensorsSize; 93 | read(d, yoloTensorsSize); 94 | for (uint i = 0; i < yoloTensorsSize; ++i) 95 | { 96 | TensorInfo curYoloTensor; 97 | read(d, curYoloTensor.gridSizeX); 98 | read(d, curYoloTensor.gridSizeY); 99 | read(d, curYoloTensor.numBBoxes); 100 | read(d, curYoloTensor.scaleXY); 101 | 102 | uint anchorsSize; 103 | read(d, anchorsSize); 104 | for (uint j =
0; j < anchorsSize; j++) 105 | { 106 | float result; 107 | read(d, result); 108 | curYoloTensor.anchors.push_back(result); 109 | } 110 | 111 | uint maskSize; 112 | read(d, maskSize); 113 | for (uint j = 0; j < maskSize; j++) 114 | { 115 | int result; 116 | read(d, result); 117 | curYoloTensor.mask.push_back(result); 118 | } 119 | m_YoloTensors.push_back(curYoloTensor); 120 | } 121 | } 122 | 123 | kNUM_CLASSES = m_NumClasses; 124 | }; 125 | 126 | YoloLayer::YoloLayer( 127 | const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords, 128 | const std::vector& yoloTensors, const uint64_t& outputSize, const uint& modelType, 129 | const float& scoreThreshold) : 130 | m_NetWidth(netWidth), 131 | m_NetHeight(netHeight), 132 | m_NumClasses(numClasses), 133 | m_NewCoords(newCoords), 134 | m_YoloTensors(yoloTensors), 135 | m_OutputSize(outputSize), 136 | m_Type(modelType), 137 | m_ScoreThreshold(scoreThreshold) 138 | { 139 | assert(m_NetWidth > 0); 140 | assert(m_NetHeight > 0); 141 | 142 | kNUM_CLASSES = m_NumClasses; 143 | }; 144 | 145 | nvinfer1::Dims 146 | YoloLayer::getOutputDimensions( 147 | int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept 148 | { 149 | assert(index <= 4); 150 | if (index == 0) { 151 | return nvinfer1::Dims{1, {1}}; 152 | } 153 | else if (index == 1) { 154 | return nvinfer1::Dims{2, {static_cast(m_OutputSize), 4}}; 155 | } 156 | return nvinfer1::Dims{1, {static_cast(m_OutputSize)}}; 157 | } 158 | 159 | bool YoloLayer::supportsFormat ( 160 | nvinfer1::DataType type, nvinfer1::PluginFormat format) const noexcept { 161 | return (type == nvinfer1::DataType::kFLOAT && 162 | format == nvinfer1::PluginFormat::kLINEAR); 163 | } 164 | 165 | void 166 | YoloLayer::configureWithFormat ( 167 | const nvinfer1::Dims* inputDims, int nbInputs, 168 | const nvinfer1::Dims* outputDims, int nbOutputs, 169 | nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) noexcept 170 | { 171 | assert(nbInputs > 0); 172 | assert(format == nvinfer1::PluginFormat::kLINEAR); 173 | assert(inputDims != nullptr); 174 | } 175 | 176 | int32_t YoloLayer::enqueue ( 177 | int batchSize, void const* const* inputs, void* const* outputs, void* workspace, 178 | cudaStream_t stream) noexcept 179 | { 180 | void* num_detections = outputs[0]; 181 | void* detection_boxes = outputs[1]; 182 | void* detection_scores = outputs[2]; 183 | void* detection_classes = outputs[3]; 184 | 185 | CUDA_CHECK(cudaMemsetAsync((int*)num_detections, 0, sizeof(int) * batchSize, stream)); 186 | CUDA_CHECK(cudaMemsetAsync((float*)detection_boxes, 0, sizeof(float) * m_OutputSize * 4 * batchSize, stream)); 187 | CUDA_CHECK(cudaMemsetAsync((float*)detection_scores, 0, sizeof(float) * m_OutputSize * batchSize, stream)); 188 | CUDA_CHECK(cudaMemsetAsync((int*)detection_classes, 0, sizeof(int) * m_OutputSize * batchSize, stream)); 189 | 190 | if (m_Type == 3) 191 | { 192 | CUDA_CHECK(cudaYoloLayer_e( 193 | inputs[0], inputs[1], num_detections, detection_boxes, detection_scores, detection_classes, batchSize, 194 | m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, m_NumClasses, stream)); 195 | } 196 | else 197 | { 198 | uint yoloTensorsSize = m_YoloTensors.size(); 199 | for (uint i = 0; i < yoloTensorsSize; ++i) 200 | { 201 | TensorInfo& curYoloTensor = m_YoloTensors.at(i); 202 | 203 | uint numBBoxes = curYoloTensor.numBBoxes; 204 | float scaleXY = curYoloTensor.scaleXY; 205 | uint gridSizeX = curYoloTensor.gridSizeX; 206 | uint gridSizeY = curYoloTensor.gridSizeY; 207 | std::vector 
anchors = curYoloTensor.anchors; 208 | std::vector mask = curYoloTensor.mask; 209 | 210 | void* v_anchors; 211 | void* v_mask; 212 | if (anchors.size() > 0) { 213 | float* f_anchors = anchors.data(); 214 | CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size())); 215 | CUDA_CHECK(cudaMemcpy(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice)); 216 | } 217 | if (mask.size() > 0) { 218 | int* f_mask = mask.data(); 219 | CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size())); 220 | CUDA_CHECK(cudaMemcpy(v_mask, f_mask, sizeof(int) * mask.size(), cudaMemcpyHostToDevice)); 221 | } 222 | 223 | uint64_t inputSize = gridSizeX * gridSizeY * (numBBoxes * (4 + 1 + m_NumClasses)); 224 | 225 | if (m_Type == 2) { // YOLOR incorrect param: scale_x_y = 2.0 226 | CUDA_CHECK(cudaYoloLayer_r( 227 | inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, batchSize, inputSize, 228 | m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, 229 | 2.0, v_anchors, v_mask, stream)); 230 | } 231 | else if (m_Type == 1) { 232 | if (m_NewCoords) { 233 | CUDA_CHECK(cudaYoloLayer_nc( 234 | inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, batchSize, 235 | inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, 236 | m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream)); 237 | } 238 | else { 239 | CUDA_CHECK(cudaYoloLayer( 240 | inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, batchSize, 241 | inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, 242 | m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream)); 243 | } 244 | } 245 | else { 246 | void* softmax; 247 | CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize)); 248 | CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize)); 249 | 250 | CUDA_CHECK(cudaRegionLayer( 251 | inputs[i], softmax, num_detections, detection_boxes, detection_scores, detection_classes, batchSize, 252 | inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, 253 | numBBoxes, v_anchors, stream)); 254 | 255 | CUDA_CHECK(cudaFree(softmax)); 256 | } 257 | 258 | if (anchors.size() > 0) { 259 | CUDA_CHECK(cudaFree(v_anchors)); 260 | } 261 | if (mask.size() > 0) { 262 | CUDA_CHECK(cudaFree(v_mask)); 263 | } 264 | } 265 | } 266 | 267 | return 0; 268 | } 269 | 270 | size_t YoloLayer::getSerializationSize() const noexcept 271 | { 272 | size_t totalSize = 0; 273 | 274 | totalSize += sizeof(m_NetWidth); 275 | totalSize += sizeof(m_NetHeight); 276 | totalSize += sizeof(m_NumClasses); 277 | totalSize += sizeof(m_NewCoords); 278 | totalSize += sizeof(m_OutputSize); 279 | totalSize += sizeof(m_Type); 280 | totalSize += sizeof(m_ScoreThreshold); 281 | 282 | if (m_Type != 3) { 283 | uint yoloTensorsSize = m_YoloTensors.size(); 284 | totalSize += sizeof(yoloTensorsSize); 285 | 286 | for (uint i = 0; i < yoloTensorsSize; ++i) 287 | { 288 | const TensorInfo& curYoloTensor = m_YoloTensors.at(i); 289 | totalSize += sizeof(curYoloTensor.gridSizeX); 290 | totalSize += sizeof(curYoloTensor.gridSizeY); 291 | totalSize += sizeof(curYoloTensor.numBBoxes); 292 | totalSize += sizeof(curYoloTensor.scaleXY); 293 | totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size(); 294 | totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * 
curYoloTensor.mask.size(); 295 | } 296 | } 297 | 298 | return totalSize; 299 | } 300 | 301 | void YoloLayer::serialize(void* buffer) const noexcept 302 | { 303 | char *d = static_cast(buffer); 304 | 305 | write(d, m_NetWidth); 306 | write(d, m_NetHeight); 307 | write(d, m_NumClasses); 308 | write(d, m_NewCoords); 309 | write(d, m_OutputSize); 310 | write(d, m_Type); 311 | write(d, m_ScoreThreshold); 312 | 313 | if (m_Type != 3) { 314 | uint yoloTensorsSize = m_YoloTensors.size(); 315 | write(d, yoloTensorsSize); 316 | for (uint i = 0; i < yoloTensorsSize; ++i) 317 | { 318 | const TensorInfo& curYoloTensor = m_YoloTensors.at(i); 319 | write(d, curYoloTensor.gridSizeX); 320 | write(d, curYoloTensor.gridSizeY); 321 | write(d, curYoloTensor.numBBoxes); 322 | write(d, curYoloTensor.scaleXY); 323 | 324 | uint anchorsSize = curYoloTensor.anchors.size(); 325 | write(d, anchorsSize); 326 | for (uint j = 0; j < anchorsSize; ++j) 327 | { 328 | write(d, curYoloTensor.anchors[j]); 329 | } 330 | 331 | uint maskSize = curYoloTensor.mask.size(); 332 | write(d, maskSize); 333 | for (uint j = 0; j < maskSize; ++j) 334 | { 335 | write(d, curYoloTensor.mask[j]); 336 | } 337 | } 338 | } 339 | } 340 | 341 | nvinfer1::IPluginV2* YoloLayer::clone() const noexcept 342 | { 343 | return new YoloLayer ( 344 | m_NetWidth, m_NetHeight, m_NumClasses, m_NewCoords, m_YoloTensors, m_OutputSize, m_Type, m_ScoreThreshold); 345 | } 346 | 347 | REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator); 348 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloPlugins.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #ifndef __YOLO_PLUGINS__ 27 | #define __YOLO_PLUGINS__ 28 | 29 | #include <cstdint> 30 | #include <iostream> 31 | #include <memory> 32 | #include <string> 33 | #include <vector> 34 | 35 | #include <cuda_runtime_api.h> 36 | 37 | #include "NvInferPlugin.h" 38 | 39 | #include "yolo.h" 40 | 41 | #define CUDA_CHECK(status) \ 42 | { \ 43 | if (status != 0) \ 44 | { \ 45 | std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " \ 46 | << __LINE__ << std::endl; \ 47 | abort(); \ 48 | } \ 49 | } 50 | 51 | namespace 52 | { 53 | const char* YOLOLAYER_PLUGIN_VERSION {"1"}; 54 | const char* YOLOLAYER_PLUGIN_NAME {"YoloLayer_TRT"}; 55 | } // namespace 56 | 57 | class YoloLayer : public nvinfer1::IPluginV2 58 | { 59 | public: 60 | YoloLayer (const void* data, size_t length); 61 | 62 | YoloLayer ( 63 | const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords, 64 | const std::vector<TensorInfo>& yoloTensors, const uint64_t& outputSize, const uint& modelType, 65 | const float& scoreThreshold); 66 | 67 | const char* getPluginType () const noexcept override { return YOLOLAYER_PLUGIN_NAME; } 68 | 69 | const char* getPluginVersion () const noexcept override { return YOLOLAYER_PLUGIN_VERSION; } 70 | 71 | int getNbOutputs () const noexcept override { return 4; } 72 | 73 | nvinfer1::Dims getOutputDimensions ( 74 | int index, const nvinfer1::Dims* inputs, 75 | int nbInputDims) noexcept override; 76 | 77 | bool supportsFormat ( 78 | nvinfer1::DataType type, nvinfer1::PluginFormat format) const noexcept override; 79 | 80 | void configureWithFormat ( 81 | const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, int nbOutputs, 82 | nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) noexcept override; 83 | 84 | int initialize () noexcept override { return 0; } 85 | 86 | void terminate () noexcept override {} 87 | 88 | size_t getWorkspaceSize (int maxBatchSize) const noexcept override { return 0; } 89 | 90 | int32_t enqueue ( 91 | int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) 92 | noexcept override; 93 | 94 | size_t getSerializationSize() const noexcept override; 95 | 96 | void serialize (void* buffer) const noexcept override; 97 | 98 | void destroy () noexcept override { delete this; } 99 | 100 | nvinfer1::IPluginV2* clone() const noexcept override; 101 | 102 | void setPluginNamespace (const char* pluginNamespace) noexcept override { 103 | m_Namespace = pluginNamespace; 104 | } 105 | 106 | virtual const char* getPluginNamespace () const noexcept override { 107 | return m_Namespace.c_str(); 108 | } 109 | 110 | private: 111 | std::string m_Namespace {""}; 112 | uint m_NetWidth {0}; 113 | uint m_NetHeight {0}; 114 | uint m_NumClasses {0}; 115 | uint m_NewCoords {0}; 116 | std::vector<TensorInfo> m_YoloTensors; 117 | uint64_t m_OutputSize {0}; 118 | uint m_Type {0}; 119 | float m_ScoreThreshold {0}; 120 | }; 121 | 122 | class YoloLayerPluginCreator : public nvinfer1::IPluginCreator 123 | { 124 | public: 125 | YoloLayerPluginCreator () {} 126 | 127 | ~YoloLayerPluginCreator () {} 128 | 129 | const char* getPluginName () const noexcept override { return YOLOLAYER_PLUGIN_NAME; } 130 | 131 | const char* getPluginVersion () const noexcept override { return YOLOLAYER_PLUGIN_VERSION; } 132 | 133 | const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override { 134 | std::cerr<< "YoloLayerPluginCreator::getFieldNames is not
implemented" << std::endl; 135 | return nullptr; 136 | } 137 | 138 | nvinfer1::IPluginV2* createPlugin ( 139 | const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept override 140 | { 141 | std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented"; 142 | return nullptr; 143 | } 144 | 145 | nvinfer1::IPluginV2* deserializePlugin ( 146 | const char* name, const void* serialData, size_t serialLength) noexcept override 147 | { 148 | std::cout << "Deserialize yoloLayer plugin: " << name << std::endl; 149 | return new YoloLayer(serialData, serialLength); 150 | } 151 | 152 | void setPluginNamespace(const char* libNamespace) noexcept override { 153 | m_Namespace = libNamespace; 154 | } 155 | const char* getPluginNamespace() const noexcept override { 156 | return m_Namespace.c_str(); 157 | } 158 | 159 | private: 160 | std::string m_Namespace {""}; 161 | }; 162 | 163 | extern uint kNUM_CLASSES; 164 | 165 | #endif // __YOLO_PLUGINS__ 166 | --------------------------------------------------------------------------------