├── .github └── FUNDING.yml ├── .gitignore ├── README.md ├── config_infer_primary_yoloV8.txt ├── deepstream_app_config.txt ├── labels.txt └── nvdsinfer_custom_impl_Yolo ├── Makefile ├── calibrator.cpp ├── calibrator.h ├── layers ├── activation_layer.cpp ├── activation_layer.h ├── batchnorm_layer.cpp ├── batchnorm_layer.h ├── channels_layer.cpp ├── channels_layer.h ├── cls_layer.cpp ├── cls_layer.h ├── convolutional_layer.cpp ├── convolutional_layer.h ├── implicit_layer.cpp ├── implicit_layer.h ├── pooling_layer.cpp ├── pooling_layer.h ├── reduce_layer.cpp ├── reduce_layer.h ├── reg_layer.cpp ├── reg_layer.h ├── reorg_layer.cpp ├── reorg_layer.h ├── route_layer.cpp ├── route_layer.h ├── shortcut_layer.cpp ├── shortcut_layer.h ├── shuffle_layer.cpp ├── shuffle_layer.h ├── softmax_layer.cpp ├── softmax_layer.h ├── upsample_layer.cpp └── upsample_layer.h ├── nvdsinfer_yolo_engine.cpp ├── nvdsparsebbox_Yolo.cpp ├── utils.cpp ├── utils.h ├── yolo.cpp ├── yolo.h ├── yoloForward.cu ├── yoloForward_e.cu ├── yoloForward_nc.cu ├── yoloForward_r.cu ├── yoloForward_v2.cu ├── yoloPlugins.cpp └── yoloPlugins.h /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | #github: [marcoslucianops] 2 | custom: ['https://www.buymeacoffee.com/marcoslucianops'] 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.pyc 4 | *.onnx 5 | *.engine 6 | *.pt 7 | *.trt 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YoloV8_Deepstream 2 | `YOLOv8` running on NVIDIA DeepStream 3 | 4 | # Prepare the environment 5 | 1. Install TensorRT by following the [`TensorRT install`](https://developer.nvidia.com/nvidia-tensorrt-8x-download) guide 6 | 2. Install DeepStream by following the [`deepstream official`](https://docs.nvidia.com/metropolis/deepstream/dev-guide/text/DS_Quickstart.html) quickstart 7 | 8 | # Download model 9 | All YOLOv8 ONNX models are available at [`yolov8 models`](https://drive.google.com/drive/folders/1LgsOK1ufRrVNoILJxLUd2s2wPd6jHs2a?usp=sharing) 10 | 11 | # Custom video and models 12 | To use your own videos and models, follow the DeepStream configuration guide at [`deepstream config`](https://docs.nvidia.com/metropolis/deepstream/dev-guide/text/DS_ref_app_deepstream.html) 13 | 14 | # References 15 | 1. [`yolov8 official`](https://github.com/ultralytics/ultralytics) 16 | 2. 
[`yolo deepstream`](https://github.com/marcoslucianops/DeepStream-Yolo) 17 | 18 | -------------------------------------------------------------------------------- /config_infer_primary_yoloV8.txt: -------------------------------------------------------------------------------- 1 | [property] 2 | gpu-id=0 3 | net-scale-factor=0.0039215697906911373 4 | model-color-format=0 5 | onnx-file=./weights/yolov8n_nms.onnx 6 | model-engine-file = ./weights/yolov8n_nms.onnx_b1_gpu0_fp32.engine 7 | labelfile-path=labels.txt 8 | batch-size=1 9 | network-mode=0 10 | num-detected-classes=80 11 | interval=0 12 | gie-unique-id=1 13 | process-mode=1 14 | network-type=0 15 | cluster-mode=2 16 | maintain-aspect-ratio=1 17 | parse-bbox-func-name=NvDsInferParseYolo 18 | custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so 19 | 20 | 21 | [class-attrs-all] 22 | nms-iou-threshold=0.45 23 | pre-cluster-threshold=0.25 24 | topk=300 25 | -------------------------------------------------------------------------------- /deepstream_app_config.txt: -------------------------------------------------------------------------------- 1 | [application] 2 | enable-perf-measurement=1 3 | perf-measurement-interval-sec=5 4 | 5 | [tiled-display] 6 | enable=1 7 | rows=1 8 | columns=1 9 | width=1280 10 | height=720 11 | gpu-id=0 12 | nvbuf-memory-type=0 13 | 14 | [source0] 15 | enable=1 16 | type=3 17 | uri=file:///home/haobk/video.mp4 18 | num-sources=1 19 | gpu-id=0 20 | cudadec-memtype=0 21 | 22 | [sink0] 23 | enable=1 24 | type=2 25 | sync=0 26 | gpu-id=0 27 | nvbuf-memory-type=0 28 | 29 | [osd] 30 | enable=1 31 | gpu-id=0 32 | border-width=5 33 | text-size=15 34 | text-color=1;1;1;1; 35 | text-bg-color=0.3;0.3;0.3;1 36 | font=Serif 37 | show-clock=0 38 | clock-x-offset=800 39 | clock-y-offset=820 40 | clock-text-size=12 41 | clock-color=1;0;0;0 42 | nvbuf-memory-type=0 43 | 44 | [streammux] 45 | gpu-id=0 46 | live-source=0 47 | batch-size=1 48 | batched-push-timeout=40000 49 | width=1920 50 | height=1080 51 | enable-padding=0 52 | nvbuf-memory-type=0 53 | 54 | [primary-gie] 55 | enable=1 56 | gpu-id=0 57 | gie-unique-id=1 58 | nvbuf-memory-type=0 59 | config-file=config_infer_primary_yoloV8.txt 60 | 61 | [tests] 62 | file-loop=0 63 | -------------------------------------------------------------------------------- /labels.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- 
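
Note on how the configuration above and the sources below fit together: `custom-lib-path` in config_infer_primary_yoloV8.txt points at the shared library built from the nvdsinfer_custom_impl_Yolo sources (the Makefile below requires CUDA_VER to be set, e.g. `make -C nvdsinfer_custom_impl_Yolo CUDA_VER=11.4`, where 11.4 is only an example version), and `parse-bbox-func-name=NvDsInferParseYolo` names the bounding-box parser that library must export. The sketch below shows the expected shape of that symbol, assuming the standard `NvDsInferParseCustomFunc` prototype declared in DeepStream's `nvdsinfer_custom_impl.h`; the actual definition lives in nvdsparsebbox_Yolo.cpp (listed in the tree above).

#include <vector>
#include "nvdsinfer_custom_impl.h"  // ships with the DeepStream SDK

// The parser receives the engine's output layers plus network/detection
// parameters and fills objectList with decoded boxes for nvinfer clustering.
extern "C" bool NvDsInferParseYolo(std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList);

// Lets nvinfer verify at load time that the exported symbol matches the
// expected NvDsInferParseCustomFunc signature.
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo);
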
/nvdsinfer_custom_impl_Yolo/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a 5 | # copy of this software and associated documentation files (the "Software"), 6 | # to deal in the Software without restriction, including without limitation 7 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | # and/or sell copies of the Software, and to permit persons to whom the 9 | # Software is furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | # DEALINGS IN THE SOFTWARE. 21 | # 22 | # Edited by Marcos Luciano 23 | # https://www.github.com/marcoslucianops 24 | ################################################################################ 25 | 26 | CUDA_VER?= 27 | ifeq ($(CUDA_VER),) 28 | $(error "CUDA_VER is not set") 29 | endif 30 | 31 | OPENCV?= 32 | ifeq ($(OPENCV),) 33 | OPENCV=0 34 | endif 35 | 36 | CC:= g++ 37 | NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc 38 | 39 | CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations 40 | CFLAGS+= -I/opt/nvidia/deepstream/deepstream/sources/includes -I/usr/local/cuda-$(CUDA_VER)/include 41 | 42 | ifeq ($(OPENCV), 1) 43 | COMMON= -DOPENCV 44 | CFLAGS+= $(shell pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv) 45 | LIBS+= $(shell pkg-config --libs opencv4 2> /dev/null || pkg-config --libs opencv) 46 | endif 47 | 48 | LIBS+= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs 49 | LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group 50 | 51 | INCS:= $(wildcard *.h) 52 | 53 | SRCFILES:= $(filter-out calibrator.cpp, $(wildcard *.cpp)) 54 | 55 | ifeq ($(OPENCV), 1) 56 | SRCFILES+= calibrator.cpp 57 | endif 58 | 59 | SRCFILES+= $(wildcard layers/*.cpp) 60 | SRCFILES+= $(wildcard *.cu) 61 | 62 | TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so 63 | 64 | TARGET_OBJS:= $(SRCFILES:.cpp=.o) 65 | TARGET_OBJS:= $(TARGET_OBJS:.cu=.o) 66 | 67 | all: $(TARGET_LIB) 68 | 69 | %.o: %.cpp $(INCS) Makefile 70 | $(CC) -c $(COMMON) -o $@ $(CFLAGS) $< 71 | 72 | %.o: %.cu $(INCS) Makefile 73 | $(NVCC) -c -o $@ --compiler-options '-fPIC' $< 74 | 75 | $(TARGET_LIB) : $(TARGET_OBJS) 76 | $(CC) -o $@ $(TARGET_OBJS) $(LFLAGS) 77 | 78 | clean: 79 | rm -rf $(TARGET_LIB) 80 | rm -rf $(TARGET_OBJS) 81 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/calibrator.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "calibrator.h" 7 | #include 8 | #include 9 | 10 | namespace 
nvinfer1 11 | { 12 | Int8EntropyCalibrator2::Int8EntropyCalibrator2(const int &batchsize, const int &channels, const int &height, const int &width, const int &letterbox, const std::string &imgPath, 13 | const std::string &calibTablePath):batchSize(batchsize), inputC(channels), inputH(height), inputW(width), letterBox(letterbox), calibTablePath(calibTablePath), imageIndex(0), readCache(true) /* initialize readCache so readCalibrationCache() is well-defined */ 14 | { 15 | inputCount = batchsize * channels * height * width; 16 | std::fstream f(imgPath); 17 | if (f.is_open()) 18 | { 19 | std::string temp; 20 | while (std::getline(f, temp)) imgPaths.push_back(temp); 21 | } 22 | batchData = new float[inputCount]; 23 | CUDA_CHECK(cudaMalloc(&deviceInput, inputCount * sizeof(float))); 24 | } 25 | 26 | Int8EntropyCalibrator2::~Int8EntropyCalibrator2() 27 | { 28 | CUDA_CHECK(cudaFree(deviceInput)); 29 | if (batchData) 30 | delete[] batchData; 31 | } 32 | 33 | int Int8EntropyCalibrator2::getBatchSize() const noexcept 34 | { 35 | return batchSize; 36 | } 37 | 38 | bool Int8EntropyCalibrator2::getBatch(void **bindings, const char **names, int nbBindings) noexcept 39 | { 40 | if (imageIndex + batchSize > uint(imgPaths.size())) 41 | return false; 42 | 43 | float* ptr = batchData; 44 | for (size_t j = imageIndex; j < imageIndex + batchSize; ++j) 45 | { 46 | cv::Mat img = cv::imread(imgPaths[j], cv::IMREAD_COLOR); 47 | std::vector<float> inputData = prepareImage(img, inputC, inputH, inputW, letterBox); 48 | 49 | int len = (int)(inputData.size()); 50 | memcpy(ptr, inputData.data(), len * sizeof(float)); 51 | 52 | ptr += inputData.size(); 53 | std::cout << "Load image: " << imgPaths[j] << std::endl; 54 | std::cout << "Progress: " << (j + 1)*100. / imgPaths.size() << "%" << std::endl; 55 | } 56 | imageIndex += batchSize; 57 | CUDA_CHECK(cudaMemcpy(deviceInput, batchData, inputCount * sizeof(float), cudaMemcpyHostToDevice)); 58 | bindings[0] = deviceInput; 59 | return true; 60 | } 61 | 62 | const void* Int8EntropyCalibrator2::readCalibrationCache(std::size_t &length) noexcept 63 | { 64 | calibrationCache.clear(); 65 | std::ifstream input(calibTablePath, std::ios::binary); 66 | input >> std::noskipws; 67 | if (readCache && input.good()) 68 | { 69 | std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), 70 | std::back_inserter(calibrationCache)); 71 | } 72 | length = calibrationCache.size(); 73 | return length ? 
calibrationCache.data() : nullptr; 74 | } 75 | 76 | void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, std::size_t length) noexcept 77 | { 78 | std::ofstream output(calibTablePath, std::ios::binary); 79 | output.write(reinterpret_cast<const char*>(cache), length); 80 | } 81 | } 82 | 83 | std::vector<float> prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box) 84 | { 85 | cv::Mat out; 86 | int image_w = img.cols; 87 | int image_h = img.rows; 88 | if (image_w != input_w || image_h != input_h) 89 | { 90 | if (letter_box == 1) 91 | { 92 | float ratio_w = (float)image_w / (float)input_w; 93 | float ratio_h = (float)image_h / (float)input_h; 94 | if (ratio_w > ratio_h) 95 | { 96 | int new_width = input_w * ratio_h; 97 | int x = (image_w - new_width) / 2; 98 | cv::Rect roi(abs(x), 0, new_width, image_h); 99 | out = img(roi); 100 | } 101 | else if (ratio_w < ratio_h) 102 | { 103 | int new_height = input_h * ratio_w; 104 | int y = (image_h - new_height) / 2; 105 | cv::Rect roi(0, abs(y), image_w, new_height); 106 | out = img(roi); 107 | } 108 | else { 109 | out = img; 110 | } 111 | cv::resize(out, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC); 112 | } 113 | else 114 | { 115 | cv::resize(img, out, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC); 116 | } 117 | cv::cvtColor(out, out, cv::COLOR_BGR2RGB); 118 | } 119 | else 120 | { 121 | cv::cvtColor(img, out, cv::COLOR_BGR2RGB); 122 | } 123 | if (input_c == 3) 124 | { 125 | out.convertTo(out, CV_32FC3, 1.0 / 255.0); 126 | } 127 | else 128 | { 129 | out.convertTo(out, CV_32FC1, 1.0 / 255.0); 130 | } 131 | std::vector<cv::Mat> input_channels(input_c); 132 | cv::split(out, input_channels); 133 | std::vector<float> result(input_h * input_w * input_c); 134 | auto data = result.data(); 135 | int channelLength = input_h * input_w; 136 | for (int i = 0; i < input_c; ++i) 137 | { 138 | memcpy(data, input_channels[i].data, channelLength * sizeof(float)); 139 | data += channelLength; 140 | } 141 | return result; 142 | } 143 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/calibrator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef CALIBRATOR_H 7 | #define CALIBRATOR_H 8 | 9 | #include "opencv2/opencv.hpp" 10 | #include "cuda_runtime.h" 11 | #include "NvInfer.h" 12 | #include <string> 13 | #include <vector> 14 | 15 | #ifndef CUDA_CHECK 16 | #define CUDA_CHECK(callstr) \ 17 | { \ 18 | cudaError_t error_code = callstr; \ 19 | if (error_code != cudaSuccess) { \ 20 | std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \ 21 | assert(0); \ 22 | } \ 23 | } 24 | #endif 25 | 26 | namespace nvinfer1 { 27 | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 { 28 | public: 29 | Int8EntropyCalibrator2(const int &batchsize, 30 | const int &channels, 31 | const int &height, 32 | const int &width, 33 | const int &letterbox, 34 | const std::string &imgPath, 35 | const std::string &calibTablePath); 36 | 37 | virtual ~Int8EntropyCalibrator2(); 38 | int getBatchSize() const noexcept override; 39 | bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override; 40 | const void* readCalibrationCache(std::size_t& length) noexcept override; 41 | void writeCalibrationCache(const void* cache, size_t length) noexcept override; 42 | 43 | private: 44 | int batchSize; 45 | int inputC; 46 | int inputH; 47 | int 
inputW; 48 | int letterBox; 49 | std::string calibTablePath; 50 | size_t imageIndex; 51 | size_t inputCount; 52 | std::vector imgPaths; 53 | float *batchData{ nullptr }; 54 | void *deviceInput{ nullptr }; 55 | bool readCache; 56 | std::vector calibrationCache; 57 | }; 58 | } 59 | 60 | std::vector prepareImage(cv::Mat& img, int input_c, int input_h, int input_w, int letter_box); 61 | 62 | #endif //CALIBRATOR_H 63 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/activation_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "activation_layer.h" 7 | 8 | nvinfer1::ITensor* activationLayer( 9 | int layerIdx, 10 | std::string activation, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | if (activation == "linear") 17 | { 18 | output = input; 19 | } 20 | else if (activation == "relu") 21 | { 22 | nvinfer1::IActivationLayer* relu = network->addActivation(*input, nvinfer1::ActivationType::kRELU); 23 | assert(relu != nullptr); 24 | std::string reluLayerName = "relu_" + std::to_string(layerIdx); 25 | relu->setName(reluLayerName.c_str()); 26 | output = relu->getOutput(0); 27 | } 28 | else if (activation == "sigmoid" || activation == "logistic") 29 | { 30 | nvinfer1::IActivationLayer* sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID); 31 | assert(sigmoid != nullptr); 32 | std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx); 33 | sigmoid->setName(sigmoidLayerName.c_str()); 34 | output = sigmoid->getOutput(0); 35 | } 36 | else if (activation == "tanh") 37 | { 38 | nvinfer1::IActivationLayer* tanh = network->addActivation(*input, nvinfer1::ActivationType::kTANH); 39 | assert(tanh != nullptr); 40 | std::string tanhLayerName = "tanh_" + std::to_string(layerIdx); 41 | tanh->setName(tanhLayerName.c_str()); 42 | output = tanh->getOutput(0); 43 | } 44 | else if (activation == "leaky") 45 | { 46 | nvinfer1::IActivationLayer* leaky = network->addActivation(*input, nvinfer1::ActivationType::kLEAKY_RELU); 47 | assert(leaky != nullptr); 48 | std::string leakyLayerName = "leaky_" + std::to_string(layerIdx); 49 | leaky->setName(leakyLayerName.c_str()); 50 | leaky->setAlpha(0.1); 51 | output = leaky->getOutput(0); 52 | } 53 | else if (activation == "softplus") 54 | { 55 | nvinfer1::IActivationLayer* softplus = network->addActivation(*input, nvinfer1::ActivationType::kSOFTPLUS); 56 | assert(softplus != nullptr); 57 | std::string softplusLayerName = "softplus_" + std::to_string(layerIdx); 58 | softplus->setName(softplusLayerName.c_str()); 59 | output = softplus->getOutput(0); 60 | } 61 | else if (activation == "mish") 62 | { 63 | nvinfer1::IActivationLayer* softplus = network->addActivation(*input, nvinfer1::ActivationType::kSOFTPLUS); 64 | assert(softplus != nullptr); 65 | std::string softplusLayerName = "softplus_" + std::to_string(layerIdx); 66 | softplus->setName(softplusLayerName.c_str()); 67 | nvinfer1::IActivationLayer* tanh = network->addActivation(*softplus->getOutput(0), nvinfer1::ActivationType::kTANH); 68 | assert(tanh != nullptr); 69 | std::string tanhLayerName = "tanh_" + std::to_string(layerIdx); 70 | tanh->setName(tanhLayerName.c_str()); 71 | nvinfer1::IElementWiseLayer* mish 72 | = network->addElementWise(*input, *tanh->getOutput(0), nvinfer1::ElementWiseOperation::kPROD); 73 
| assert(mish != nullptr); 74 | std::string mishLayerName = "mish_" + std::to_string(layerIdx); 75 | mish->setName(mishLayerName.c_str()); 76 | output = mish->getOutput(0); 77 | } 78 | else if (activation == "silu" || activation == "swish") 79 | { 80 | nvinfer1::IActivationLayer* sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID); 81 | assert(sigmoid != nullptr); 82 | std::string sigmoidLayerName = "sigmoid_" + std::to_string(layerIdx); 83 | sigmoid->setName(sigmoidLayerName.c_str()); 84 | nvinfer1::IElementWiseLayer* silu 85 | = network->addElementWise(*input, *sigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD); 86 | assert(silu != nullptr); 87 | std::string siluLayerName = "silu_" + std::to_string(layerIdx); 88 | silu->setName(siluLayerName.c_str()); 89 | output = silu->getOutput(0); 90 | } 91 | else if (activation == "hardsigmoid") 92 | { 93 | nvinfer1::IActivationLayer* hardsigmoid = network->addActivation(*input, nvinfer1::ActivationType::kHARD_SIGMOID); 94 | assert(hardsigmoid != nullptr); 95 | std::string hardsigmoidLayerName = "hardsigmoid_" + std::to_string(layerIdx); 96 | hardsigmoid->setName(hardsigmoidLayerName.c_str()); 97 | hardsigmoid->setAlpha(1.0 / 6.0); 98 | hardsigmoid->setBeta(0.5); 99 | output = hardsigmoid->getOutput(0); 100 | } 101 | else if (activation == "hardswish") 102 | { 103 | nvinfer1::IActivationLayer* hardsigmoid = network->addActivation(*input, nvinfer1::ActivationType::kHARD_SIGMOID); 104 | assert(hardsigmoid != nullptr); 105 | std::string hardsigmoidLayerName = "hardsigmoid_" + std::to_string(layerIdx); 106 | hardsigmoid->setName(hardsigmoidLayerName.c_str()); 107 | hardsigmoid->setAlpha(1.0 / 6.0); 108 | hardsigmoid->setBeta(0.5); 109 | nvinfer1::IElementWiseLayer* hardswish 110 | = network->addElementWise(*input, *hardsigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD); 111 | assert(hardswish != nullptr); 112 | std::string hardswishLayerName = "hardswish_" + std::to_string(layerIdx); 113 | hardswish->setName(hardswishLayerName.c_str()); 114 | output = hardswish->getOutput(0); 115 | } 116 | else 117 | { 118 | std::cerr << "Activation not supported: " << activation << std::endl; 119 | std::abort(); 120 | } 121 | return output; 122 | } 123 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/activation_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __ACTIVATION_LAYER_H__ 7 | #define __ACTIVATION_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* activationLayer( 15 | int layerIdx, 16 | std::string activation, 17 | nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | #include "batchnorm_layer.h" 8 | 9 | nvinfer1::ITensor* batchnormLayer( 10 | int layerIdx, 11 | std::map& block, 12 | std::vector& weights, 13 | std::vector& trtWeights, 14 | int& weightPtr, 15 | std::string weightsType, 16 | float eps, 17 | nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network) 19 | { 20 | 
nvinfer1::ITensor* output; 21 | 22 | assert(block.at("type") == "batchnorm"); 23 | assert(block.find("filters") != block.end()); 24 | 25 | int filters = std::stoi(block.at("filters")); 26 | std::string activation = block.at("activation"); 27 | 28 | std::vector bnBiases; 29 | std::vector bnWeights; 30 | std::vector bnRunningMean; 31 | std::vector bnRunningVar; 32 | 33 | if (weightsType == "weights") 34 | { 35 | for (int i = 0; i < filters; ++i) 36 | { 37 | bnBiases.push_back(weights[weightPtr]); 38 | weightPtr++; 39 | } 40 | for (int i = 0; i < filters; ++i) 41 | { 42 | bnWeights.push_back(weights[weightPtr]); 43 | weightPtr++; 44 | } 45 | for (int i = 0; i < filters; ++i) 46 | { 47 | bnRunningMean.push_back(weights[weightPtr]); 48 | weightPtr++; 49 | } 50 | for (int i = 0; i < filters; ++i) 51 | { 52 | bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5)); 53 | weightPtr++; 54 | } 55 | } 56 | else 57 | { 58 | for (int i = 0; i < filters; ++i) 59 | { 60 | bnWeights.push_back(weights[weightPtr]); 61 | weightPtr++; 62 | } 63 | for (int i = 0; i < filters; ++i) 64 | { 65 | bnBiases.push_back(weights[weightPtr]); 66 | weightPtr++; 67 | } 68 | for (int i = 0; i < filters; ++i) 69 | { 70 | bnRunningMean.push_back(weights[weightPtr]); 71 | weightPtr++; 72 | } 73 | for (int i = 0; i < filters; ++i) 74 | { 75 | bnRunningVar.push_back(sqrt(weights[weightPtr] + eps)); 76 | weightPtr++; 77 | } 78 | } 79 | 80 | int size = filters; 81 | nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size}; 82 | nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size}; 83 | nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size}; 84 | float* shiftWt = new float[size]; 85 | for (int i = 0; i < size; ++i) 86 | shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); 87 | shift.values = shiftWt; 88 | float* scaleWt = new float[size]; 89 | for (int i = 0; i < size; ++i) 90 | scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; 91 | scale.values = scaleWt; 92 | float* powerWt = new float[size]; 93 | for (int i = 0; i < size; ++i) 94 | powerWt[i] = 1.0; 95 | power.values = powerWt; 96 | trtWeights.push_back(shift); 97 | trtWeights.push_back(scale); 98 | trtWeights.push_back(power); 99 | 100 | nvinfer1::IScaleLayer* batchnorm = network->addScale(*input, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); 101 | assert(batchnorm != nullptr); 102 | std::string batchnormLayerName = "batchnorm_" + std::to_string(layerIdx); 103 | batchnorm->setName(batchnormLayerName.c_str()); 104 | output = batchnorm->getOutput(0); 105 | 106 | output = activationLayer(layerIdx, activation, output, network); 107 | assert(output != nullptr); 108 | 109 | return output; 110 | } 111 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/batchnorm_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __BATCHNORM_LAYER_H__ 7 | #define __BATCHNORM_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* batchnormLayer( 17 | int layerIdx, 18 | std::map& block, 19 | std::vector& weights, 20 | std::vector& trtWeights, 21 | int& weightPtr, 22 | std::string weightsType, 23 | float eps, 24 | nvinfer1::ITensor* input, 25 | nvinfer1::INetworkDefinition* network); 26 | 27 | #endif 28 | 
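
A quick numeric check of the batch-norm folding above may be useful: bnRunningVar stores sqrt(var + eps), and the per-channel IScaleLayer computes out = (in * scale + shift)^power, so with scale = gamma / sqrt(var + eps), shift = beta - mean * scale and power = 1 it reproduces batch-norm inference exactly. A minimal standalone sketch, with all numeric values made up for illustration:

#include <cmath>
#include <cstdio>

int main() {
    // Hypothetical per-channel BN parameters and one input value.
    float x = 2.0f, gamma = 0.5f, beta = 0.1f, mean = 1.0f, var = 4.0f, eps = 1.0e-5f;
    float denom = std::sqrt(var + eps);                    // what bnRunningVar holds above
    float scale = gamma / denom;                           // scaleWt[i]
    float shift = beta - mean * gamma / denom;             // shiftWt[i]
    float folded = x * scale + shift;                      // IScaleLayer with power = 1
    float reference = gamma * (x - mean) / denom + beta;   // textbook BN inference
    std::printf("folded=%f reference=%f\n", folded, reference);  // prints the same value twice
    return 0;
}

Folding the normalization into a single per-channel scale is why batchnormLayer (and the conv + batch_normalize path in convolutional_layer.cpp) never adds a separate normalization op to the TensorRT graph.
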
-------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/channels_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "channels_layer.h" 7 | 8 | nvinfer1::ITensor* channelsLayer( 9 | int layerIdx, 10 | std::map& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::ITensor* implicitTensor, 13 | nvinfer1::INetworkDefinition* network) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "shift_channels" || block.at("type") == "control_channels"); 18 | 19 | if (block.at("type") == "shift_channels") { 20 | nvinfer1::IElementWiseLayer* shift 21 | = network->addElementWise(*input, *implicitTensor, nvinfer1::ElementWiseOperation::kSUM); 22 | assert(shift != nullptr); 23 | std::string shiftLayerName = "shift_channels_" + std::to_string(layerIdx); 24 | shift->setName(shiftLayerName.c_str()); 25 | output = shift->getOutput(0); 26 | } 27 | else if (block.at("type") == "control_channels") { 28 | nvinfer1::IElementWiseLayer* control 29 | = network->addElementWise(*input, *implicitTensor, nvinfer1::ElementWiseOperation::kPROD); 30 | assert(control != nullptr); 31 | std::string controlLayerName = "control_channels_" + std::to_string(layerIdx); 32 | control->setName(controlLayerName.c_str()); 33 | output = control->getOutput(0); 34 | } 35 | 36 | return output; 37 | } 38 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/channels_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __CHANNELS_LAYER_H__ 7 | #define __CHANNELS_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* channelsLayer( 15 | int layerIdx, 16 | std::map& block, 17 | nvinfer1::ITensor* input, 18 | nvinfer1::ITensor* implicitTensor, 19 | nvinfer1::INetworkDefinition* network); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/cls_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "cls_layer.h" 7 | 8 | nvinfer1::ITensor* clsLayer( 9 | int layerIdx, 10 | std::map& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "cls"); 17 | 18 | nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input); 19 | assert(shuffle != nullptr); 20 | std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx); 21 | shuffle->setName(shuffleLayerName.c_str()); 22 | nvinfer1::Permutation permutation; 23 | permutation.order[0] = 1; 24 | permutation.order[1] = 0; 25 | shuffle->setFirstTranspose(permutation); 26 | output = shuffle->getOutput(0); 27 | 28 | return output; 29 | } 30 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/cls_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __CLS_LAYER_H__ 7 | #define __CLS_LAYER_H__ 8 | 
9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* clsLayer( 15 | int layerIdx, 16 | std::map& block, 17 | nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | #include "convolutional_layer.h" 8 | 9 | nvinfer1::ITensor* convolutionalLayer( 10 | int layerIdx, 11 | std::map& block, 12 | std::vector& weights, 13 | std::vector& trtWeights, 14 | int& weightPtr, 15 | std::string weightsType, 16 | int& inputChannels, 17 | float eps, 18 | nvinfer1::ITensor* input, 19 | nvinfer1::INetworkDefinition* network) 20 | { 21 | nvinfer1::ITensor* output; 22 | 23 | assert(block.at("type") == "convolutional"); 24 | assert(block.find("filters") != block.end()); 25 | assert(block.find("pad") != block.end()); 26 | assert(block.find("size") != block.end()); 27 | assert(block.find("stride") != block.end()); 28 | 29 | int filters = std::stoi(block.at("filters")); 30 | int padding = std::stoi(block.at("pad")); 31 | int kernelSize = std::stoi(block.at("size")); 32 | int stride = std::stoi(block.at("stride")); 33 | std::string activation = block.at("activation"); 34 | int bias = filters; 35 | 36 | bool batchNormalize = false; 37 | if (block.find("batch_normalize") != block.end()) 38 | { 39 | bias = 0; 40 | batchNormalize = (block.at("batch_normalize") == "1"); 41 | } 42 | 43 | int groups = 1; 44 | if (block.find("groups") != block.end()) 45 | groups = std::stoi(block.at("groups")); 46 | 47 | if (block.find("bias") != block.end()) 48 | bias = std::stoi(block.at("bias")); 49 | 50 | int pad; 51 | if (padding) 52 | pad = (kernelSize - 1) / 2; 53 | else 54 | pad = 0; 55 | 56 | int size = filters * inputChannels * kernelSize * kernelSize / groups; 57 | std::vector bnBiases; 58 | std::vector bnWeights; 59 | std::vector bnRunningMean; 60 | std::vector bnRunningVar; 61 | nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; 62 | nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, bias}; 63 | 64 | if (weightsType == "weights") 65 | { 66 | if (batchNormalize == false) 67 | { 68 | float* val; 69 | if (bias != 0) { 70 | val = new float[filters]; 71 | for (int i = 0; i < filters; ++i) 72 | { 73 | val[i] = weights[weightPtr]; 74 | weightPtr++; 75 | } 76 | convBias.values = val; 77 | trtWeights.push_back(convBias); 78 | } 79 | val = new float[size]; 80 | for (int i = 0; i < size; ++i) 81 | { 82 | val[i] = weights[weightPtr]; 83 | weightPtr++; 84 | } 85 | convWt.values = val; 86 | trtWeights.push_back(convWt); 87 | } 88 | else 89 | { 90 | for (int i = 0; i < filters; ++i) 91 | { 92 | bnBiases.push_back(weights[weightPtr]); 93 | weightPtr++; 94 | } 95 | for (int i = 0; i < filters; ++i) 96 | { 97 | bnWeights.push_back(weights[weightPtr]); 98 | weightPtr++; 99 | } 100 | for (int i = 0; i < filters; ++i) 101 | { 102 | bnRunningMean.push_back(weights[weightPtr]); 103 | weightPtr++; 104 | } 105 | for (int i = 0; i < filters; ++i) 106 | { 107 | bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5)); 108 | weightPtr++; 109 | } 110 | float* val = new float[size]; 111 | for (int i = 0; i < size; ++i) 112 | { 113 | val[i] = weights[weightPtr]; 114 | weightPtr++; 115 | } 116 | convWt.values = val; 117 | 
trtWeights.push_back(convWt); 118 | if (bias != 0) 119 | trtWeights.push_back(convBias); 120 | } 121 | } 122 | else 123 | { 124 | if (batchNormalize == false) 125 | { 126 | float* val = new float[size]; 127 | for (int i = 0; i < size; ++i) 128 | { 129 | val[i] = weights[weightPtr]; 130 | weightPtr++; 131 | } 132 | convWt.values = val; 133 | trtWeights.push_back(convWt); 134 | if (bias != 0) { 135 | val = new float[filters]; 136 | for (int i = 0; i < filters; ++i) 137 | { 138 | val[i] = weights[weightPtr]; 139 | weightPtr++; 140 | } 141 | convBias.values = val; 142 | trtWeights.push_back(convBias); 143 | } 144 | } 145 | else 146 | { 147 | float* val = new float[size]; 148 | for (int i = 0; i < size; ++i) 149 | { 150 | val[i] = weights[weightPtr]; 151 | weightPtr++; 152 | } 153 | convWt.values = val; 154 | for (int i = 0; i < filters; ++i) 155 | { 156 | bnWeights.push_back(weights[weightPtr]); 157 | weightPtr++; 158 | } 159 | for (int i = 0; i < filters; ++i) 160 | { 161 | bnBiases.push_back(weights[weightPtr]); 162 | weightPtr++; 163 | } 164 | for (int i = 0; i < filters; ++i) 165 | { 166 | bnRunningMean.push_back(weights[weightPtr]); 167 | weightPtr++; 168 | } 169 | for (int i = 0; i < filters; ++i) 170 | { 171 | bnRunningVar.push_back(sqrt(weights[weightPtr] + eps)); 172 | weightPtr++; 173 | } 174 | trtWeights.push_back(convWt); 175 | if (bias != 0) 176 | trtWeights.push_back(convBias); 177 | } 178 | } 179 | 180 | nvinfer1::IConvolutionLayer* conv 181 | = network->addConvolutionNd(*input, filters, nvinfer1::Dims{2, {kernelSize, kernelSize}}, convWt, convBias); 182 | assert(conv != nullptr); 183 | std::string convLayerName = "conv_" + std::to_string(layerIdx); 184 | conv->setName(convLayerName.c_str()); 185 | conv->setStrideNd(nvinfer1::Dims{2, {stride, stride}}); 186 | conv->setPaddingNd(nvinfer1::Dims{2, {pad, pad}}); 187 | 188 | if (block.find("groups") != block.end()) 189 | conv->setNbGroups(groups); 190 | 191 | output = conv->getOutput(0); 192 | 193 | if (batchNormalize == true) 194 | { 195 | size = filters; 196 | nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size}; 197 | nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size}; 198 | nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size}; 199 | float* shiftWt = new float[size]; 200 | for (int i = 0; i < size; ++i) 201 | shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); 202 | shift.values = shiftWt; 203 | float* scaleWt = new float[size]; 204 | for (int i = 0; i < size; ++i) 205 | scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; 206 | scale.values = scaleWt; 207 | float* powerWt = new float[size]; 208 | for (int i = 0; i < size; ++i) 209 | powerWt[i] = 1.0; 210 | power.values = powerWt; 211 | trtWeights.push_back(shift); 212 | trtWeights.push_back(scale); 213 | trtWeights.push_back(power); 214 | 215 | nvinfer1::IScaleLayer* batchnorm = network->addScale(*output, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); 216 | assert(batchnorm != nullptr); 217 | std::string batchnormLayerName = "batchnorm_" + std::to_string(layerIdx); 218 | batchnorm->setName(batchnormLayerName.c_str()); 219 | output = batchnorm->getOutput(0); 220 | } 221 | 222 | output = activationLayer(layerIdx, activation, output, network); 223 | assert(output != nullptr); 224 | 225 | return output; 226 | } 227 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/convolutional_layer.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __CONVOLUTIONAL_LAYER_H__ 7 | #define __CONVOLUTIONAL_LAYER_H__ 8 | 9 | #include 10 | #include 11 | 12 | #include "NvInfer.h" 13 | 14 | #include "activation_layer.h" 15 | 16 | nvinfer1::ITensor* convolutionalLayer( 17 | int layerIdx, 18 | std::map& block, 19 | std::vector& weights, 20 | std::vector& trtWeights, 21 | int& weightPtr, 22 | std::string weightsType, 23 | int& inputChannels, 24 | float eps, 25 | nvinfer1::ITensor* input, 26 | nvinfer1::INetworkDefinition* network); 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/implicit_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "implicit_layer.h" 7 | 8 | nvinfer1::ITensor* implicitLayer( 9 | int layerIdx, 10 | std::map& block, 11 | std::vector& weights, 12 | std::vector& trtWeights, 13 | int& weightPtr, 14 | nvinfer1::INetworkDefinition* network) 15 | { 16 | nvinfer1::ITensor* output; 17 | 18 | assert(block.at("type") == "implicit_add" || block.at("type") == "implicit_mul"); 19 | assert(block.find("filters") != block.end()); 20 | 21 | int filters = std::stoi(block.at("filters")); 22 | 23 | nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, filters}; 24 | 25 | float* val = new float[filters]; 26 | for (int i = 0; i < filters; ++i) 27 | { 28 | val[i] = weights[weightPtr]; 29 | weightPtr++; 30 | } 31 | convWt.values = val; 32 | trtWeights.push_back(convWt); 33 | 34 | nvinfer1::IConstantLayer* implicit = network->addConstant(nvinfer1::Dims{3, {filters, 1, 1}}, convWt); 35 | assert(implicit != nullptr); 36 | std::string implicitLayerName = block.at("type") + "_" + std::to_string(layerIdx); 37 | implicit->setName(implicitLayerName.c_str()); 38 | output = implicit->getOutput(0); 39 | 40 | return output; 41 | } 42 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/implicit_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __IMPLICIT_LAYER_H__ 7 | #define __IMPLICIT_LAYER_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "NvInfer.h" 14 | 15 | nvinfer1::ITensor* implicitLayer( 16 | int layerIdx, 17 | std::map& block, 18 | std::vector& weights, 19 | std::vector& trtWeights, 20 | int& weightPtr, 21 | nvinfer1::INetworkDefinition* network); 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/pooling_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "pooling_layer.h" 7 | 8 | nvinfer1::ITensor* poolingLayer( 9 | int layerIdx, 10 | std::map& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "maxpool" || block.at("type") == "avgpool"); 17 | 18 | if (block.at("type") == "maxpool") 19 | { 20 | assert(block.find("size") != block.end()); 21 | 
assert(block.find("stride") != block.end()); 22 | 23 | int size = std::stoi(block.at("size")); 24 | int stride = std::stoi(block.at("stride")); 25 | 26 | nvinfer1::IPoolingLayer* maxpool 27 | = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::Dims{2, {size, size}}); 28 | assert(maxpool != nullptr); 29 | std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx); 30 | maxpool->setName(maxpoolLayerName.c_str()); 31 | maxpool->setStrideNd(nvinfer1::Dims{2, {stride, stride}}); 32 | maxpool->setPaddingNd(nvinfer1::Dims{2, {(size - 1) / 2, (size - 1) / 2}}); 33 | if (size == 2 && stride == 1) 34 | { 35 | maxpool->setPrePadding(nvinfer1::Dims{2, {0, 0}}); 36 | maxpool->setPostPadding(nvinfer1::Dims{2, {1, 1}}); 37 | } 38 | output = maxpool->getOutput(0); 39 | } 40 | else if (block.at("type") == "avgpool") 41 | { 42 | nvinfer1::Dims inputDims = input->getDimensions(); 43 | nvinfer1::IPoolingLayer* avgpool = network->addPoolingNd( 44 | *input, nvinfer1::PoolingType::kAVERAGE, nvinfer1::Dims{2, {inputDims.d[1], inputDims.d[2]}}); 45 | assert(avgpool != nullptr); 46 | std::string avgpoolLayerName = "avgpool_" + std::to_string(layerIdx); 47 | avgpool->setName(avgpoolLayerName.c_str()); 48 | output = avgpool->getOutput(0); 49 | } 50 | else 51 | { 52 | std::cerr << "Pooling not supported: " << block.at("type") << std::endl; 53 | std::abort(); 54 | } 55 | 56 | return output; 57 | } 58 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/pooling_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __POOLING_LAYER_H__ 7 | #define __POOLING_LAYER_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "NvInfer.h" 14 | 15 | nvinfer1::ITensor* poolingLayer( 16 | int layerIdx, 17 | std::map& block, 18 | nvinfer1::ITensor* input, 19 | nvinfer1::INetworkDefinition* network); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reduce_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "reduce_layer.h" 7 | 8 | nvinfer1::ITensor* reduceLayer( 9 | int layerIdx, 10 | std::map& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "reduce"); 17 | assert(block.find("mode") != block.end()); 18 | assert(block.find("axes") != block.end()); 19 | 20 | std::string mode = block.at("mode"); 21 | 22 | nvinfer1::ReduceOperation operation; 23 | if (mode == "mean") 24 | operation = nvinfer1::ReduceOperation::kAVG; 25 | 26 | std::string strAxes = block.at("axes"); 27 | std::vector axes; 28 | size_t lastPos = 0, pos = 0; 29 | while ((pos = strAxes.find(',', lastPos)) != std::string::npos) 30 | { 31 | int vL = std::stoi(trim(strAxes.substr(lastPos, pos - lastPos))); 32 | axes.push_back(vL); 33 | lastPos = pos + 1; 34 | } 35 | if (lastPos < strAxes.length()) 36 | { 37 | std::string lastV = trim(strAxes.substr(lastPos)); 38 | if (!lastV.empty()) 39 | axes.push_back(std::stoi(lastV)); 40 | } 41 | assert(!axes.empty()); 42 | 43 | uint32_t axisMask = 0; 44 | for (int axis : axes) 45 | axisMask |= 1 << axis; 46 | 47 | bool keepDims = false; 48 | if 
(block.find("keep") != block.end()) 49 | keepDims = std::stoi(block.at("keep")) == 1 ? true : false; 50 | 51 | nvinfer1::IReduceLayer* reduce = network->addReduce(*input, operation, axisMask, keepDims); 52 | assert(reduce != nullptr); 53 | std::string reduceLayerName = "reduce_" + std::to_string(layerIdx); 54 | reduce->setName(reduceLayerName.c_str()); 55 | output = reduce->getOutput(0); 56 | 57 | return output; 58 | } 59 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reduce_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __REDUCE_LAYER_H__ 7 | #define __REDUCE_LAYER_H__ 8 | 9 | #include "NvInfer.h" 10 | #include "../utils.h" 11 | 12 | nvinfer1::ITensor* reduceLayer( 13 | int layerIdx, 14 | std::map& block, 15 | nvinfer1::ITensor* input, 16 | nvinfer1::INetworkDefinition* network); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reg_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "reg_layer.h" 7 | 8 | nvinfer1::ITensor* regLayer( 9 | int layerIdx, 10 | std::map& block, 11 | std::vector& weights, 12 | std::vector& trtWeights, 13 | int& weightPtr, 14 | nvinfer1::ITensor* input, 15 | nvinfer1::INetworkDefinition* network) 16 | { 17 | nvinfer1::ITensor* output; 18 | 19 | assert(block.at("type") == "reg"); 20 | 21 | nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input); 22 | assert(shuffle != nullptr); 23 | std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx); 24 | shuffle->setName(shuffleLayerName.c_str()); 25 | nvinfer1::Permutation permutation; 26 | permutation.order[0] = 1; 27 | permutation.order[1] = 0; 28 | shuffle->setFirstTranspose(permutation); 29 | output = shuffle->getOutput(0); 30 | nvinfer1::Dims shuffleDims = output->getDimensions(); 31 | 32 | nvinfer1::ISliceLayer* sliceLt = network->addSlice( 33 | *output, nvinfer1::Dims{2, {0, 0}}, nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, nvinfer1::Dims{2, {1, 1}}); 34 | assert(sliceLt != nullptr); 35 | std::string sliceLtLayerName = "slice_lt_" + std::to_string(layerIdx); 36 | sliceLt->setName(sliceLtLayerName.c_str()); 37 | nvinfer1::ITensor* lt = sliceLt->getOutput(0); 38 | 39 | nvinfer1::ISliceLayer* sliceRb = network->addSlice( 40 | *output, nvinfer1::Dims{2, {0, 2}}, nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, nvinfer1::Dims{2, {1, 1}}); 41 | assert(sliceRb != nullptr); 42 | std::string sliceRbLayerName = "slice_rb_" + std::to_string(layerIdx); 43 | sliceRb->setName(sliceRbLayerName.c_str()); 44 | nvinfer1::ITensor* rb = sliceRb->getOutput(0); 45 | 46 | int channels = shuffleDims.d[0] * 2; 47 | nvinfer1::Weights anchorPointsWt{nvinfer1::DataType::kFLOAT, nullptr, channels}; 48 | float* val = new float[channels]; 49 | for (int i = 0; i < channels; ++i) 50 | { 51 | val[i] = weights[weightPtr]; 52 | weightPtr++; 53 | } 54 | anchorPointsWt.values = val; 55 | trtWeights.push_back(anchorPointsWt); 56 | 57 | nvinfer1::IConstantLayer* anchorPoints = network->addConstant(nvinfer1::Dims{2, {shuffleDims.d[0], 2}}, anchorPointsWt); 58 | assert(anchorPoints != nullptr); 59 | std::string anchorPointsLayerName = "anchor_points_" + std::to_string(layerIdx); 60 | 
anchorPoints->setName(anchorPointsLayerName.c_str()); 61 | nvinfer1::ITensor* anchorPointsTensor = anchorPoints->getOutput(0); 62 | 63 | nvinfer1::IElementWiseLayer* x1y1 64 | = network->addElementWise(*anchorPointsTensor, *lt, nvinfer1::ElementWiseOperation::kSUB); 65 | assert(x1y1 != nullptr); 66 | std::string x1y1LayerName = "x1y1_" + std::to_string(layerIdx); 67 | x1y1->setName(x1y1LayerName.c_str()); 68 | nvinfer1::ITensor* x1y1Tensor = x1y1->getOutput(0); 69 | 70 | nvinfer1::IElementWiseLayer* x2y2 71 | = network->addElementWise(*rb, *anchorPointsTensor, nvinfer1::ElementWiseOperation::kSUM); 72 | assert(x2y2 != nullptr); 73 | std::string x2y2LayerName = "x2y2_" + std::to_string(layerIdx); 74 | x2y2->setName(x2y2LayerName.c_str()); 75 | nvinfer1::ITensor* x2y2Tensor = x2y2->getOutput(0); 76 | 77 | std::vector concatInputs; 78 | concatInputs.push_back(x1y1Tensor); 79 | concatInputs.push_back(x2y2Tensor); 80 | 81 | nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); 82 | assert(concat != nullptr); 83 | std::string concatLayerName = "concat_" + std::to_string(layerIdx); 84 | concat->setName(concatLayerName.c_str()); 85 | concat->setAxis(1); 86 | output = concat->getOutput(0); 87 | 88 | channels = shuffleDims.d[0]; 89 | nvinfer1::Weights stridePointsWt{nvinfer1::DataType::kFLOAT, nullptr, channels}; 90 | val = new float[channels]; 91 | for (int i = 0; i < channels; ++i) 92 | { 93 | val[i] = weights[weightPtr]; 94 | weightPtr++; 95 | } 96 | stridePointsWt.values = val; 97 | trtWeights.push_back(stridePointsWt); 98 | 99 | nvinfer1::IConstantLayer* stridePoints = network->addConstant(nvinfer1::Dims{2, {shuffleDims.d[0], 1}}, stridePointsWt); 100 | assert(stridePoints != nullptr); 101 | std::string stridePointsLayerName = "stride_points_" + std::to_string(layerIdx); 102 | stridePoints->setName(stridePointsLayerName.c_str()); 103 | nvinfer1::ITensor* stridePointsTensor = stridePoints->getOutput(0); 104 | 105 | nvinfer1::IElementWiseLayer* pred 106 | = network->addElementWise(*output, *stridePointsTensor, nvinfer1::ElementWiseOperation::kPROD); 107 | assert(pred != nullptr); 108 | std::string predLayerName = "pred_" + std::to_string(layerIdx); 109 | pred->setName(predLayerName.c_str()); 110 | output = pred->getOutput(0); 111 | 112 | return output; 113 | } 114 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reg_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __REG_LAYER_H__ 7 | #define __REG_LAYER_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "NvInfer.h" 14 | 15 | nvinfer1::ITensor* regLayer( 16 | int layerIdx, 17 | std::map& block, 18 | std::vector& weights, 19 | std::vector& trtWeights, 20 | int& weightPtr, 21 | nvinfer1::ITensor* input, 22 | nvinfer1::INetworkDefinition* network); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reorg_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "reorg_layer.h" 7 | 8 | nvinfer1::ITensor* reorgLayer( 9 | int layerIdx, 10 | std::map& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | 
nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "reorg"); 17 | 18 | nvinfer1::Dims inputDims = input->getDimensions(); 19 | 20 | nvinfer1::ISliceLayer *slice1 = network->addSlice( 21 | *input, nvinfer1::Dims{3, {0, 0, 0}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, 22 | nvinfer1::Dims{3, {1, 2, 2}}); 23 | assert(slice1 != nullptr); 24 | std::string slice1LayerName = "slice1_" + std::to_string(layerIdx); 25 | slice1->setName(slice1LayerName.c_str()); 26 | 27 | nvinfer1::ISliceLayer *slice2 = network->addSlice( 28 | *input, nvinfer1::Dims{3, {0, 1, 0}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, 29 | nvinfer1::Dims{3, {1, 2, 2}}); 30 | assert(slice2 != nullptr); 31 | std::string slice2LayerName = "slice2_" + std::to_string(layerIdx); 32 | slice2->setName(slice2LayerName.c_str()); 33 | 34 | nvinfer1::ISliceLayer *slice3 = network->addSlice( 35 | *input, nvinfer1::Dims{3, {0, 0, 1}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, 36 | nvinfer1::Dims{3, {1, 2, 2}}); 37 | assert(slice3 != nullptr); 38 | std::string slice3LayerName = "slice3_" + std::to_string(layerIdx); 39 | slice3->setName(slice3LayerName.c_str()); 40 | 41 | nvinfer1::ISliceLayer *slice4 = network->addSlice( 42 | *input, nvinfer1::Dims{3, {0, 1, 1}}, nvinfer1::Dims{3, {inputDims.d[0], inputDims.d[1] / 2, inputDims.d[2] / 2}}, 43 | nvinfer1::Dims{3, {1, 2, 2}}); 44 | assert(slice4 != nullptr); 45 | std::string slice4LayerName = "slice4_" + std::to_string(layerIdx); 46 | slice4->setName(slice4LayerName.c_str()); 47 | 48 | std::vector concatInputs; 49 | concatInputs.push_back(slice1->getOutput(0)); 50 | concatInputs.push_back(slice2->getOutput(0)); 51 | concatInputs.push_back(slice3->getOutput(0)); 52 | concatInputs.push_back(slice4->getOutput(0)); 53 | 54 | nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); 55 | assert(concat != nullptr); 56 | std::string concatLayerName = "concat_" + std::to_string(layerIdx); 57 | concat->setName(concatLayerName.c_str()); 58 | concat->setAxis(0); 59 | output = concat->getOutput(0); 60 | 61 | return output; 62 | } 63 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/reorg_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __REORGV5_LAYER_H__ 7 | #define __REORGV5_LAYER_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "NvInfer.h" 14 | 15 | nvinfer1::ITensor* reorgLayer( 16 | int layerIdx, 17 | std::map& block, 18 | nvinfer1::ITensor* input, 19 | nvinfer1::INetworkDefinition* network); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/route_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "route_layer.h" 7 | 8 | nvinfer1::ITensor* routeLayer( 9 | int layerIdx, 10 | std::string& layers, 11 | std::map& block, 12 | std::vector tensorOutputs, 13 | nvinfer1::INetworkDefinition* network) 14 | { 15 | nvinfer1::ITensor* output; 16 | 17 | assert(block.at("type") == "route"); 18 | assert(block.find("layers") != block.end()); 19 | 20 | std::string strLayers = 
block.at("layers"); 21 | std::vector idxLayers; 22 | size_t lastPos = 0, pos = 0; 23 | while ((pos = strLayers.find(',', lastPos)) != std::string::npos) 24 | { 25 | int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos))); 26 | idxLayers.push_back(vL); 27 | lastPos = pos + 1; 28 | } 29 | if (lastPos < strLayers.length()) 30 | { 31 | std::string lastV = trim(strLayers.substr(lastPos)); 32 | if (!lastV.empty()) 33 | idxLayers.push_back(std::stoi(lastV)); 34 | } 35 | assert (!idxLayers.empty()); 36 | std::vector concatInputs; 37 | for (uint i = 0; i < idxLayers.size(); ++i) 38 | { 39 | if (idxLayers[i] < 0) 40 | idxLayers[i] = tensorOutputs.size() + idxLayers[i]; 41 | assert (idxLayers[i] >= 0 && idxLayers[i] < (int)tensorOutputs.size()); 42 | concatInputs.push_back(tensorOutputs[idxLayers[i]]); 43 | if (i < idxLayers.size() - 1) 44 | layers += std::to_string(idxLayers[i]) + ", "; 45 | } 46 | layers += std::to_string(idxLayers[idxLayers.size() - 1]); 47 | 48 | if (concatInputs.size() == 1) 49 | output = concatInputs[0]; 50 | else { 51 | int axis = 0; 52 | if (block.find("axis") != block.end()) 53 | axis = std::stoi(block.at("axis")); 54 | if (axis < 0) 55 | axis = concatInputs[0]->getDimensions().nbDims + axis; 56 | 57 | nvinfer1::IConcatenationLayer* concat = network->addConcatenation(concatInputs.data(), concatInputs.size()); 58 | assert(concat != nullptr); 59 | std::string concatLayerName = "route_" + std::to_string(layerIdx); 60 | concat->setName(concatLayerName.c_str()); 61 | concat->setAxis(axis); 62 | output = concat->getOutput(0); 63 | } 64 | 65 | if (block.find("groups") != block.end()) 66 | { 67 | nvinfer1::Dims prevTensorDims = output->getDimensions(); 68 | int groups = stoi(block.at("groups")); 69 | int group_id = stoi(block.at("group_id")); 70 | int startSlice = (prevTensorDims.d[0] / groups) * group_id; 71 | int channelSlice = (prevTensorDims.d[0] / groups); 72 | nvinfer1::ISliceLayer* slice = network->addSlice( 73 | *output, nvinfer1::Dims{3, {startSlice, 0, 0}}, 74 | nvinfer1::Dims{3, {channelSlice, prevTensorDims.d[1], prevTensorDims.d[2]}}, nvinfer1::Dims{3, {1, 1, 1}}); 75 | assert(slice != nullptr); 76 | std::string sliceLayerName = "slice_" + std::to_string(layerIdx); 77 | slice->setName(sliceLayerName.c_str()); 78 | output = slice->getOutput(0); 79 | } 80 | 81 | return output; 82 | } 83 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/route_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __ROUTE_LAYER_H__ 7 | #define __ROUTE_LAYER_H__ 8 | 9 | #include "NvInfer.h" 10 | #include "../utils.h" 11 | 12 | nvinfer1::ITensor* routeLayer( 13 | int layerIdx, 14 | std::string& layers, 15 | std::map& block, 16 | std::vector tensorOutputs, 17 | nvinfer1::INetworkDefinition* network); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "shortcut_layer.h" 7 | 8 | nvinfer1::ITensor* shortcutLayer( 9 | int layerIdx, 10 | std::string mode, 11 | std::string activation, 12 | std::string inputVol, 13 | std::string shortcutVol, 14 | std::map& block, 15 | nvinfer1::ITensor* input, 
16 | nvinfer1::ITensor* shortcutInput, 17 | nvinfer1::INetworkDefinition* network) 18 | { 19 | nvinfer1::ITensor* output; 20 | 21 | assert(block.at("type") == "shortcut"); 22 | 23 | nvinfer1::ElementWiseOperation operation = nvinfer1::ElementWiseOperation::kSUM; 24 | 25 | if (mode == "mul") 26 | operation = nvinfer1::ElementWiseOperation::kPROD; 27 | 28 | if (mode == "add" && inputVol != shortcutVol) 29 | { 30 | nvinfer1::ISliceLayer* slice = network->addSlice( 31 | *shortcutInput, nvinfer1::Dims{3, {0, 0, 0}}, input->getDimensions(), nvinfer1::Dims{3, {1, 1, 1}}); 32 | assert(slice != nullptr); 33 | std::string sliceLayerName = "slice_" + std::to_string(layerIdx); 34 | slice->setName(sliceLayerName.c_str()); 35 | output = slice->getOutput(0); 36 | } 37 | else 38 | { 39 | output = shortcutInput; 40 | } 41 | 42 | nvinfer1::IElementWiseLayer* shortcut = network->addElementWise(*input, *output, operation); 43 | assert(shortcut != nullptr); 44 | std::string shortcutLayerName = "shortcut_" + std::to_string(layerIdx); 45 | shortcut->setName(shortcutLayerName.c_str()); 46 | output = shortcut->getOutput(0); 47 | 48 | output = activationLayer(layerIdx, activation, output, network); 49 | assert(output != nullptr); 50 | 51 | return output; 52 | } 53 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/shortcut_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SHORTCUT_LAYER_H__ 7 | #define __SHORTCUT_LAYER_H__ 8 | 9 | #include 10 | 11 | #include "NvInfer.h" 12 | 13 | #include "activation_layer.h" 14 | 15 | nvinfer1::ITensor* shortcutLayer( 16 | int layerIdx, 17 | std::string mode, 18 | std::string activation, 19 | std::string inputVol, 20 | std::string shortcutVol, 21 | std::map& block, 22 | nvinfer1::ITensor* input, 23 | nvinfer1::ITensor* shortcut, 24 | nvinfer1::INetworkDefinition* network); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "shuffle_layer.h" 7 | 8 | nvinfer1::ITensor* shuffleLayer( 9 | int layerIdx, 10 | std::string& layer, 11 | std::map& block, 12 | nvinfer1::ITensor* input, 13 | std::vector tensorOutputs, 14 | nvinfer1::INetworkDefinition* network) 15 | { 16 | nvinfer1::ITensor* output; 17 | 18 | assert(block.at("type") == "shuffle"); 19 | 20 | nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input); 21 | assert(shuffle != nullptr); 22 | std::string shuffleLayerName = "shuffle_" + std::to_string(layerIdx); 23 | shuffle->setName(shuffleLayerName.c_str()); 24 | 25 | if (block.find("reshape") != block.end()) 26 | { 27 | std::string strReshape = block.at("reshape"); 28 | std::vector reshape; 29 | size_t lastPos = 0, pos = 0; 30 | while ((pos = strReshape.find(',', lastPos)) != std::string::npos) 31 | { 32 | int vL = std::stoi(trim(strReshape.substr(lastPos, pos - lastPos))); 33 | reshape.push_back(vL); 34 | lastPos = pos + 1; 35 | } 36 | if (lastPos < strReshape.length()) 37 | { 38 | std::string lastV = trim(strReshape.substr(lastPos)); 39 | if (!lastV.empty()) 40 | reshape.push_back(std::stoi(lastV)); 41 | } 42 | assert(!reshape.empty()); 43 | 44 | int from = -1; 45 | if 
(block.find("from") != block.end()) 46 | from = std::stoi(block.at("from")); 47 | 48 | if (from < 0) 49 | from = tensorOutputs.size() + from; 50 | 51 | layer = std::to_string(from); 52 | 53 | nvinfer1::Dims inputTensorDims = tensorOutputs[from]->getDimensions(); 54 | int32_t l = inputTensorDims.d[1] * inputTensorDims.d[2]; 55 | 56 | nvinfer1::Dims reshapeDims; 57 | reshapeDims.nbDims = reshape.size(); 58 | 59 | for (uint i = 0; i < reshape.size(); ++i) 60 | if (reshape[i] == 0) 61 | reshapeDims.d[i] = l; 62 | else 63 | reshapeDims.d[i] = reshape[i]; 64 | 65 | shuffle->setReshapeDimensions(reshapeDims); 66 | } 67 | 68 | if (block.find("transpose1") != block.end()) 69 | { 70 | std::string strTranspose1 = block.at("transpose1"); 71 | std::vector transpose1; 72 | size_t lastPos = 0, pos = 0; 73 | while ((pos = strTranspose1.find(',', lastPos)) != std::string::npos) 74 | { 75 | int vL = std::stoi(trim(strTranspose1.substr(lastPos, pos - lastPos))); 76 | transpose1.push_back(vL); 77 | lastPos = pos + 1; 78 | } 79 | if (lastPos < strTranspose1.length()) 80 | { 81 | std::string lastV = trim(strTranspose1.substr(lastPos)); 82 | if (!lastV.empty()) 83 | transpose1.push_back(std::stoi(lastV)); 84 | } 85 | assert(!transpose1.empty()); 86 | 87 | nvinfer1::Permutation permutation1; 88 | for (uint i = 0; i < transpose1.size(); ++i) 89 | permutation1.order[i] = transpose1[i]; 90 | 91 | shuffle->setFirstTranspose(permutation1); 92 | } 93 | 94 | if (block.find("transpose2") != block.end()) 95 | { 96 | std::string strTranspose2 = block.at("transpose2"); 97 | std::vector transpose2; 98 | size_t lastPos = 0, pos = 0; 99 | while ((pos = strTranspose2.find(',', lastPos)) != std::string::npos) 100 | { 101 | int vL = std::stoi(trim(strTranspose2.substr(lastPos, pos - lastPos))); 102 | transpose2.push_back(vL); 103 | lastPos = pos + 1; 104 | } 105 | if (lastPos < strTranspose2.length()) 106 | { 107 | std::string lastV = trim(strTranspose2.substr(lastPos)); 108 | if (!lastV.empty()) 109 | transpose2.push_back(std::stoi(lastV)); 110 | } 111 | assert(!transpose2.empty()); 112 | 113 | nvinfer1::Permutation permutation2; 114 | for (uint i = 0; i < transpose2.size(); ++i) 115 | permutation2.order[i] = transpose2[i]; 116 | 117 | shuffle->setSecondTranspose(permutation2); 118 | } 119 | 120 | output = shuffle->getOutput(0); 121 | 122 | return output; 123 | } 124 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/shuffle_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SHUFFLE_LAYER_H__ 7 | #define __SHUFFLE_LAYER_H__ 8 | 9 | #include "NvInfer.h" 10 | #include "../utils.h" 11 | 12 | nvinfer1::ITensor* shuffleLayer( 13 | int layerIdx, 14 | std::string& layer, 15 | std::map& block, 16 | nvinfer1::ITensor* input, 17 | std::vector tensorOutputs, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/softmax_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "softmax_layer.h" 7 | 8 | nvinfer1::ITensor* softmaxLayer( 9 | int layerIdx, 10 | std::map& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | 
nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "softmax"); 17 | assert(block.find("axes") != block.end()); 18 | 19 | int axes = std::stoi(block.at("axes")); 20 | 21 | nvinfer1::ISoftMaxLayer* softmax = network->addSoftMax(*input); 22 | assert(softmax != nullptr); 23 | std::string softmaxLayerName = "softmax_" + std::to_string(layerIdx); 24 | softmax->setName(softmaxLayerName.c_str()); 25 | softmax->setAxes(1 << axes); 26 | output = softmax->getOutput(0); 27 | 28 | return output; 29 | } 30 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/softmax_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __SOFTMAX_LAYER_H__ 7 | #define __SOFTMAX_LAYER_H__ 8 | 9 | #include <map> 10 | #include <string> 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* softmaxLayer( 15 | int layerIdx, 16 | std::map<std::string, std::string>& block, 17 | nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/upsample_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include "upsample_layer.h" 7 | 8 | nvinfer1::ITensor* upsampleLayer( 9 | int layerIdx, 10 | std::map<std::string, std::string>& block, 11 | nvinfer1::ITensor* input, 12 | nvinfer1::INetworkDefinition* network) 13 | { 14 | nvinfer1::ITensor* output; 15 | 16 | assert(block.at("type") == "upsample"); 17 | assert(block.find("stride") != block.end()); 18 | 19 | int stride = std::stoi(block.at("stride")); 20 | 21 | float scale[3] = {1, static_cast<float>(stride), static_cast<float>(stride)}; 22 | 23 | nvinfer1::IResizeLayer* resize = network->addResize(*input); 24 | assert(resize != nullptr); 25 | std::string resizeLayerName = "upsample_" + std::to_string(layerIdx); 26 | resize->setName(resizeLayerName.c_str()); 27 | resize->setResizeMode(nvinfer1::ResizeMode::kNEAREST); 28 | resize->setScales(scale, 3); 29 | output = resize->getOutput(0); 30 | 31 | return output; 32 | } 33 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/layers/upsample_layer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #ifndef __UPSAMPLE_LAYER_H__ 7 | #define __UPSAMPLE_LAYER_H__ 8 | 9 | #include <map> 10 | #include <string> 11 | 12 | #include "NvInfer.h" 13 | 14 | nvinfer1::ITensor* upsampleLayer( 15 | int layerIdx, 16 | std::map<std::string, std::string>& block, 17 | nvinfer1::ITensor* input, 18 | nvinfer1::INetworkDefinition* network); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/nvdsinfer_yolo_engine.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "nvdsinfer_custom_impl.h" 27 | #include "nvdsinfer_context.h" 28 | #include "yoloPlugins.h" 29 | #include "yolo.h" 30 | 31 | #include 32 | 33 | #define USE_CUDA_ENGINE_GET_API 1 34 | 35 | static bool getYoloNetworkInfo(NetworkInfo &networkInfo, const NvDsInferContextInitParams* initParams) 36 | { 37 | std::string yoloCfg = initParams->customNetworkConfigFilePath; 38 | std::string yoloType; 39 | 40 | std::transform(yoloCfg.begin(), yoloCfg.end(), yoloCfg.begin(), [] (uint8_t c) { 41 | return std::tolower(c); 42 | }); 43 | 44 | yoloType = yoloCfg.substr(0, yoloCfg.find(".cfg")); 45 | 46 | networkInfo.inputBlobName = "data"; 47 | networkInfo.networkType = yoloType; 48 | networkInfo.configFilePath = initParams->customNetworkConfigFilePath; 49 | networkInfo.wtsFilePath = initParams->modelFilePath; 50 | networkInfo.int8CalibPath = initParams->int8CalibrationFilePath; 51 | networkInfo.deviceType = (initParams->useDLA ? 
"kDLA" : "kGPU"); 52 | networkInfo.numDetectedClasses = initParams->numDetectedClasses; 53 | networkInfo.clusterMode = initParams->clusterMode; 54 | networkInfo.scoreThreshold = initParams->perClassDetectionParams->preClusterThreshold; 55 | 56 | if (initParams->networkMode == 0) 57 | networkInfo.networkMode = "FP32"; 58 | else if (initParams->networkMode == 1) 59 | networkInfo.networkMode = "INT8"; 60 | else if (initParams->networkMode == 2) 61 | networkInfo.networkMode = "FP16"; 62 | 63 | if (networkInfo.configFilePath.empty() || networkInfo.wtsFilePath.empty()) 64 | { 65 | std::cerr << "YOLO config file or weights file is not specified\n" << std::endl; 66 | return false; 67 | } 68 | 69 | if (!fileExists(networkInfo.configFilePath) || !fileExists(networkInfo.wtsFilePath)) 70 | { 71 | std::cerr << "YOLO config file or weights file is not exist\n" << std::endl; 72 | return false; 73 | } 74 | 75 | return true; 76 | } 77 | 78 | #if !USE_CUDA_ENGINE_GET_API 79 | IModelParser* NvDsInferCreateModelParser( 80 | const NvDsInferContextInitParams* initParams) { 81 | NetworkInfo networkInfo; 82 | if (!getYoloNetworkInfo(networkInfo, initParams)) 83 | return nullptr; 84 | 85 | return new Yolo(networkInfo); 86 | } 87 | #else 88 | extern "C" 89 | bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder, 90 | nvinfer1::IBuilderConfig * const builderConfig, 91 | const NvDsInferContextInitParams * const initParams, 92 | nvinfer1::DataType dataType, 93 | nvinfer1::ICudaEngine *& cudaEngine); 94 | 95 | extern "C" 96 | bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder, 97 | nvinfer1::IBuilderConfig * const builderConfig, 98 | const NvDsInferContextInitParams * const initParams, 99 | nvinfer1::DataType dataType, 100 | nvinfer1::ICudaEngine *& cudaEngine) 101 | { 102 | NetworkInfo networkInfo; 103 | if (!getYoloNetworkInfo(networkInfo, initParams)) 104 | return false; 105 | 106 | Yolo yolo(networkInfo); 107 | cudaEngine = yolo.createEngine (builder, builderConfig); 108 | if (cudaEngine == nullptr) 109 | { 110 | std::cerr << "Failed to build CUDA engine on " << networkInfo.configFilePath << std::endl; 111 | return false; 112 | } 113 | 114 | return true; 115 | } 116 | #endif 117 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/nvdsparsebbox_Yolo.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include 27 | #include 28 | #include 29 | #include "nvdsinfer_custom_impl.h" 30 | #include "utils.h" 31 | 32 | #include "yoloPlugins.h" 33 | 34 | extern "C" bool NvDsInferParseYolo( 35 | std::vector const &outputLayersInfo, NvDsInferNetworkInfo const &networkInfo, 36 | NvDsInferParseDetectionParams const &detectionParams, std::vector &objectList); 37 | 38 | static NvDsInferParseObjectInfo convertBBox( 39 | const float &bx1, const float &by1, const float &bx2, const float &by2, const uint &netW, const uint &netH) 40 | { 41 | NvDsInferParseObjectInfo b; 42 | 43 | float x1 = bx1; 44 | float y1 = by1; 45 | float x2 = bx2; 46 | float y2 = by2; 47 | 48 | x1 = clamp(x1, 0, netW); 49 | y1 = clamp(y1, 0, netH); 50 | x2 = clamp(x2, 0, netW); 51 | y2 = clamp(y2, 0, netH); 52 | 53 | b.left = x1; 54 | b.width = clamp(x2 - x1, 0, netW); 55 | b.top = y1; 56 | b.height = clamp(y2 - y1, 0, netH); 57 | 58 | return b; 59 | } 60 | 61 | static void addBBoxProposal( 62 | const float bx1, const float by1, const float bx2, const float by2, const uint &netW, const uint &netH, 63 | const int maxIndex, const float maxProb, std::vector &binfo) 64 | { 65 | NvDsInferParseObjectInfo bbi = convertBBox(bx1, by1, bx2, by2, netW, netH); 66 | if (bbi.width < 1 || bbi.height < 1) 67 | return; 68 | 69 | bbi.detectionConfidence = maxProb; 70 | bbi.classId = maxIndex; 71 | binfo.push_back(bbi); 72 | } 73 | 74 | static std::vector decodeYoloTensor( 75 | const int *counts, const float *boxes, const float *scores, const int *classes, const uint &netW, const uint &netH) 76 | { 77 | std::vector binfo; 78 | 79 | uint numBoxes = counts[0]; 80 | for (uint b = 0; b < numBoxes; ++b) 81 | { 82 | float bx1 = boxes[b * 4 + 0]; 83 | float by1 = boxes[b * 4 + 1]; 84 | float bx2 = boxes[b * 4 + 2]; 85 | float by2 = boxes[b * 4 + 3]; 86 | 87 | float maxProb = scores[b]; 88 | int maxIndex = classes[b]; 89 | 90 | addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo); 91 | } 92 | return binfo; 93 | } 94 | 95 | static bool NvDsInferParseCustomYolo( 96 | std::vector const &outputLayersInfo, NvDsInferNetworkInfo const &networkInfo, 97 | NvDsInferParseDetectionParams const &detectionParams, std::vector &objectList, 98 | const uint &numClasses) 99 | { 100 | if (outputLayersInfo.empty()) 101 | { 102 | std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl; 103 | return false; 104 | } 105 | 106 | std::vector objects; 107 | 108 | const NvDsInferLayerInfo &counts = outputLayersInfo[0]; 109 | const NvDsInferLayerInfo &boxes = outputLayersInfo[1]; 110 | const NvDsInferLayerInfo &scores = outputLayersInfo[2]; 111 | const NvDsInferLayerInfo &classes = outputLayersInfo[3]; 112 | 113 | std::vector outObjs = 114 | decodeYoloTensor( 115 | (const int *)(counts.buffer), (const float *)(boxes.buffer), (const float *)(scores.buffer), 116 | (const int *)(classes.buffer), networkInfo.width, networkInfo.height); 117 | 118 | objects.insert(objects.end(), outObjs.begin(), outObjs.end()); 119 | 120 | objectList = objects; 121 | 122 | return true; 123 | } 124 | 125 | extern "C" bool NvDsInferParseYolo( 126 | std::vector const &outputLayersInfo, 
NvDsInferNetworkInfo const &networkInfo, 127 | NvDsInferParseDetectionParams const &detectionParams, std::vector &objectList) 128 | { 129 | int num_classes = kNUM_CLASSES; 130 | 131 | return NvDsInferParseCustomYolo( 132 | outputLayersInfo, networkInfo, detectionParams, objectList, num_classes); 133 | } 134 | 135 | CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYolo); 136 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/utils.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "utils.h" 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | static void leftTrim(std::string& s) 34 | { 35 | s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); 36 | } 37 | 38 | static void rightTrim(std::string& s) 39 | { 40 | s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); 41 | } 42 | 43 | std::string trim(std::string s) 44 | { 45 | leftTrim(s); 46 | rightTrim(s); 47 | return s; 48 | } 49 | 50 | float clamp(const float val, const float minVal, const float maxVal) 51 | { 52 | assert(minVal <= maxVal); 53 | return std::min(maxVal, std::max(minVal, val)); 54 | } 55 | 56 | bool fileExists(const std::string fileName, bool verbose) 57 | { 58 | if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName))) 59 | { 60 | if (verbose) std::cout << "\nFile does not exist: " << fileName << std::endl; 61 | return false; 62 | } 63 | return true; 64 | } 65 | 66 | std::vector loadWeights(const std::string weightsFilePath, const std::string& networkType) 67 | { 68 | assert(fileExists(weightsFilePath)); 69 | std::cout << "\nLoading pre-trained weights" << std::endl; 70 | 71 | std::vector weights; 72 | 73 | if (weightsFilePath.find(".weights") != std::string::npos) { 74 | std::ifstream file(weightsFilePath, std::ios_base::binary); 75 | assert(file.good()); 76 | std::string line; 77 | 78 | if (networkType.find("yolov2") != std::string::npos && networkType.find("yolov2-tiny") == std::string::npos) 79 | { 80 | // Remove 4 int32 bytes of data from the stream belonging to the header 81 | file.ignore(4 * 4); 82 | } 83 | else 84 | { 85 | // Remove 5 int32 bytes of data from the stream belonging to the header 86 | file.ignore(4 * 5); 87 | } 88 | 89 | char floatWeight[4]; 90 | while (!file.eof()) 91 | { 92 | file.read(floatWeight, 4); 93 | assert(file.gcount() == 4); 94 | weights.push_back(*reinterpret_cast(floatWeight)); 95 | if (file.peek() == std::istream::traits_type::eof()) break; 96 | } 97 | } 98 | 99 | else if (weightsFilePath.find(".wts") != std::string::npos) { 100 | std::ifstream file(weightsFilePath); 101 | assert(file.good()); 102 | int32_t count; 103 | file >> count; 104 | assert(count > 0 && "\nInvalid .wts file."); 105 | 106 | uint32_t floatWeight; 107 | std::string name; 108 | uint32_t size; 109 | 110 | while (count--) { 111 | file >> name >> std::dec >> size; 112 | for (uint32_t x = 0, y = size; x < y; ++x) 113 | { 114 | file >> std::hex >> floatWeight; 115 | weights.push_back(*reinterpret_cast(&floatWeight)); 116 | }; 117 | } 118 | } 119 | 120 | else { 121 | std::cerr << "\nFile " << weightsFilePath << " is not supported" << std::endl; 122 | std::abort(); 123 | } 124 | 125 | std::cout << "Loading weights of " << networkType << " complete" 126 | << std::endl; 127 | std::cout << "Total weights read: " << weights.size() << std::endl; 128 | return weights; 129 | } 130 | 131 | std::string dimsToString(const nvinfer1::Dims d) 132 | { 133 | std::stringstream s; 134 | assert(d.nbDims >= 1); 135 | s << "["; 136 | for (int i = 0; i < d.nbDims - 1; ++i) 137 | s << d.d[i] << ", "; 138 | s << d.d[d.nbDims - 1] << "]"; 139 | 140 | return s.str(); 141 | } 142 | 143 | int getNumChannels(nvinfer1::ITensor* t) 144 | { 145 | nvinfer1::Dims d = t->getDimensions(); 146 | assert(d.nbDims == 3); 147 | 148 | return d.d[0]; 149 | } 150 | 151 | void printLayerInfo( 152 | 
std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr) 153 | { 154 | std::cout << std::setw(8) << std::left << layerIndex << std::setw(30) << std::left << layerName; 155 | std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left << layerOutput; 156 | std::cout << weightPtr << std::endl; 157 | } 158 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | 27 | #ifndef __UTILS_H__ 28 | #define __UTILS_H__ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "NvInfer.h" 37 | 38 | std::string trim(std::string s); 39 | float clamp(const float val, const float minVal, const float maxVal); 40 | bool fileExists(const std::string fileName, bool verbose = true); 41 | std::vector loadWeights(const std::string weightsFilePath, const std::string& networkType); 42 | std::string dimsToString(const nvinfer1::Dims d); 43 | int getNumChannels(nvinfer1::ITensor* t); 44 | void printLayerInfo( 45 | std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr); 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yolo.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "yolo.h" 27 | #include "yoloPlugins.h" 28 | #include 29 | 30 | #ifdef OPENCV 31 | #include "calibrator.h" 32 | #endif 33 | 34 | Yolo::Yolo(const NetworkInfo& networkInfo) 35 | : m_InputBlobName(networkInfo.inputBlobName), 36 | m_NetworkType(networkInfo.networkType), 37 | m_ConfigFilePath(networkInfo.configFilePath), 38 | m_WtsFilePath(networkInfo.wtsFilePath), 39 | m_Int8CalibPath(networkInfo.int8CalibPath), 40 | m_DeviceType(networkInfo.deviceType), 41 | m_NumDetectedClasses(networkInfo.numDetectedClasses), 42 | m_ClusterMode(networkInfo.clusterMode), 43 | m_NetworkMode(networkInfo.networkMode), 44 | m_ScoreThreshold(networkInfo.scoreThreshold), 45 | m_InputH(0), 46 | m_InputW(0), 47 | m_InputC(0), 48 | m_InputSize(0), 49 | m_NumClasses(0), 50 | m_LetterBox(0), 51 | m_NewCoords(0), 52 | m_YoloCount(0) 53 | {} 54 | 55 | Yolo::~Yolo() 56 | { 57 | destroyNetworkUtils(); 58 | } 59 | 60 | nvinfer1::ICudaEngine *Yolo::createEngine(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config) 61 | { 62 | assert (builder); 63 | 64 | m_ConfigBlocks = parseConfigFile(m_ConfigFilePath); 65 | parseConfigBlocks(); 66 | 67 | nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0); 68 | if (parseModel(*network) != NVDSINFER_SUCCESS) 69 | { 70 | delete network; 71 | return nullptr; 72 | } 73 | 74 | std::cout << "Building the TensorRT Engine\n" << std::endl; 75 | 76 | if (m_NumClasses != m_NumDetectedClasses) 77 | { 78 | std::cout << "NOTE: Number of classes mismatch, make sure to set num-detected-classes=" << m_NumClasses 79 | << " in config_infer file\n" << std::endl; 80 | } 81 | if (m_LetterBox == 1) 82 | { 83 | std::cout << "NOTE: letter_box is set in cfg file, make sure to set maintain-aspect-ratio=1 in config_infer file" 84 | << " to get better accuracy\n" << std::endl; 85 | } 86 | if (m_ClusterMode != 2) 87 | { 88 | std::cout << "NOTE: Wrong cluster-mode is set, make sure to set cluster-mode=2 in config_infer file\n" 89 | << std::endl; 90 | } 91 | 92 | if (m_NetworkMode == "INT8" && !fileExists(m_Int8CalibPath)) 93 | { 94 | assert(builder->platformHasFastInt8()); 95 | #ifdef OPENCV 96 | std::string calib_image_list; 97 | int calib_batch_size; 98 | if (getenv("INT8_CALIB_IMG_PATH")) 99 | calib_image_list = getenv("INT8_CALIB_IMG_PATH"); 100 | else 101 | { 102 | std::cerr << "INT8_CALIB_IMG_PATH not set" << std::endl; 103 | std::abort(); 104 | } 105 | if (getenv("INT8_CALIB_BATCH_SIZE")) 106 | calib_batch_size = std::stoi(getenv("INT8_CALIB_BATCH_SIZE")); 107 | else 108 | { 109 | std::cerr << "INT8_CALIB_BATCH_SIZE not set" << std::endl; 110 | std::abort(); 111 | } 112 | nvinfer1::Int8EntropyCalibrator2 *calibrator = new nvinfer1::Int8EntropyCalibrator2( 113 | calib_batch_size, m_InputC, m_InputH, m_InputW, m_LetterBox, calib_image_list, m_Int8CalibPath); 114 | config->setFlag(nvinfer1::BuilderFlag::kINT8); 115 | config->setInt8Calibrator(calibrator); 116 | #else 117 | 
std::cerr << "OpenCV is required to run INT8 calibrator\n" << std::endl; 118 | assert(0); 119 | #endif 120 | } 121 | 122 | nvinfer1::ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config); 123 | if (engine) 124 | std::cout << "Building complete\n" << std::endl; 125 | else 126 | std::cerr << "Building engine failed\n" << std::endl; 127 | 128 | delete network; 129 | return engine; 130 | } 131 | 132 | NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) { 133 | destroyNetworkUtils(); 134 | 135 | std::vector weights = loadWeights(m_WtsFilePath, m_NetworkType); 136 | std::cout << "Building YOLO network\n" << std::endl; 137 | NvDsInferStatus status = buildYoloNetwork(weights, network); 138 | 139 | if (status == NVDSINFER_SUCCESS) 140 | std::cout << "Building YOLO network complete" << std::endl; 141 | else 142 | std::cerr << "Building YOLO network failed" << std::endl; 143 | 144 | return status; 145 | } 146 | 147 | NvDsInferStatus Yolo::buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition& network) 148 | { 149 | int weightPtr = 0; 150 | 151 | std::string weightsType; 152 | if (m_WtsFilePath.find(".weights") != std::string::npos) 153 | weightsType = "weights"; 154 | else 155 | weightsType = "wts"; 156 | 157 | float eps = 1.0e-5; 158 | if (m_NetworkType.find("yolov5") != std::string::npos || m_NetworkType.find("yolov7") != std::string::npos) 159 | eps = 1.0e-3; 160 | else if (m_NetworkType.find("yolor") != std::string::npos) 161 | eps = 1.0e-4; 162 | 163 | nvinfer1::ITensor* data = network.addInput( 164 | m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, 165 | nvinfer1::Dims{3, {static_cast(m_InputC), static_cast(m_InputH), static_cast(m_InputW)}}); 166 | assert(data != nullptr && data->getDimensions().nbDims > 0); 167 | 168 | nvinfer1::ITensor* previous = data; 169 | std::vector tensorOutputs; 170 | 171 | nvinfer1::ITensor* yoloTensorInputs[m_YoloCount]; 172 | uint yoloCountInputs = 0; 173 | 174 | int modelType = -1; 175 | 176 | for (uint i = 0; i < m_ConfigBlocks.size(); ++i) 177 | { 178 | std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")"; 179 | 180 | if (m_ConfigBlocks.at(i).at("type") == "net") 181 | printLayerInfo("", "Layer", "Input Shape", "Output Shape", "WeightPtr"); 182 | 183 | else if (m_ConfigBlocks.at(i).at("type") == "convolutional") 184 | { 185 | int channels = getNumChannels(previous); 186 | std::string inputVol = dimsToString(previous->getDimensions()); 187 | previous = convolutionalLayer( 188 | i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, channels, eps, previous, &network); 189 | assert(previous != nullptr); 190 | std::string outputVol = dimsToString(previous->getDimensions()); 191 | tensorOutputs.push_back(previous); 192 | std::string layerName = "conv_" + m_ConfigBlocks.at(i).at("activation"); 193 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); 194 | } 195 | 196 | else if (m_ConfigBlocks.at(i).at("type") == "batchnorm") 197 | { 198 | std::string inputVol = dimsToString(previous->getDimensions()); 199 | previous = batchnormLayer( 200 | i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, weightsType, eps, previous, &network); 201 | assert(previous != nullptr); 202 | std::string outputVol = dimsToString(previous->getDimensions()); 203 | tensorOutputs.push_back(previous); 204 | std::string layerName = "batchnorm_" + m_ConfigBlocks.at(i).at("activation"); 205 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, 
std::to_string(weightPtr)); 206 | } 207 | 208 | else if (m_ConfigBlocks.at(i).at("type") == "implicit_add" || m_ConfigBlocks.at(i).at("type") == "implicit_mul") 209 | { 210 | previous = implicitLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, &network); 211 | assert(previous != nullptr); 212 | std::string outputVol = dimsToString(previous->getDimensions()); 213 | tensorOutputs.push_back(previous); 214 | std::string layerName = m_ConfigBlocks.at(i).at("type"); 215 | printLayerInfo(layerIndex, layerName, "-", outputVol, std::to_string(weightPtr)); 216 | } 217 | 218 | else if (m_ConfigBlocks.at(i).at("type") == "shift_channels" || 219 | m_ConfigBlocks.at(i).at("type") == "control_channels") 220 | { 221 | assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end()); 222 | int from = stoi(m_ConfigBlocks.at(i).at("from")); 223 | if (from > 0) 224 | from = from - i + 1; 225 | assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); 226 | assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); 227 | assert(i + from - 1 < i - 2); 228 | 229 | std::string inputVol = dimsToString(previous->getDimensions()); 230 | previous = channelsLayer(i, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], &network); 231 | assert(previous != nullptr); 232 | std::string outputVol = dimsToString(previous->getDimensions()); 233 | tensorOutputs.push_back(previous); 234 | std::string layerName = m_ConfigBlocks.at(i).at("type") + ": " + std::to_string(i + from - 1); 235 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 236 | } 237 | 238 | else if (m_ConfigBlocks.at(i).at("type") == "shortcut") 239 | { 240 | assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end()); 241 | int from = stoi(m_ConfigBlocks.at(i).at("from")); 242 | if (from > 0) 243 | from = from - i + 1; 244 | assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); 245 | assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); 246 | assert(i + from - 1 < i - 2); 247 | 248 | std::string mode = "add"; 249 | if (m_ConfigBlocks.at(i).find("mode") != m_ConfigBlocks.at(i).end()) 250 | mode = m_ConfigBlocks.at(i).at("mode"); 251 | 252 | std::string activation = "linear"; 253 | if (m_ConfigBlocks.at(i).find("activation") != m_ConfigBlocks.at(i).end()) 254 | activation = m_ConfigBlocks.at(i).at("activation"); 255 | 256 | std::string inputVol = dimsToString(previous->getDimensions()); 257 | std::string shortcutVol = dimsToString(tensorOutputs[i + from - 1]->getDimensions()); 258 | previous = shortcutLayer( 259 | i, mode, activation, inputVol, shortcutVol, m_ConfigBlocks.at(i), previous, tensorOutputs[i + from - 1], 260 | &network); 261 | assert(previous != nullptr); 262 | std::string outputVol = dimsToString(previous->getDimensions()); 263 | tensorOutputs.push_back(previous); 264 | std::string layerName = "shortcut_" + mode + "_" + activation + ": " + std::to_string(i + from - 1); 265 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 266 | 267 | if (mode == "add" && inputVol != shortcutVol) 268 | std::cout << inputVol << " +" << shortcutVol << std::endl; 269 | } 270 | 271 | else if (m_ConfigBlocks.at(i).at("type") == "route") 272 | { 273 | std::string layers; 274 | previous = routeLayer(i, layers, m_ConfigBlocks.at(i), tensorOutputs, &network); 275 | assert(previous != nullptr); 276 | std::string outputVol = dimsToString(previous->getDimensions()); 277 | tensorOutputs.push_back(previous); 278 | std::string layerName = "route: " + layers; 279 | 
printLayerInfo(layerIndex, layerName, "-", outputVol, "-"); 280 | } 281 | 282 | else if (m_ConfigBlocks.at(i).at("type") == "upsample") 283 | { 284 | std::string inputVol = dimsToString(previous->getDimensions()); 285 | previous = upsampleLayer(i, m_ConfigBlocks[i], previous, &network); 286 | assert(previous != nullptr); 287 | std::string outputVol = dimsToString(previous->getDimensions()); 288 | tensorOutputs.push_back(previous); 289 | std::string layerName = "upsample"; 290 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 291 | } 292 | 293 | else if (m_ConfigBlocks.at(i).at("type") == "maxpool" || m_ConfigBlocks.at(i).at("type") == "avgpool") 294 | { 295 | std::string inputVol = dimsToString(previous->getDimensions()); 296 | previous = poolingLayer(i, m_ConfigBlocks.at(i), previous, &network); 297 | assert(previous != nullptr); 298 | std::string outputVol = dimsToString(previous->getDimensions()); 299 | tensorOutputs.push_back(previous); 300 | std::string layerName = m_ConfigBlocks.at(i).at("type"); 301 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 302 | } 303 | 304 | else if (m_ConfigBlocks.at(i).at("type") == "reorg") 305 | { 306 | std::string inputVol = dimsToString(previous->getDimensions()); 307 | if (m_NetworkType.find("yolov2") != std::string::npos) { 308 | nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2); 309 | assert(reorgPlugin != nullptr); 310 | nvinfer1::IPluginV2Layer* reorg = network.addPluginV2(&previous, 1, *reorgPlugin); 311 | assert(reorg != nullptr); 312 | std::string reorglayerName = "reorg_" + std::to_string(i); 313 | reorg->setName(reorglayerName.c_str()); 314 | previous = reorg->getOutput(0); 315 | } 316 | else 317 | previous = reorgLayer(i, m_ConfigBlocks.at(i), previous, &network); 318 | assert(previous != nullptr); 319 | std::string outputVol = dimsToString(previous->getDimensions()); 320 | tensorOutputs.push_back(previous); 321 | std::string layerName = "reorg"; 322 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 323 | } 324 | 325 | else if (m_ConfigBlocks.at(i).at("type") == "reduce") 326 | { 327 | std::string inputVol = dimsToString(previous->getDimensions()); 328 | previous = reduceLayer(i, m_ConfigBlocks.at(i), previous, &network); 329 | assert(previous != nullptr); 330 | std::string outputVol = dimsToString(previous->getDimensions()); 331 | tensorOutputs.push_back(previous); 332 | std::string layerName = "reduce"; 333 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 334 | } 335 | 336 | else if (m_ConfigBlocks.at(i).at("type") == "shuffle") 337 | { 338 | std::string layer; 339 | std::string inputVol = dimsToString(previous->getDimensions()); 340 | previous = shuffleLayer(i, layer, m_ConfigBlocks.at(i), previous, tensorOutputs, &network); 341 | assert(previous != nullptr); 342 | std::string outputVol = dimsToString(previous->getDimensions()); 343 | tensorOutputs.push_back(previous); 344 | std::string layerName = "shuffle: " + layer; 345 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 346 | } 347 | 348 | else if (m_ConfigBlocks.at(i).at("type") == "softmax") 349 | { 350 | std::string inputVol = dimsToString(previous->getDimensions()); 351 | previous = softmaxLayer(i, m_ConfigBlocks.at(i), previous, &network); 352 | assert(previous != nullptr); 353 | std::string outputVol = dimsToString(previous->getDimensions()); 354 | tensorOutputs.push_back(previous); 355 | std::string layerName = "softmax"; 356 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, 
"-"); 357 | } 358 | 359 | else if (m_ConfigBlocks.at(i).at("type") == "yolo" || m_ConfigBlocks.at(i).at("type") == "region") 360 | { 361 | if (m_ConfigBlocks.at(i).at("type") == "yolo") 362 | if (m_NetworkType.find("yolor") != std::string::npos) 363 | modelType = 2; 364 | else 365 | modelType = 1; 366 | else 367 | modelType = 0; 368 | 369 | std::string blobName = modelType != 0 ? "yolo_" + std::to_string(i) : "region_" + std::to_string(i); 370 | nvinfer1::Dims prevTensorDims = previous->getDimensions(); 371 | TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); 372 | curYoloTensor.blobName = blobName; 373 | curYoloTensor.gridSizeX = prevTensorDims.d[2]; 374 | curYoloTensor.gridSizeY = prevTensorDims.d[1]; 375 | 376 | std::string inputVol = dimsToString(previous->getDimensions()); 377 | tensorOutputs.push_back(previous); 378 | yoloTensorInputs[yoloCountInputs] = previous; 379 | ++yoloCountInputs; 380 | std::string layerName = modelType != 0 ? "yolo" : "region"; 381 | printLayerInfo(layerIndex, layerName, inputVol, "-", "-"); 382 | } 383 | 384 | else if (m_ConfigBlocks.at(i).at("type") == "cls") 385 | { 386 | modelType = 3; 387 | 388 | std::string blobName = "cls_" + std::to_string(i); 389 | nvinfer1::Dims prevTensorDims = previous->getDimensions(); 390 | TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); 391 | curYoloTensor.blobName = blobName; 392 | curYoloTensor.numBBoxes = prevTensorDims.d[1]; 393 | m_NumClasses = prevTensorDims.d[0]; 394 | 395 | std::string inputVol = dimsToString(previous->getDimensions()); 396 | previous = clsLayer(i, m_ConfigBlocks.at(i), previous, &network); 397 | assert(previous != nullptr); 398 | std::string outputVol = dimsToString(previous->getDimensions()); 399 | tensorOutputs.push_back(previous); 400 | yoloTensorInputs[yoloCountInputs] = previous; 401 | ++yoloCountInputs; 402 | std::string layerName = "cls"; 403 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, "-"); 404 | } 405 | 406 | else if (m_ConfigBlocks.at(i).at("type") == "reg") 407 | { 408 | modelType = 3; 409 | 410 | std::string blobName = "reg_" + std::to_string(i); 411 | nvinfer1::Dims prevTensorDims = previous->getDimensions(); 412 | TensorInfo& curYoloTensor = m_YoloTensors.at(yoloCountInputs); 413 | curYoloTensor.blobName = blobName; 414 | curYoloTensor.numBBoxes = prevTensorDims.d[1]; 415 | 416 | std::string inputVol = dimsToString(previous->getDimensions()); 417 | previous = regLayer(i, m_ConfigBlocks.at(i), weights, m_TrtWeights, weightPtr, previous, &network); 418 | assert(previous != nullptr); 419 | std::string outputVol = dimsToString(previous->getDimensions()); 420 | tensorOutputs.push_back(previous); 421 | yoloTensorInputs[yoloCountInputs] = previous; 422 | ++yoloCountInputs; 423 | std::string layerName = "reg"; 424 | printLayerInfo(layerIndex, layerName, inputVol, outputVol, std::to_string(weightPtr)); 425 | } 426 | 427 | else 428 | { 429 | std::cout << "\nUnsupported layer type --> \"" << m_ConfigBlocks.at(i).at("type") << "\"" << std::endl; 430 | assert(0); 431 | } 432 | } 433 | 434 | if ((int)weights.size() != weightPtr) 435 | { 436 | std::cout << "\nNumber of unused weights left: " << weights.size() - weightPtr << std::endl; 437 | assert(0); 438 | } 439 | 440 | if (m_YoloCount == yoloCountInputs) 441 | { 442 | assert((modelType != -1) && "\nCould not determine model type"); 443 | 444 | uint64_t outputSize = 0; 445 | for (uint j = 0; j < yoloCountInputs; ++j) 446 | { 447 | TensorInfo& curYoloTensor = m_YoloTensors.at(j); 448 | if (modelType == 3) 449 
| outputSize = curYoloTensor.numBBoxes; 450 | else 451 | outputSize += curYoloTensor.gridSizeX * curYoloTensor.gridSizeY * curYoloTensor.numBBoxes; 452 | } 453 | 454 | nvinfer1::IPluginV2* yoloPlugin = new YoloLayer( 455 | m_InputW, m_InputH, m_NumClasses, m_NewCoords, m_YoloTensors, outputSize, modelType, m_ScoreThreshold); 456 | assert(yoloPlugin != nullptr); 457 | nvinfer1::IPluginV2Layer* yolo = network.addPluginV2(yoloTensorInputs, m_YoloCount, *yoloPlugin); 458 | assert(yolo != nullptr); 459 | std::string yoloLayerName = "yolo"; 460 | yolo->setName(yoloLayerName.c_str()); 461 | 462 | std::string outputlayerName; 463 | nvinfer1::ITensor* num_detections = yolo->getOutput(0); 464 | outputlayerName = "num_detections"; 465 | num_detections->setName(outputlayerName.c_str()); 466 | nvinfer1::ITensor* detection_boxes = yolo->getOutput(1); 467 | outputlayerName = "detection_boxes"; 468 | detection_boxes->setName(outputlayerName.c_str()); 469 | nvinfer1::ITensor* detection_scores = yolo->getOutput(2); 470 | outputlayerName = "detection_scores"; 471 | detection_scores->setName(outputlayerName.c_str()); 472 | nvinfer1::ITensor* detection_classes = yolo->getOutput(3); 473 | outputlayerName = "detection_classes"; 474 | detection_classes->setName(outputlayerName.c_str()); 475 | network.markOutput(*num_detections); 476 | network.markOutput(*detection_boxes); 477 | network.markOutput(*detection_scores); 478 | network.markOutput(*detection_classes); 479 | } 480 | else { 481 | std::cout << "\nError in yolo cfg file" << std::endl; 482 | assert(0); 483 | } 484 | 485 | std::cout << "\nOutput YOLO blob names: " << std::endl; 486 | for (auto& tensor : m_YoloTensors) 487 | { 488 | std::cout << tensor.blobName << std::endl; 489 | } 490 | 491 | int nbLayers = network.getNbLayers(); 492 | std::cout << "\nTotal number of YOLO layers: " << nbLayers << "\n" << std::endl; 493 | 494 | return NVDSINFER_SUCCESS; 495 | } 496 | 497 | std::vector> 498 | Yolo::parseConfigFile (const std::string cfgFilePath) 499 | { 500 | assert(fileExists(cfgFilePath)); 501 | std::ifstream file(cfgFilePath); 502 | assert(file.good()); 503 | std::string line; 504 | std::vector> blocks; 505 | std::map block; 506 | 507 | while (getline(file, line)) 508 | { 509 | if (line.size() == 0) continue; 510 | if (line.front() == ' ') continue; 511 | if (line.front() == '#') continue; 512 | line = trim(line); 513 | if (line.front() == '[') 514 | { 515 | if (block.size() > 0) 516 | { 517 | blocks.push_back(block); 518 | block.clear(); 519 | } 520 | std::string key = "type"; 521 | std::string value = trim(line.substr(1, line.size() - 2)); 522 | block.insert(std::pair(key, value)); 523 | } 524 | else 525 | { 526 | int cpos = line.find('='); 527 | std::string key = trim(line.substr(0, cpos)); 528 | std::string value = trim(line.substr(cpos + 1)); 529 | block.insert(std::pair(key, value)); 530 | } 531 | } 532 | blocks.push_back(block); 533 | return blocks; 534 | } 535 | 536 | void Yolo::parseConfigBlocks() 537 | { 538 | for (auto block : m_ConfigBlocks) 539 | { 540 | if (block.at("type") == "net") 541 | { 542 | assert((block.find("height") != block.end()) && "Missing 'height' param in network cfg"); 543 | assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg"); 544 | assert((block.find("channels") != block.end()) && "Missing 'channels' param in network cfg"); 545 | 546 | m_InputH = std::stoul(block.at("height")); 547 | m_InputW = std::stoul(block.at("width")); 548 | m_InputC = std::stoul(block.at("channels")); 549 | 
m_InputSize = m_InputC * m_InputH * m_InputW; 550 | 551 | if (block.find("letter_box") != block.end()) 552 | { 553 | m_LetterBox = std::stoul(block.at("letter_box")); 554 | } 555 | } 556 | else if ((block.at("type") == "region") || (block.at("type") == "yolo")) 557 | { 558 | assert((block.find("num") != block.end()) 559 | && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str()); 560 | assert((block.find("classes") != block.end()) 561 | && std::string("Missing 'classes' param in " + block.at("type") + " layer").c_str()); 562 | assert((block.find("anchors") != block.end()) 563 | && std::string("Missing 'anchors' param in " + block.at("type") + " layer").c_str()); 564 | 565 | ++m_YoloCount; 566 | 567 | m_NumClasses = std::stoul(block.at("classes")); 568 | 569 | if (block.find("new_coords") != block.end()) 570 | { 571 | m_NewCoords = std::stoul(block.at("new_coords")); 572 | } 573 | 574 | TensorInfo outputTensor; 575 | 576 | std::string anchorString = block.at("anchors"); 577 | while (!anchorString.empty()) 578 | { 579 | int npos = anchorString.find_first_of(','); 580 | if (npos != -1) 581 | { 582 | float anchor = std::stof(trim(anchorString.substr(0, npos))); 583 | outputTensor.anchors.push_back(anchor); 584 | anchorString.erase(0, npos + 1); 585 | } 586 | else 587 | { 588 | float anchor = std::stof(trim(anchorString)); 589 | outputTensor.anchors.push_back(anchor); 590 | break; 591 | } 592 | } 593 | 594 | if (block.find("mask") != block.end()) 595 | { 596 | std::string maskString = block.at("mask"); 597 | while (!maskString.empty()) 598 | { 599 | int npos = maskString.find_first_of(','); 600 | if (npos != -1) 601 | { 602 | int mask = std::stoul(trim(maskString.substr(0, npos))); 603 | outputTensor.mask.push_back(mask); 604 | maskString.erase(0, npos + 1); 605 | } 606 | else 607 | { 608 | int mask = std::stoul(trim(maskString)); 609 | outputTensor.mask.push_back(mask); 610 | break; 611 | } 612 | } 613 | } 614 | 615 | if (block.find("scale_x_y") != block.end()) 616 | { 617 | outputTensor.scaleXY = std::stof(block.at("scale_x_y")); 618 | } 619 | else 620 | { 621 | outputTensor.scaleXY = 1.0; 622 | } 623 | 624 | outputTensor.numBBoxes 625 | = outputTensor.mask.size() > 0 ? outputTensor.mask.size() : std::stoul(trim(block.at("num"))); 626 | 627 | m_YoloTensors.push_back(outputTensor); 628 | } 629 | else if ((block.at("type") == "cls") || (block.at("type") == "reg")) 630 | { 631 | ++m_YoloCount; 632 | TensorInfo outputTensor; 633 | m_YoloTensors.push_back(outputTensor); 634 | } 635 | } 636 | } 637 | 638 | void Yolo::destroyNetworkUtils() 639 | { 640 | for (uint i = 0; i < m_TrtWeights.size(); ++i) 641 | if (m_TrtWeights[i].count > 0) 642 | free(const_cast(m_TrtWeights[i].values)); 643 | m_TrtWeights.clear(); 644 | } 645 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yolo.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #ifndef _YOLO_H_ 27 | #define _YOLO_H_ 28 | 29 | #include "layers/convolutional_layer.h" 30 | #include "layers/batchnorm_layer.h" 31 | #include "layers/implicit_layer.h" 32 | #include "layers/channels_layer.h" 33 | #include "layers/shortcut_layer.h" 34 | #include "layers/route_layer.h" 35 | #include "layers/upsample_layer.h" 36 | #include "layers/pooling_layer.h" 37 | #include "layers/reorg_layer.h" 38 | #include "layers/reduce_layer.h" 39 | #include "layers/shuffle_layer.h" 40 | #include "layers/softmax_layer.h" 41 | #include "layers/cls_layer.h" 42 | #include "layers/reg_layer.h" 43 | 44 | #include "nvdsinfer_custom_impl.h" 45 | 46 | struct NetworkInfo 47 | { 48 | std::string inputBlobName; 49 | std::string networkType; 50 | std::string configFilePath; 51 | std::string wtsFilePath; 52 | std::string int8CalibPath; 53 | std::string deviceType; 54 | uint numDetectedClasses; 55 | int clusterMode; 56 | float scoreThreshold; 57 | std::string networkMode; 58 | }; 59 | 60 | struct TensorInfo 61 | { 62 | std::string blobName; 63 | uint gridSizeX {0}; 64 | uint gridSizeY {0}; 65 | uint numBBoxes {0}; 66 | float scaleXY; 67 | std::vector anchors; 68 | std::vector mask; 69 | }; 70 | 71 | class Yolo : public IModelParser { 72 | public: 73 | Yolo(const NetworkInfo& networkInfo); 74 | 75 | ~Yolo() override; 76 | 77 | bool hasFullDimsSupported() const override { return false; } 78 | 79 | const char* getModelName() const override { 80 | return m_ConfigFilePath.empty() ? 
m_NetworkType.c_str() : m_ConfigFilePath.c_str(); 81 | } 82 | 83 | NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override; 84 | 85 | nvinfer1::ICudaEngine *createEngine (nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config); 86 | 87 | protected: 88 | const std::string m_InputBlobName; 89 | const std::string m_NetworkType; 90 | const std::string m_ConfigFilePath; 91 | const std::string m_WtsFilePath; 92 | const std::string m_Int8CalibPath; 93 | const std::string m_DeviceType; 94 | const uint m_NumDetectedClasses; 95 | const int m_ClusterMode; 96 | const std::string m_NetworkMode; 97 | const float m_ScoreThreshold; 98 | 99 | uint m_InputH; 100 | uint m_InputW; 101 | uint m_InputC; 102 | uint64_t m_InputSize; 103 | uint m_NumClasses; 104 | uint m_LetterBox; 105 | uint m_NewCoords; 106 | uint m_YoloCount; 107 | 108 | std::vector m_YoloTensors; 109 | std::vector> m_ConfigBlocks; 110 | std::vector m_TrtWeights; 111 | 112 | private: 113 | NvDsInferStatus buildYoloNetwork(std::vector& weights, nvinfer1::INetworkDefinition& network); 114 | 115 | std::vector> parseConfigFile(const std::string cfgFilePath); 116 | 117 | void parseConfigBlocks(); 118 | 119 | void destroyNetworkUtils(); 120 | }; 121 | 122 | #endif // _YOLO_H_ 123 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | 8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 9 | 10 | __global__ void gpuYoloLayer( 11 | const float* input, int* num_detections, float* detection_boxes, float* detection_scores, int* detection_classes, 12 | const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, 13 | const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors, const int* mask) 14 | { 15 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 16 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 17 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 18 | 19 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) 20 | return; 21 | 22 | const int numGridCells = gridSizeX * gridSizeY; 23 | const int bbindex = y_id * gridSizeX + x_id; 24 | 25 | const float objectness 26 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 27 | 28 | if (objectness < scoreThreshold) 29 | return; 30 | 31 | int count = (int)atomicAdd(num_detections, 1); 32 | 33 | const float alpha = scaleXY; 34 | const float beta = -0.5 * (scaleXY - 1); 35 | 36 | float x 37 | = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) 38 | * alpha + beta + x_id) * netWidth / gridSizeX; 39 | 40 | float y 41 | = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) 42 | * alpha + beta + y_id) * netHeight / gridSizeY; 43 | 44 | float w 45 | = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) 46 | * anchors[mask[z_id] * 2]; 47 | 48 | float h 49 | = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) 50 | * anchors[mask[z_id] * 2 + 1]; 51 | 52 | float maxProb = 0.0f; 53 | int maxIndex = -1; 54 | 55 | for (uint i = 0; i < numOutputClasses; ++i) 56 | { 57 | float prob 58 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + 
numOutputClasses) + (5 + i))]); 59 | 60 | if (prob > maxProb) 61 | { 62 | maxProb = prob; 63 | maxIndex = i; 64 | } 65 | } 66 | 67 | detection_boxes[count * 4 + 0] = x - 0.5 * w; 68 | detection_boxes[count * 4 + 1] = y - 0.5 * h; 69 | detection_boxes[count * 4 + 2] = x + 0.5 * w; 70 | detection_boxes[count * 4 + 3] = y + 0.5 * h; 71 | detection_scores[count] = objectness * maxProb; 72 | detection_classes[count] = maxIndex; 73 | } 74 | 75 | cudaError_t cudaYoloLayer( 76 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 77 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 78 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 79 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 80 | 81 | cudaError_t cudaYoloLayer( 82 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 83 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 84 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 85 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) 86 | { 87 | dim3 threads_per_block(16, 16, 4); 88 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, 89 | (gridSizeY / threads_per_block.y) + 1, 90 | (numBBoxes / threads_per_block.z) + 1); 91 | 92 | for (unsigned int batch = 0; batch < batchSize; ++batch) 93 | { 94 | gpuYoloLayer<<<number_of_blocks, threads_per_block, 0, stream>>>( 95 | reinterpret_cast<const float*>(input) + (batch * inputSize), 96 | reinterpret_cast<int*>(num_detections) + (batch), 97 | reinterpret_cast<float*>(detection_boxes) + (batch * 4 * outputSize), 98 | reinterpret_cast<float*>(detection_scores) + (batch * outputSize), 99 | reinterpret_cast<int*>(detection_classes) + (batch * outputSize), 100 | scoreThreshold, netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, 101 | reinterpret_cast<const float*>(anchors), reinterpret_cast<const int*>(mask)); 102 | } 103 | return cudaGetLastError(); 104 | } 105 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward_e.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include <cstdint> 7 | #include <cuda_runtime.h> 8 | 9 | __global__ void gpuYoloLayer_e( 10 | const float* cls, const float* reg, int* num_detections, float* detection_boxes, float* detection_scores, 11 | int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, 12 | const uint numOutputClasses, const uint64_t outputSize) 13 | { 14 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 15 | 16 | if (x_id >= outputSize) 17 | return; 18 | 19 | float maxProb = 0.0f; 20 | int maxIndex = -1; 21 | 22 | for (uint i = 0; i < numOutputClasses; ++i) 23 | { 24 | float prob 25 | = cls[x_id * numOutputClasses + i]; 26 | 27 | if (prob > maxProb) 28 | { 29 | maxProb = prob; 30 | maxIndex = i; 31 | } 32 | } 33 | 34 | if (maxProb < scoreThreshold) 35 | return; 36 | 37 | int count = (int)atomicAdd(num_detections, 1); 38 | 39 | detection_boxes[count * 4 + 0] = reg[x_id * 4 + 0]; 40 | detection_boxes[count * 4 + 1] = reg[x_id * 4 + 1]; 41 | detection_boxes[count * 4 + 2] = reg[x_id * 4 + 2]; 42 |
detection_boxes[count * 4 + 3] = reg[x_id * 4 + 3]; 43 | detection_scores[count] = maxProb; 44 | detection_classes[count] = maxIndex; 45 | } 46 | 47 | cudaError_t cudaYoloLayer_e( 48 | const void* cls, const void* reg, void* num_detections, void* detection_boxes, void* detection_scores, 49 | void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 50 | const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream); 51 | 52 | cudaError_t cudaYoloLayer_e( 53 | const void* cls, const void* reg, void* num_detections, void* detection_boxes, void* detection_scores, 54 | void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 55 | const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream) 56 | { 57 | int threads_per_block = 16; 58 | int number_of_blocks = (outputSize / threads_per_block) + 1; 59 | 60 | for (unsigned int batch = 0; batch < batchSize; ++batch) 61 | { 62 | gpuYoloLayer_e<<>>( 63 | reinterpret_cast(cls) + (batch * numOutputClasses * outputSize), 64 | reinterpret_cast(reg) + (batch * 4 * outputSize), 65 | reinterpret_cast(num_detections) + (batch), 66 | reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), 67 | reinterpret_cast(detection_scores) + (batch * outputSize), 68 | reinterpret_cast(detection_classes) + (batch * outputSize), 69 | scoreThreshold, netWidth, netHeight, numOutputClasses, outputSize); 70 | } 71 | return cudaGetLastError(); 72 | } 73 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward_nc.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | 8 | __global__ void gpuYoloLayer_nc( 9 | const float* input, int* num_detections, float* detection_boxes, float* detection_scores, int* detection_classes, 10 | const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, 11 | const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors, const int* mask) 12 | { 13 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 14 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 15 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 16 | 17 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) 18 | return; 19 | 20 | const int numGridCells = gridSizeX * gridSizeY; 21 | const int bbindex = y_id * gridSizeX + x_id; 22 | 23 | const float objectness 24 | = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]; 25 | 26 | if (objectness < scoreThreshold) 27 | return; 28 | 29 | int count = (int)atomicAdd(num_detections, 1); 30 | 31 | const float alpha = scaleXY; 32 | const float beta = -0.5 * (scaleXY - 1); 33 | 34 | float x 35 | = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)] 36 | * alpha + beta + x_id) * netWidth / gridSizeX; 37 | 38 | float y 39 | = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] 40 | * alpha + beta + y_id) * netHeight / gridSizeY; 41 | 42 | float w 43 | = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] * 2, 2) 44 | * anchors[mask[z_id] * 2]; 45 | 46 | float h 47 | = __powf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] * 2, 2) 48 | * anchors[mask[z_id] * 2 + 1]; 49 | 50 | float maxProb = 0.0f; 51 | int 
maxIndex = -1; 52 | 53 | for (uint i = 0; i < numOutputClasses; ++i) 54 | { 55 | float prob 56 | = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 57 | 58 | if (prob > maxProb) 59 | { 60 | maxProb = prob; 61 | maxIndex = i; 62 | } 63 | } 64 | 65 | detection_boxes[count * 4 + 0] = x - 0.5 * w; 66 | detection_boxes[count * 4 + 1] = y - 0.5 * h; 67 | detection_boxes[count * 4 + 2] = x + 0.5 * w; 68 | detection_boxes[count * 4 + 3] = y + 0.5 * h; 69 | detection_scores[count] = objectness * maxProb; 70 | detection_classes[count] = maxIndex; 71 | } 72 | 73 | cudaError_t cudaYoloLayer_nc( 74 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 75 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 76 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 77 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 78 | 79 | cudaError_t cudaYoloLayer_nc( 80 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 81 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 82 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 83 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) 84 | { 85 | dim3 threads_per_block(16, 16, 4); 86 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, 87 | (gridSizeY / threads_per_block.y) + 1, 88 | (numBBoxes / threads_per_block.z) + 1); 89 | 90 | for (unsigned int batch = 0; batch < batchSize; ++batch) 91 | { 92 | gpuYoloLayer_nc<<>>( 93 | reinterpret_cast(input) + (batch * inputSize), 94 | reinterpret_cast(num_detections) + (batch), 95 | reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), 96 | reinterpret_cast(detection_scores) + (batch * outputSize), 97 | reinterpret_cast(detection_classes) + (batch * outputSize), 98 | scoreThreshold, netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, 99 | reinterpret_cast(anchors), reinterpret_cast(mask)); 100 | } 101 | return cudaGetLastError(); 102 | } 103 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward_r.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | 8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 9 | 10 | __global__ void gpuYoloLayer_r( 11 | const float* input, int* num_detections, float* detection_boxes, float* detection_scores, int* detection_classes, 12 | const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, const uint gridSizeY, 13 | const uint numOutputClasses, const uint numBBoxes, const float scaleXY, const float* anchors, const int* mask) 14 | { 15 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 16 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 17 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 18 | 19 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) 20 | return; 21 | 22 | const int numGridCells = gridSizeX * gridSizeY; 23 | const int bbindex = y_id * gridSizeX + 
x_id; 24 | 25 | const float objectness 26 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 27 | 28 | if (objectness < scoreThreshold) 29 | return; 30 | 31 | int count = (int)atomicAdd(num_detections, 1); 32 | 33 | const float alpha = scaleXY; 34 | const float beta = -0.5 * (scaleXY - 1); 35 | 36 | float x 37 | = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) 38 | * alpha + beta + x_id) * netWidth / gridSizeX; 39 | 40 | float y 41 | = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) 42 | * alpha + beta + y_id) * netHeight / gridSizeY; 43 | 44 | float w 45 | = __powf(sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * 2, 2) 46 | * anchors[mask[z_id] * 2]; 47 | 48 | float h 49 | = __powf(sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * 2, 2) 50 | * anchors[mask[z_id] * 2 + 1]; 51 | 52 | float maxProb = 0.0f; 53 | int maxIndex = -1; 54 | 55 | for (uint i = 0; i < numOutputClasses; ++i) 56 | { 57 | float prob 58 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]); 59 | 60 | if (prob > maxProb) 61 | { 62 | maxProb = prob; 63 | maxIndex = i; 64 | } 65 | } 66 | 67 | detection_boxes[count * 4 + 0] = x - 0.5 * w; 68 | detection_boxes[count * 4 + 1] = y - 0.5 * h; 69 | detection_boxes[count * 4 + 2] = x + 0.5 * w; 70 | detection_boxes[count * 4 + 3] = y + 0.5 * h; 71 | detection_scores[count] = objectness * maxProb; 72 | detection_classes[count] = maxIndex; 73 | } 74 | 75 | cudaError_t cudaYoloLayer_r( 76 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 77 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 78 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 79 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 80 | 81 | cudaError_t cudaYoloLayer_r( 82 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 83 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 84 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 85 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream) 86 | { 87 | dim3 threads_per_block(16, 16, 4); 88 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, 89 | (gridSizeY / threads_per_block.y) + 1, 90 | (numBBoxes / threads_per_block.z) + 1); 91 | 92 | for (unsigned int batch = 0; batch < batchSize; ++batch) 93 | { 94 | gpuYoloLayer_r<<>>( 95 | reinterpret_cast(input) + (batch * inputSize), 96 | reinterpret_cast(num_detections) + (batch), 97 | reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), 98 | reinterpret_cast(detection_scores) + (batch * outputSize), 99 | reinterpret_cast(detection_classes) + (batch * outputSize), 100 | scoreThreshold, netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, scaleXY, 101 | reinterpret_cast(anchors), reinterpret_cast(mask)); 102 | } 103 | return cudaGetLastError(); 104 | } 105 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloForward_v2.cu: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Created by Marcos Luciano 3 | * https://www.github.com/marcoslucianops 4 | */ 5 | 6 | #include 7 | 8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } 9 | 10 | __device__ void softmaxGPU( 11 | const float* input, const int bbindex, const int numGridCells, uint z_id, const uint numOutputClasses, float temp, 12 | float* output) 13 | { 14 | int i; 15 | float sum = 0; 16 | float largest = -INFINITY; 17 | for (i = 0; i < numOutputClasses; ++i) { 18 | int val = input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 19 | largest = (val>largest) ? val : largest; 20 | } 21 | for (i = 0; i < numOutputClasses; ++i) { 22 | float e = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] / temp - largest / temp); 23 | sum += e; 24 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] = e; 25 | } 26 | for (i = 0; i < numOutputClasses; ++i) { 27 | output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] /= sum; 28 | } 29 | } 30 | 31 | __global__ void gpuRegionLayer( 32 | const float* input, float* softmax, int* num_detections, float* detection_boxes, float* detection_scores, 33 | int* detection_classes, const float scoreThreshold, const uint netWidth, const uint netHeight, const uint gridSizeX, 34 | const uint gridSizeY, const uint numOutputClasses, const uint numBBoxes, const float* anchors) 35 | { 36 | uint x_id = blockIdx.x * blockDim.x + threadIdx.x; 37 | uint y_id = blockIdx.y * blockDim.y + threadIdx.y; 38 | uint z_id = blockIdx.z * blockDim.z + threadIdx.z; 39 | 40 | if (x_id >= gridSizeX || y_id >= gridSizeY || z_id >= numBBoxes) 41 | return; 42 | 43 | const int numGridCells = gridSizeX * gridSizeY; 44 | const int bbindex = y_id * gridSizeX + x_id; 45 | 46 | const float objectness 47 | = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); 48 | 49 | if (objectness < scoreThreshold) 50 | return; 51 | 52 | int count = (int)atomicAdd(num_detections, 1); 53 | 54 | float x 55 | = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) 56 | + x_id) * netWidth / gridSizeX; 57 | 58 | float y 59 | = (sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) 60 | + y_id) * netHeight / gridSizeY; 61 | 62 | float w 63 | = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) 64 | * anchors[z_id * 2] * netWidth / gridSizeX; 65 | 66 | float h 67 | = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) 68 | * anchors[z_id * 2 + 1] * netHeight / gridSizeY; 69 | 70 | softmaxGPU(input, bbindex, numGridCells, z_id, numOutputClasses, 1.0, softmax); 71 | 72 | float maxProb = 0.0f; 73 | int maxIndex = -1; 74 | 75 | for (uint i = 0; i < numOutputClasses; ++i) 76 | { 77 | float prob 78 | = softmax[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]; 79 | 80 | if (prob > maxProb) 81 | { 82 | maxProb = prob; 83 | maxIndex = i; 84 | } 85 | } 86 | 87 | detection_boxes[count * 4 + 0] = x - 0.5 * w; 88 | detection_boxes[count * 4 + 1] = y - 0.5 * h; 89 | detection_boxes[count * 4 + 2] = x + 0.5 * w; 90 | detection_boxes[count * 4 + 3] = y + 0.5 * h; 91 | detection_scores[count] = objectness * maxProb; 92 | detection_classes[count] = maxIndex; 93 | } 94 | 95 | cudaError_t cudaRegionLayer( 96 | const void* input, void* softmax, void* num_detections, void* detection_boxes, 
void* detection_scores, 97 | void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, 98 | const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 99 | const uint& numBBoxes, const void* anchors, cudaStream_t stream); 100 | 101 | cudaError_t cudaRegionLayer( 102 | const void* input, void* softmax, void* num_detections, void* detection_boxes, void* detection_scores, 103 | void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, 104 | const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 105 | const uint& numBBoxes, const void* anchors, cudaStream_t stream) 106 | { 107 | dim3 threads_per_block(16, 16, 4); 108 | dim3 number_of_blocks((gridSizeX / threads_per_block.x) + 1, 109 | (gridSizeY / threads_per_block.y) + 1, 110 | (numBBoxes / threads_per_block.z) + 1); 111 | 112 | for (unsigned int batch = 0; batch < batchSize; ++batch) 113 | { 114 | gpuRegionLayer<<>>( 115 | reinterpret_cast(input) + (batch * inputSize), 116 | reinterpret_cast(softmax) + (batch * inputSize), 117 | reinterpret_cast(num_detections) + (batch), 118 | reinterpret_cast(detection_boxes) + (batch * 4 * outputSize), 119 | reinterpret_cast(detection_scores) + (batch * outputSize), 120 | reinterpret_cast(detection_classes) + (batch * outputSize), 121 | scoreThreshold, netWidth, netHeight, gridSizeX, gridSizeY, numOutputClasses, numBBoxes, 122 | reinterpret_cast(anchors)); 123 | } 124 | return cudaGetLastError(); 125 | } 126 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloPlugins.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #include "yoloPlugins.h" 27 | #include "NvInferPlugin.h" 28 | #include <cassert> 29 | #include <cstdint> 30 | #include <iostream> 31 | 32 | uint kNUM_CLASSES; 33 | 34 | namespace { 35 | template <typename T> 36 | void write(char*& buffer, const T& val) 37 | { 38 | *reinterpret_cast<T*>(buffer) = val; 39 | buffer += sizeof(T); 40 | } 41 | 42 | template <typename T> 43 | void read(const char*& buffer, T& val) 44 | { 45 | val = *reinterpret_cast<const T*>(buffer); 46 | buffer += sizeof(T); 47 | } 48 | } 49 | 50 | cudaError_t cudaYoloLayer_e( 51 | const void* cls, const void* reg, void* num_detections, void* detection_boxes, void* detection_scores, 52 | void* detection_classes, const uint& batchSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 53 | const uint& netHeight, const uint& numOutputClasses, cudaStream_t stream); 54 | 55 | cudaError_t cudaYoloLayer_r( 56 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 57 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 58 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 59 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 60 | 61 | cudaError_t cudaYoloLayer_nc( 62 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 63 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 64 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 65 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 66 | 67 | cudaError_t cudaYoloLayer( 68 | const void* input, void* num_detections, void* detection_boxes, void* detection_scores, void* detection_classes, 69 | const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, const uint& netWidth, 70 | const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, const uint& numBBoxes, 71 | const float& scaleXY, const void* anchors, const void* mask, cudaStream_t stream); 72 | 73 | cudaError_t cudaRegionLayer( 74 | const void* input, void* softmax, void* num_detections, void* detection_boxes, void* detection_scores, 75 | void* detection_classes, const uint& batchSize, uint64_t& inputSize, uint64_t& outputSize, const float& scoreThreshold, 76 | const uint& netWidth, const uint& netHeight, const uint& gridSizeX, const uint& gridSizeY, const uint& numOutputClasses, 77 | const uint& numBBoxes, const void* anchors, cudaStream_t stream); 78 | 79 | YoloLayer::YoloLayer (const void* data, size_t length) 80 | { 81 | const char *d = static_cast<const char*>(data); 82 | 83 | read(d, m_NetWidth); 84 | read(d, m_NetHeight); 85 | read(d, m_NumClasses); 86 | read(d, m_NewCoords); 87 | read(d, m_OutputSize); 88 | read(d, m_Type); 89 | read(d, m_ScoreThreshold); 90 | 91 | if (m_Type != 3) { 92 | uint yoloTensorsSize; 93 | read(d, yoloTensorsSize); 94 | for (uint i = 0; i < yoloTensorsSize; ++i) 95 | { 96 | TensorInfo curYoloTensor; 97 | read(d, curYoloTensor.gridSizeX); 98 | read(d, curYoloTensor.gridSizeY); 99 | read(d, curYoloTensor.numBBoxes); 100 | read(d, curYoloTensor.scaleXY); 101 | 102 | uint anchorsSize; 103 | read(d, anchorsSize); 104 | for (uint j =
0; j < anchorsSize; j++) 105 | { 106 | float result; 107 | read(d, result); 108 | curYoloTensor.anchors.push_back(result); 109 | } 110 | 111 | uint maskSize; 112 | read(d, maskSize); 113 | for (uint j = 0; j < maskSize; j++) 114 | { 115 | int result; 116 | read(d, result); 117 | curYoloTensor.mask.push_back(result); 118 | } 119 | m_YoloTensors.push_back(curYoloTensor); 120 | } 121 | } 122 | 123 | kNUM_CLASSES = m_NumClasses; 124 | }; 125 | 126 | YoloLayer::YoloLayer( 127 | const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords, 128 | const std::vector& yoloTensors, const uint64_t& outputSize, const uint& modelType, 129 | const float& scoreThreshold) : 130 | m_NetWidth(netWidth), 131 | m_NetHeight(netHeight), 132 | m_NumClasses(numClasses), 133 | m_NewCoords(newCoords), 134 | m_YoloTensors(yoloTensors), 135 | m_OutputSize(outputSize), 136 | m_Type(modelType), 137 | m_ScoreThreshold(scoreThreshold) 138 | { 139 | assert(m_NetWidth > 0); 140 | assert(m_NetHeight > 0); 141 | 142 | kNUM_CLASSES = m_NumClasses; 143 | }; 144 | 145 | nvinfer1::Dims 146 | YoloLayer::getOutputDimensions( 147 | int index, const nvinfer1::Dims* inputs, int nbInputDims) noexcept 148 | { 149 | assert(index <= 4); 150 | if (index == 0) { 151 | return nvinfer1::Dims{1, {1}}; 152 | } 153 | else if (index == 1) { 154 | return nvinfer1::Dims{2, {static_cast(m_OutputSize), 4}}; 155 | } 156 | return nvinfer1::Dims{1, {static_cast(m_OutputSize)}}; 157 | } 158 | 159 | bool YoloLayer::supportsFormat ( 160 | nvinfer1::DataType type, nvinfer1::PluginFormat format) const noexcept { 161 | return (type == nvinfer1::DataType::kFLOAT && 162 | format == nvinfer1::PluginFormat::kLINEAR); 163 | } 164 | 165 | void 166 | YoloLayer::configureWithFormat ( 167 | const nvinfer1::Dims* inputDims, int nbInputs, 168 | const nvinfer1::Dims* outputDims, int nbOutputs, 169 | nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) noexcept 170 | { 171 | assert(nbInputs > 0); 172 | assert(format == nvinfer1::PluginFormat::kLINEAR); 173 | assert(inputDims != nullptr); 174 | } 175 | 176 | int32_t YoloLayer::enqueue ( 177 | int batchSize, void const* const* inputs, void* const* outputs, void* workspace, 178 | cudaStream_t stream) noexcept 179 | { 180 | void* num_detections = outputs[0]; 181 | void* detection_boxes = outputs[1]; 182 | void* detection_scores = outputs[2]; 183 | void* detection_classes = outputs[3]; 184 | 185 | CUDA_CHECK(cudaMemsetAsync((int*)num_detections, 0, sizeof(int) * batchSize, stream)); 186 | CUDA_CHECK(cudaMemsetAsync((float*)detection_boxes, 0, sizeof(float) * m_OutputSize * 4 * batchSize, stream)); 187 | CUDA_CHECK(cudaMemsetAsync((float*)detection_scores, 0, sizeof(float) * m_OutputSize * batchSize, stream)); 188 | CUDA_CHECK(cudaMemsetAsync((int*)detection_classes, 0, sizeof(int) * m_OutputSize * batchSize, stream)); 189 | 190 | if (m_Type == 3) 191 | { 192 | CUDA_CHECK(cudaYoloLayer_e( 193 | inputs[0], inputs[1], num_detections, detection_boxes, detection_scores, detection_classes, batchSize, 194 | m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, m_NumClasses, stream)); 195 | } 196 | else 197 | { 198 | uint yoloTensorsSize = m_YoloTensors.size(); 199 | for (uint i = 0; i < yoloTensorsSize; ++i) 200 | { 201 | TensorInfo& curYoloTensor = m_YoloTensors.at(i); 202 | 203 | uint numBBoxes = curYoloTensor.numBBoxes; 204 | float scaleXY = curYoloTensor.scaleXY; 205 | uint gridSizeX = curYoloTensor.gridSizeX; 206 | uint gridSizeY = curYoloTensor.gridSizeY; 207 | std::vector 
anchors = curYoloTensor.anchors; 208 | std::vector mask = curYoloTensor.mask; 209 | 210 | void* v_anchors; 211 | void* v_mask; 212 | if (anchors.size() > 0) { 213 | float* f_anchors = anchors.data(); 214 | CUDA_CHECK(cudaMalloc(&v_anchors, sizeof(float) * anchors.size())); 215 | CUDA_CHECK(cudaMemcpy(v_anchors, f_anchors, sizeof(float) * anchors.size(), cudaMemcpyHostToDevice)); 216 | } 217 | if (mask.size() > 0) { 218 | int* f_mask = mask.data(); 219 | CUDA_CHECK(cudaMalloc(&v_mask, sizeof(int) * mask.size())); 220 | CUDA_CHECK(cudaMemcpy(v_mask, f_mask, sizeof(int) * mask.size(), cudaMemcpyHostToDevice)); 221 | } 222 | 223 | uint64_t inputSize = gridSizeX * gridSizeY * (numBBoxes * (4 + 1 + m_NumClasses)); 224 | 225 | if (m_Type == 2) { // YOLOR incorrect param: scale_x_y = 2.0 226 | CUDA_CHECK(cudaYoloLayer_r( 227 | inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, batchSize, inputSize, 228 | m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, numBBoxes, 229 | 2.0, v_anchors, v_mask, stream)); 230 | } 231 | else if (m_Type == 1) { 232 | if (m_NewCoords) { 233 | CUDA_CHECK(cudaYoloLayer_nc( 234 | inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, batchSize, 235 | inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, 236 | m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream)); 237 | } 238 | else { 239 | CUDA_CHECK(cudaYoloLayer( 240 | inputs[i], num_detections, detection_boxes, detection_scores, detection_classes, batchSize, 241 | inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, 242 | m_NumClasses, numBBoxes, scaleXY, v_anchors, v_mask, stream)); 243 | } 244 | } 245 | else { 246 | void* softmax; 247 | CUDA_CHECK(cudaMalloc(&softmax, sizeof(float) * inputSize * batchSize)); 248 | CUDA_CHECK(cudaMemsetAsync((float*)softmax, 0, sizeof(float) * inputSize * batchSize)); 249 | 250 | CUDA_CHECK(cudaRegionLayer( 251 | inputs[i], softmax, num_detections, detection_boxes, detection_scores, detection_classes, batchSize, 252 | inputSize, m_OutputSize, m_ScoreThreshold, m_NetWidth, m_NetHeight, gridSizeX, gridSizeY, m_NumClasses, 253 | numBBoxes, v_anchors, stream)); 254 | 255 | CUDA_CHECK(cudaFree(softmax)); 256 | } 257 | 258 | if (anchors.size() > 0) { 259 | CUDA_CHECK(cudaFree(v_anchors)); 260 | } 261 | if (mask.size() > 0) { 262 | CUDA_CHECK(cudaFree(v_mask)); 263 | } 264 | } 265 | } 266 | 267 | return 0; 268 | } 269 | 270 | size_t YoloLayer::getSerializationSize() const noexcept 271 | { 272 | size_t totalSize = 0; 273 | 274 | totalSize += sizeof(m_NetWidth); 275 | totalSize += sizeof(m_NetHeight); 276 | totalSize += sizeof(m_NumClasses); 277 | totalSize += sizeof(m_NewCoords); 278 | totalSize += sizeof(m_OutputSize); 279 | totalSize += sizeof(m_Type); 280 | totalSize += sizeof(m_ScoreThreshold); 281 | 282 | if (m_Type != 3) { 283 | uint yoloTensorsSize = m_YoloTensors.size(); 284 | totalSize += sizeof(yoloTensorsSize); 285 | 286 | for (uint i = 0; i < yoloTensorsSize; ++i) 287 | { 288 | const TensorInfo& curYoloTensor = m_YoloTensors.at(i); 289 | totalSize += sizeof(curYoloTensor.gridSizeX); 290 | totalSize += sizeof(curYoloTensor.gridSizeY); 291 | totalSize += sizeof(curYoloTensor.numBBoxes); 292 | totalSize += sizeof(curYoloTensor.scaleXY); 293 | totalSize += sizeof(uint) + sizeof(curYoloTensor.anchors[0]) * curYoloTensor.anchors.size(); 294 | totalSize += sizeof(uint) + sizeof(curYoloTensor.mask[0]) * 
curYoloTensor.mask.size(); 295 | } 296 | } 297 | 298 | return totalSize; 299 | } 300 | 301 | void YoloLayer::serialize(void* buffer) const noexcept 302 | { 303 | char *d = static_cast(buffer); 304 | 305 | write(d, m_NetWidth); 306 | write(d, m_NetHeight); 307 | write(d, m_NumClasses); 308 | write(d, m_NewCoords); 309 | write(d, m_OutputSize); 310 | write(d, m_Type); 311 | write(d, m_ScoreThreshold); 312 | 313 | if (m_Type != 3) { 314 | uint yoloTensorsSize = m_YoloTensors.size(); 315 | write(d, yoloTensorsSize); 316 | for (uint i = 0; i < yoloTensorsSize; ++i) 317 | { 318 | const TensorInfo& curYoloTensor = m_YoloTensors.at(i); 319 | write(d, curYoloTensor.gridSizeX); 320 | write(d, curYoloTensor.gridSizeY); 321 | write(d, curYoloTensor.numBBoxes); 322 | write(d, curYoloTensor.scaleXY); 323 | 324 | uint anchorsSize = curYoloTensor.anchors.size(); 325 | write(d, anchorsSize); 326 | for (uint j = 0; j < anchorsSize; ++j) 327 | { 328 | write(d, curYoloTensor.anchors[j]); 329 | } 330 | 331 | uint maskSize = curYoloTensor.mask.size(); 332 | write(d, maskSize); 333 | for (uint j = 0; j < maskSize; ++j) 334 | { 335 | write(d, curYoloTensor.mask[j]); 336 | } 337 | } 338 | } 339 | } 340 | 341 | nvinfer1::IPluginV2* YoloLayer::clone() const noexcept 342 | { 343 | return new YoloLayer ( 344 | m_NetWidth, m_NetHeight, m_NumClasses, m_NewCoords, m_YoloTensors, m_OutputSize, m_Type, m_ScoreThreshold); 345 | } 346 | 347 | REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator); 348 | -------------------------------------------------------------------------------- /nvdsinfer_custom_impl_Yolo/yoloPlugins.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | * Edited by Marcos Luciano 23 | * https://www.github.com/marcoslucianops 24 | */ 25 | 26 | #ifndef __YOLO_PLUGINS__ 27 | #define __YOLO_PLUGINS__ 28 | 29 | #include <cstdint> 30 | #include <iostream> 31 | #include <memory> 32 | #include <string> 33 | #include <vector> 34 | 35 | #include <cuda_runtime_api.h> 36 | 37 | #include "NvInferPlugin.h" 38 | 39 | #include "yolo.h" 40 | 41 | #define CUDA_CHECK(status) \ 42 | { \ 43 | if (status != 0) \ 44 | { \ 45 | std::cout << "CUDA failure: " << cudaGetErrorString(status) << " in file " << __FILE__ << " at line " \ 46 | << __LINE__ << std::endl; \ 47 | abort(); \ 48 | } \ 49 | } 50 | 51 | namespace 52 | { 53 | const char* YOLOLAYER_PLUGIN_VERSION {"1"}; 54 | const char* YOLOLAYER_PLUGIN_NAME {"YoloLayer_TRT"}; 55 | } // namespace 56 | 57 | class YoloLayer : public nvinfer1::IPluginV2 58 | { 59 | public: 60 | YoloLayer (const void* data, size_t length); 61 | 62 | YoloLayer ( 63 | const uint& netWidth, const uint& netHeight, const uint& numClasses, const uint& newCoords, 64 | const std::vector<TensorInfo>& yoloTensors, const uint64_t& outputSize, const uint& modelType, 65 | const float& scoreThreshold); 66 | 67 | const char* getPluginType () const noexcept override { return YOLOLAYER_PLUGIN_NAME; } 68 | 69 | const char* getPluginVersion () const noexcept override { return YOLOLAYER_PLUGIN_VERSION; } 70 | 71 | int getNbOutputs () const noexcept override { return 4; } 72 | 73 | nvinfer1::Dims getOutputDimensions ( 74 | int index, const nvinfer1::Dims* inputs, 75 | int nbInputDims) noexcept override; 76 | 77 | bool supportsFormat ( 78 | nvinfer1::DataType type, nvinfer1::PluginFormat format) const noexcept override; 79 | 80 | void configureWithFormat ( 81 | const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, int nbOutputs, 82 | nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) noexcept override; 83 | 84 | int initialize () noexcept override { return 0; } 85 | 86 | void terminate () noexcept override {} 87 | 88 | size_t getWorkspaceSize (int maxBatchSize) const noexcept override { return 0; } 89 | 90 | int32_t enqueue ( 91 | int batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) 92 | noexcept override; 93 | 94 | size_t getSerializationSize() const noexcept override; 95 | 96 | void serialize (void* buffer) const noexcept override; 97 | 98 | void destroy () noexcept override { delete this; } 99 | 100 | nvinfer1::IPluginV2* clone() const noexcept override; 101 | 102 | void setPluginNamespace (const char* pluginNamespace) noexcept override { 103 | m_Namespace = pluginNamespace; 104 | } 105 | 106 | virtual const char* getPluginNamespace () const noexcept override { 107 | return m_Namespace.c_str(); 108 | } 109 | 110 | private: 111 | std::string m_Namespace {""}; 112 | uint m_NetWidth {0}; 113 | uint m_NetHeight {0}; 114 | uint m_NumClasses {0}; 115 | uint m_NewCoords {0}; 116 | std::vector<TensorInfo> m_YoloTensors; 117 | uint64_t m_OutputSize {0}; 118 | uint m_Type {0}; 119 | float m_ScoreThreshold {0}; 120 | }; 121 | 122 | class YoloLayerPluginCreator : public nvinfer1::IPluginCreator 123 | { 124 | public: 125 | YoloLayerPluginCreator () {} 126 | 127 | ~YoloLayerPluginCreator () {} 128 | 129 | const char* getPluginName () const noexcept override { return YOLOLAYER_PLUGIN_NAME; } 130 | 131 | const char* getPluginVersion () const noexcept override { return YOLOLAYER_PLUGIN_VERSION; } 132 | 133 | const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override { 134 | std::cerr<< "YoloLayerPluginCreator::getFieldNames is not
implemented" << std::endl; 135 | return nullptr; 136 | } 137 | 138 | nvinfer1::IPluginV2* createPlugin ( 139 | const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept override 140 | { 141 | std::cerr<< "YoloLayerPluginCreator::getFieldNames is not implemented"; 142 | return nullptr; 143 | } 144 | 145 | nvinfer1::IPluginV2* deserializePlugin ( 146 | const char* name, const void* serialData, size_t serialLength) noexcept override 147 | { 148 | std::cout << "Deserialize yoloLayer plugin: " << name << std::endl; 149 | return new YoloLayer(serialData, serialLength); 150 | } 151 | 152 | void setPluginNamespace(const char* libNamespace) noexcept override { 153 | m_Namespace = libNamespace; 154 | } 155 | const char* getPluginNamespace() const noexcept override { 156 | return m_Namespace.c_str(); 157 | } 158 | 159 | private: 160 | std::string m_Namespace {""}; 161 | }; 162 | 163 | extern uint kNUM_CLASSES; 164 | 165 | #endif // __YOLO_PLUGINS__ 166 | --------------------------------------------------------------------------------