├── .gitignore
├── CMakeLists.txt
├── README.md
├── configs
    ├── bdd100k.names
    ├── lightNet-BDD100K-1280x960-chPruning.cfg
    ├── lightNet-BDD100K-1280x960-chPruning.txt
    ├── lightNet-BDD100K-1280x960.cfg
    ├── lightNet-BDD100K-1280x960.txt
    ├── lightNet-BDD100K-chPruning-det-semaseg-1280x960.cfg
    ├── lightNet-BDD100K-chPruning-det-semaseg-1280x960.txt
    ├── lightNet-BDD100K-det-semaseg-1280x960.cfg
    ├── lightNet-BDD100K-det-semaseg-1280x960.txt
    ├── yolov8x-BDD100K-640x640-T4-lane4cls.cfg
    └── yolov8x-BDD100K-640x640-T4-lane4cls.txt
├── extra
    ├── API.h
    └── class_timer.hpp
├── launch
    └── lightnet_trt.launch.xml
├── modules
    ├── calibrator.cpp
    ├── calibrator.h
    ├── chunk.cu
    ├── chunk.h
    ├── class_detector.cpp
    ├── class_detector.h
    ├── class_yolo_detector.hpp
    ├── colormap.hpp
    ├── detect.cu
    ├── detect.h
    ├── ds_image.cpp
    ├── ds_image.h
    ├── err.log
    ├── hardswish.cu
    ├── hardswish.h
    ├── kernel.cu
    ├── logging.h
    ├── mish.cu
    ├── mish.h
    ├── plugin_factory.cpp
    ├── plugin_factory.h
    ├── preprocess.cu
    ├── preprocess.h
    ├── profiler.cpp
    ├── profiler.h
    ├── trt_utils.cpp
    ├── trt_utils.h
    ├── yolo.cpp
    ├── yolo.h
    ├── yolo_config_parser.cpp
    ├── yolo_config_parser.h
    ├── yoloplugin_lib.cpp
    ├── yoloplugin_lib.h
    ├── yolov2.cpp
    ├── yolov2.h
    ├── yolov3.cpp
    ├── yolov3.h
    ├── yolov4.cpp
    ├── yolov4.h
    ├── yolov5.cpp
    └── yolov5.h
├── package.xml
├── samples
    ├── parallel_executor.cpp
    └── sample_detector.cpp
└── src
    ├── lightnet_trt_core.cpp
    ├── lightnet_trt_core.hpp
    ├── lightnet_trt_node.cpp
    ├── lightnet_trt_node.hpp
    ├── main.cpp
    ├── utils.cpp
    └── utils.hpp


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.o
 2 | *.a
 3 | *.dSYM
 4 | *.csv
 5 | *.out
 6 | *.png
 7 | *.so
 8 | *.exe
 9 | *.dll
10 | *.lib
11 | *.dylib
12 | *.pyc
13 | *~
14 | mnist/
15 | data/
16 | caffe/
17 | grasp/
18 | images/
19 | opencv/
20 | convnet/
21 | decaf/
22 | submission/
23 | cfg/
24 | temp/
25 | build/darknet/*
26 | build_*/
27 | ninja/
28 | ninja.zip
29 | vcpkg_installed/
30 | !build/darknet/YoloWrapper.cs
31 | .fuse*
32 | *.weights
33 | *.db
34 | *.engine
35 | *calibration.table
36 | build/*
37 | build/*.cmake
38 | build/*.ninja
39 | build/*.txt
40 | build/*.json
41 | build/CMakeFiles/
42 | build/detect_cuda_compute_capabilities.cu
43 | build/.ninja_deps
44 | build/.ninja_log
45 | build/Makefile
46 | */vcpkg-manifest-install.log
47 | build.log
48 | __pycache__/
49 | 
50 | # OS Generated #
51 | .DS_Store*
52 | ehthumbs.db
53 | Icon?
54 | Thumbs.db
55 | *.swp
56 | 
57 | # IDE generated #
58 | .vs/
59 | .vscode/
60 | 
61 | # Managed by CMake
62 | src/version.h
63 | 
64 | # Build artifacts
65 | lib/
66 | share/
67 | include/darknet/
68 | uselib
69 | uselib_track
70 | darknet
71 | vcpkg/
72 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # cmake_minimum_required(VERSION 3.1)
 2 | cmake_minimum_required(VERSION 3.14)
 3 | project(lightnet_trt VERSION 1.0)         
 4 | 
 5 | enable_language(CUDA) # not supported on CMAKE 3.14
 6 | 
 7 | find_package(OpenMP REQUIRED)
 8 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") # Added for CMAKE 3.14
 9 | set(CMAKE_CXX_COMPILER "/usr/bin/g++")
10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -O3 -Wno-unused-function -Wno-maybe-uninitialized -Wno-narrowing -Wno-misleading-indentation -Wno-unused-but-set-variable -Wno-unused-variable -Wno-return-type -Wno-deprecated-declarations -Wno-write-strings -Wno-reorder -Wno-sign-compare -Wno-unused-parameter -Wno-unused-value -Wno-uninitialized ${OpenMP_CXX_FLAGS} -fopenmp")
11 | #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -O0 -g -Wno-write-strings")
12 | #set(CMAKE_CXX_FLAGS_DEBUG "-O0 -ggdb -g")
13 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath -Wl,$ORIGIN")
14 | 
15 | include_directories(/usr/local/cuda/include)
16 | find_package(CUDA REQUIRED)
17 | 
18 | find_package(OpenCV REQUIRED)
19 | 
20 | file(GLOB_RECURSE sources modules/*.hpp modules/*.cpp modules/*.h modules/*.cu extra/*.h module/preprocess.cu)
21 | add_library(detector SHARED ${sources})
22 | 
23 | target_include_directories(detector PRIVATE extra/ modules/ ${OpenCV_INCLUDE_DIRS} ${CUDA_TOOLKIT_ROOT_DIR}/include)
24 | target_link_libraries(detector ${CUDA_LIBRARIES} nvinfer nvinfer_plugin nvparsers gflags "stdc++fs")
25 | 
26 | add_executable(lightnet_trt samples/sample_detector.cpp ${sources})
27 | 
28 | target_include_directories(lightnet_trt PRIVATE modules/ extra/)
29 | target_link_libraries(lightnet_trt ${CUDA_LIBRARIES} ${OpenCV_LIBS} nvinfer nvinfer_plugin nvparsers gflags boost_system boost_filesystem "stdc++fs" "stdc++")
30 | 
31 | 
32 | # ROS 2 package
33 | if(BUILD_ROS2_LIGHTNET_TRT)
34 |   find_package(autoware_cmake REQUIRED)
35 |   autoware_package()
36 | 
37 |   find_package(Eigen3 REQUIRED)
38 | 
39 |   # build nodelet
40 |   # TODO
41 | 
42 |   # build node
43 |   ament_auto_add_executable(lightnet_trt_node
44 |     src/main.cpp
45 |     src/lightnet_trt_node.cpp
46 |     src/lightnet_trt_core.cpp
47 |     src/utils.cpp
48 |   )
49 |   ament_target_dependencies(lightnet_trt_node)
50 |   target_include_directories(lightnet_trt_node PRIVATE modules/ extra/)
51 |   target_link_libraries(lightnet_trt_node detector)
52 |   target_link_libraries(lightnet_trt_node ${CUDA_LIBRARIES} ${OpenCV_LIBS} Eigen3::Eigen nvinfer nvinfer_plugin nvparsers gflags boost_system boost_filesystem "stdc++fs" "stdc++")
53 | 
54 |   ament_auto_package(INSTALL_TO_SHARE
55 |     launch
56 |     configs
57 |   )
58 | endif()
59 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # LightNet-TRT: High-Efficiency and Real-Time CNN Implementation on Edge AI
  2 | 
  3 | LightNet-TRT is a CNN implementation optimized for edge AI devices that combines the advantages of LightNet <sup>[[1]](#references)</sup> and TensorRT <sup>[[2]](#references)</sup>. LightNet is a lightweight and high-performance neural network framework designed for edge devices, while TensorRT is a high-performance deep learning inference engine developed by NVIDIA for optimizing and running deep learning models on GPUs. LightNet-TRT uses the Network Definition API provided by TensorRT to integrate LightNet into TensorRT, allowing it to run efficiently and in real-time on edge devices.
  4 | 
  5 | ![lightNet-detection-1024x256](https://user-images.githubusercontent.com/43815838/227718872-b9dfdb7b-4d3c-45ce-b1d9-585a99380434.gif)
  6 | 
  7 | 
  8 | ## Key Improvements
  9 | 
 10 | ### 2:4 Structured Sparsity
 11 | 
 12 | LightNet-TRT utilizes 2:4 structured sparsity <sup>[[3]](#references)</sup>  to further optimize the network. 2:4 structured sparsity means that two values must be zero in each contiguous block of four values, resulting in a 50% reduction in the number of weights. This technique allows the network to use fewer weights and computations while maintaining accuracy.
 13 | 
 14 | ![Sparsity](https://developer-blogs.nvidia.com/ja-jp/wp-content/uploads/sites/6/2022/06/2-4-structured-sparse-matrix.png "sparsity")
 15 | 
 16 | ### NVDLA Execution
 17 | 
 18 | LightNet-TRT also supports the execution of the neural network on the NVIDIA Deep Learning Accelerator (NVDLA) <sup>[[4]](#references)</sup> , a free and open architecture that provides high performance and low power consumption for deep learning inference on edge devices. By using NVDLA, LightNet-TRT can further improve the efficiency and performance of the network on edge devices.
 19 | 
 20 | ![NVDLA](https://i0.wp.com/techgrabyte.com/wp-content/uploads/2019/09/Nvidia-Open-Source-Its-Deep-Learning-Inference-Compiler-NVDLA-2.png?w=768&ssl=1 "NVDLA")
 21 | 
 22 | 
 23 | ### Multi-Precision Quantization
 24 | 
 25 | In addition to post training quantization <sup>[[5]](#references)</sup>, LightNet-TRT also supports multi-precision quantization, which allows the network to use different precision for weights and activations. By using mixed precision, LightNet-TRT can further reduce the memory usage and computational requirements of the network while still maintaining accuracy. By writing it in CFG, you can set the precision for each layer of your CNN.
 26 | 
 27 | ![Quantization](https://developer-blogs.nvidia.com/wp-content/uploads/2021/07/qat-training-precision.png "Quantization")
 28 | 
 29 | 
 30 | 
 31 | ### Multitask Execution (Detection/Segmentation)
 32 | 
 33 | LightNet-TRT also supports multitask execution, allowing the network to perform both object detection and segmentation tasks simultaneously. This enables the network to perform multiple tasks efficiently on edge devices, saving computational resources and power.
 34 | 
 35 | [![](https://img.youtube.com/vi/TmlW-b_t3sQ/0.jpg)](https://www.youtube.com/watch?v=TmlW-b_t3sQ)
 36 | 
 37 | ## Installation
 38 | 
 39 | ### Requirements
 40 | 
 41 | -   CUDA 11.0 or higher
 42 | -   TensorRT 8.0 or higher
 43 | -   OpenCV 3.0 or higher
 44 | 
 45 | ### Steps
 46 | 
 47 | 1.  Clone the repository.
 48 |     
 49 | ```shell
 50 | $ git clone https://github.com/daniel89710/lightNet-TRT.git
 51 | $ cd lightNet-TRT
 52 | ```
 53 | 	
 54 | 2.  Install libraries.
 55 | 						    
 56 | ```shell
 57 | $ sudo apt update
 58 | $ sudo apt install libgflags-dev
 59 | $ sudo apt install libboost-all-dev
 60 | ```
 61 | 										    
 62 | 3.  Compile the TensorRT implementation.
 63 | 											    
 64 | ```shell
 65 | $ mkdir build
 66 | $ cmake ../
 67 | $ make -j
 68 | ```
 69 | 
 70 | ## Model	
 71 | | Model | Resolutions | GFLOPS | Params | Precision | Sparsity | DNN time on RTX3080 | DNN time on Jetson Orin NX 16GB GPU | DNN time on Jetson Orin NX 16GB DLA| DNN time on Jetson Orin Nano 4GB GPU | cfg | weights |
 72 | |---|---|---|---|---|---|---|---|---|---|---|---|
 73 | | lightNet | 1280x960 | 58.01 | 9.0M | int8 | 49.8% | 1.30ms | 7.6ms | 14.2ms | 14.9ms | [github](https://github.com/daniel89710/lightNet/blob/master/cfg/lightNet-BDD100K-1280x960.cfg) |[GoogleDrive](https://drive.google.com/file/d/1qTBQ0BkIYqcyu1BwC54_Z9T1_b702HKf/view?usp=sharing) |
 74 | | LightNet+Semseg | 1280x960 | 76.61 | 9.7M | int8 | 49.8% | 2.06ms | 15.3ms | 23.2ms | 26.2ms | [github](https://github.com/daniel89710/lightNet-TRT/blob/main/configs/lightNet-BDD100K-det-semaseg-1280x960.cfg) | [GoogleDrive](https://drive.google.com/file/d/1ttdVtlDiPun13EQCB4Nyls3Q8w5aXg1i/view?usp=sharing)|
 75 | | lightNet pruning | 1280x960 | 35.03 | 4.3M | int8 | 49.8% | 1.21ms | 8.89ms | 11.68ms | 13.75ms | [github](https://github.com/daniel89710/lightNet-TRT/blob/main/configs/lightNet-BDD100K-1280x960-chPruning.cfg) |[GoogleDrive](https://drive.google.com/file/d/1OGZApPeNH7K08-89eJ8tGhzA9kkRFLii/view?usp=sharing) |
 76 | | LightNet pruning +SemsegLight | 1280x960 | 44.35 | 4.9M | int8 | 49.8% | 1.80ms | 9.89ms | 15.26ms | 23.35ms | [github](https://github.com/daniel89710/lightNet-TRT/blob/main/configs/lightNet-BDD100K-chPruning-det-semaseg-1280x960.cfg) | [GoogleDrive](https://drive.google.com/file/d/1dytYnqS4h_5YK73tr6DOZTUYUEKZYsFe/view?usp=drive_link)|
 77 |  	
 78 |  * "DNN time" refers to the time measured by IProfiler during the enqueueV2 operation, excluding pre-process and post-process times.
 79 |  * Orin NX has three independent AI processors, allowing lightNet to be parallelized across a GPU and two DLAs.																
 80 |  * Orin Nano 4GB has only iGPU with 512 CUDA cores.
 81 | ## Usage
 82 | 
 83 | ### Converting a LightNet model to a TensorRT engine
 84 | 						
 85 | Build FP32 engine
 86 | ```shell
 87 | $ ./lightNet-TRT --flagfile ../configs/lightNet-BDD100K-det-semaseg-1280x960.txt --precision kFLOAT
 88 | ```
 89 | 
 90 | Build FP16(HALF) engine
 91 | ```shell
 92 | $ ./lightNet-TRT --flagfile ../configs/lightNet-BDD100K-det-semaseg-1280x960.txt --precision kHALF
 93 | ```
 94 | 
 95 | Build INT8 engine 
 96 | (You need to prepare a list for calibration in "configs/calibration_images.txt".)
 97 | ```shell
 98 | $ ./lightNet-TRT --flagfile ../configs/lightNet-BDD100K-det-semaseg-1280x960.txt --precision kINT8
 99 | ```
100 | 
101 | Build DLA engine (Supported by only Xavier and Orin)
102 | ```shell
103 | $ ./lightNet-TRT --flagfile ../configs/lightNet-BDD100K-det-semaseg-1280x960.txt --precision kINT8 --dla [0/1]
104 | ```
105 | 
106 | ### Inference with the TensorRT engine
107 | 
108 | Inference from images
109 | ```shell
110 | $ ./lightNet-TRT --flagfile ../configs/lightNet-BDD100K-det-semaseg-1280x960.txt --precision [kFLOAT/kHALF/kINT8] {--dla [0/1]} --d DIRECTORY
111 | ```
112 | 
113 | Inference from images
114 | ```shell
115 | $ ./lightNet-TRT --flagfile ../configs/lightNet-BDD100K-det-semaseg-1280x960.txt --precision [kFLOAT/kHALF/kINT8] {--dla [0/1]} --d VIDEO
116 | ```
117 | 
118 | ### Inference with ROS 2 node
119 | 
120 | You can build as follows. Note that current implementation requires Autoware installed and sourced.
121 | 
122 | ```bash
123 | mkdir lightnet_trt_ws/src -p
124 | cd lightnet_trt_ws/src
125 | git clone https://github.com/kminoda/lightNet-TRT-ROS2.git
126 | cd ..
127 | colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release -DBUILD_ROS2_LIGHTNET_TRT=ON
128 | ```
129 | 
130 | Then, you can run the node as follows.
131 | 
132 | ```bash
133 | ros2 launch lightnet_trt lightnet_trt.launch.xml
134 | ```
135 | 
136 | ## Implementation
137 | 
138 | LightNet-TRT is built on the LightNet framework and integrates with TensorRT using the Network Definition API. The implementation is based on the following repositories:
139 | 
140 | -   LightNet: [https://github.com/daniel89710/lightNet](https://github.com/daniel89710/lightNet)
141 | -   TensorRT: [https://github.com/NVIDIA/TensorRT](https://github.com/NVIDIA/TensorRT)
142 | -   NVIDIA DeepStream SDK: [https://github.com/NVIDIA-AI-IOT/deepstream\_reference\_apps/tree/restructure](https://github.com/NVIDIA-AI-IOT/deepstream_reference_apps/tree/restructure)
143 | -   YOLO-TensorRT: [https://github.com/enazoe/yolo-tensorrt](https://github.com/enazoe/yolo-tensorrt)
144 | 
145 | ## Conclusion
146 | 
147 | LightNet-TRT is a powerful and efficient implementation of CNNs using Edge AI. With its advanced features and integration with TensorRT, it is an excellent choice for real-time object detection and semantic segmentation applications on edge devices.
148 | 
149 | # References
150 | [1]. [LightNet](https://github.com/daniel89710/lightNet)  
151 | [2]. [TensorRT](https://developer.nvidia.com/tensorrt)  
152 | [3]. [Accelerating Inference with Sparsity Using the NVIDIA Ampere Architecture and NVIDIA TensorRT](https://developer.nvidia.com/blog/accelerating-inference-with-sparsity-using-ampere-and-tensorrt/)  
153 | [4]. [NVDLA](http://nvdla.org/)  
154 | [5]. [Achieving FP32 Accuracy for INT8 Inference Using Quantization Aware Training with NVIDIA TensorRT](https://developer.nvidia.com/blog/achieving-fp32-accuracy-for-int8-inference-using-quantization-aware-training-with-tensorrt/)  
155 | 
156 | 
157 | 


--------------------------------------------------------------------------------
/configs/bdd100k.names:
--------------------------------------------------------------------------------
 1 | car
 2 | bus
 3 | person
 4 | bike
 5 | truck
 6 | motor
 7 | train
 8 | rider
 9 | traffic_sign
10 | traffic_light


--------------------------------------------------------------------------------
/configs/lightNet-BDD100K-1280x960-chPruning.txt:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | # Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 4 | 
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to deal
 7 | # in the Software without restriction, including without limitation the rights
 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 | 
23 | # config_file_path  : Darknet cfg file
24 | # wts_file_path : Darknet weights file
25 | 
26 | --network_type=YOLOV3
27 | --config_file_path=../configs/lightNet-BDD100K-1280x960-chPruning.cfg
28 | --wts_file_path=../configs/lightNet-BDD100K-1280x960-chPruning.weights
29 | 
30 | #Support kFLOAT/kHALF/kINT8
31 | #kINT8 requires 'calibration_images.txt'
32 | --precision=kFLOAT
33 | 
34 | #Support DLA for Xavier/Orin series
35 | #--dla=[0/1]
36 | --mp=true
37 | 


--------------------------------------------------------------------------------
/configs/lightNet-BDD100K-1280x960.txt:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | # Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 4 | 
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to deal
 7 | # in the Software without restriction, including without limitation the rights
 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 | 
23 | # config_file_path  : Darknet cfg file
24 | # wts_file_path : Darknet weights file
25 | 
26 | --network_type=YOLOV3
27 | --config_file_path=../configs/lightNet-BDD100K-1280x960.cfg
28 | --wts_file_path=../configs/lightNet-BDD100K-1280x960.weights
29 | 
30 | #Support kFLOAT/kHALF/kINT8
31 | --precision=kFLOAT
32 | 
33 | #Support DLA for Xavier/Orin series
34 | #--dla=[0/1]
35 | --mp=true
36 | 


--------------------------------------------------------------------------------
/configs/lightNet-BDD100K-chPruning-det-semaseg-1280x960.txt:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | # Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 4 | 
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to deal
 7 | # in the Software without restriction, including without limitation the rights
 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 | 
23 | ### This is a sample config file for yolo plugin and the trt-yolo-app
24 | # Statements starting with '#' are ignored.
25 | 
26 | # The following 4 config params are required for yolo plugin and trt-yolo-app
27 | 
28 | # network_type : Type of network architecture. Choose from yolov2, yolov2-tiny, yolov3 and yolov3-tiny
29 | # config_file_path  : Darknet cfg file
30 | # wts_file_path : Darknet weights file
31 | # labels_file_path : Text file with a list of object class labels
32 | 
33 | --network_type=YOLOV3
34 | --config_file_path=../configs/lightNet-BDD100K-chPruning-det-semaseg-1280x960.cfg
35 | --wts_file_path=../configs/lightNet-BDD100K-chPruning-det-semaseg-1280x960.weights
36 | 
37 | #Support kFLOAT/kHALF/kINT8
38 | --precision=kFLOAT
39 | 
40 | #Support DLA for Xavier/Orin series
41 | #--dla=[0/1]
42 | #--mp=true
43 | 


--------------------------------------------------------------------------------
/configs/lightNet-BDD100K-det-semaseg-1280x960.txt:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | # Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 4 | 
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to deal
 7 | # in the Software without restriction, including without limitation the rights
 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 | 
23 | ### This is a sample config file for yolo plugin and the trt-yolo-app
24 | # Statements starting with '#' are ignored.
25 | 
26 | # The following 4 config params are required for yolo plugin and trt-yolo-app
27 | 
28 | # network_type : Type of network architecture. Choose from yolov2, yolov2-tiny, yolov3 and yolov3-tiny
29 | # config_file_path  : Darknet cfg file
30 | # wts_file_path : Darknet weights file
31 | # labels_file_path : Text file with a list of object class labels
32 | 
33 | --network_type=YOLOV3
34 | --config_file_path=../configs/lightNet-BDD100K-det-semaseg-1280x960.cfg
35 | --wts_file_path=../configs/lightNet-BDD100K-det-semaseg-1280x960.weights
36 | 
37 | #Support kFLOAT/kHALF/kINT8
38 | --precision=kFLOAT
39 | 
40 | #Support DLA for Xavier/Orin series
41 | #--dla=[0/1]
42 | #--mp=true
43 | 


--------------------------------------------------------------------------------
/configs/yolov8x-BDD100K-640x640-T4-lane4cls.txt:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | # Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 4 | 
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to deal
 7 | # in the Software without restriction, including without limitation the rights
 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | 
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | 
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 | 
23 | ### This is a sample config file for yolo plugin and the trt-yolo-app
24 | # Statements starting with '#' are ignored.
25 | 
26 | # The following 4 config params are required for yolo plugin and trt-yolo-app
27 | 
28 | # network_type : Type of network architecture. Choose from yolov2, yolov2-tiny, yolov3 and yolov3-tiny
29 | # config_file_path  : Darknet cfg file
30 | # wts_file_path : Darknet weights file
31 | # labels_file_path : Text file with a list of object class labels
32 | 
33 | --network_type=YOLOV8X
34 | --config_file_path=../configs/yolov8x-BDD100K-640x640-T4-lane4cls.cfg
35 | --wts_file_path=../weights/yolov8x-BDD100K-640x640-T4-lane4cls-scratch-pseudo-finetune_last.weights
36 | 
37 | #Support kFLOAT/kHALF/kINT8
38 | --precision=kFLOAT
39 | 
40 | #Support DLA for Xavier/Orin series
41 | #--dla=[0/1]
42 | #--mp=true
43 | 


--------------------------------------------------------------------------------
/extra/API.h:
--------------------------------------------------------------------------------
 1 | #ifdef API_EXPORTS
 2 | 
 3 | #if defined(_MSC_VER)
 4 | #define API __declspec(dllexport) 
 5 | #else
 6 | #define API __attribute__((visibility("default")))
 7 | #endif
 8 | 
 9 | #else
10 | 	
11 | #if defined(_MSC_VER)
12 | #define API __declspec(dllimport) 
13 | #else
14 | #define API 
15 | #endif
16 | 
17 | #endif


--------------------------------------------------------------------------------
/extra/class_timer.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <iostream>
 4 | #include <string>
 5 | #include <chrono>
 6 | 
 7 | class Timer
 8 | {
 9 | public:
10 | 	Timer() : beg_(clock_::now())
11 | 	{}
12 | 	void reset()
13 | 	{
14 | 		beg_ = clock_::now();
15 | 	}
16 | 
17 | 	double elapsed() const
18 | 	{
19 | 		return std::chrono::duration_cast<second_>(clock_::now() - beg_).count();
20 | 	}
21 | 
22 | 	void out(std::string message = "")
23 | 	{
24 | 		double t = elapsed();
25 | 		std::cout << message << " elasped time:" << t << "ms" << std::endl;
26 | 		reset();
27 | 	}
28 | 
29 | 	double get_duration()const
30 | 	{
31 | 		return elapsed();
32 | 	}
33 | private:
34 | 	using clock_ = std::chrono::high_resolution_clock;
35 | 	using second_ = std::chrono::duration<double, std::milli>;
36 | 	std::chrono::time_point<clock_> beg_;
37 | };
38 | 


--------------------------------------------------------------------------------
/launch/lightnet_trt.launch.xml:
--------------------------------------------------------------------------------
 1 | <launch>
 2 |   <!-- image topic name to be subscribed -->
 3 |   <arg name="input/image" default="/sensing/camera/camera0/image_rect_color"/>
 4 |   <arg name="input/camera_info" default="/sensing/camera/camera0/camera_info"/>
 5 | 
 6 |   <!-- output topic name to be published -->
 7 |   <arg name="output/objects" default="/perception/object_recognition/detection/rois0"/>
 8 | 
 9 |   <!-- path to the lightNet-TRT cfg file to be loaded -->
10 |   <!-- <arg name="model_cfg" default="$(find-pkg-share lightnet_trt)/configs/lightNet-BDD100K-det-semaseg-1280x960.cfg"/> -->
11 |   <!-- <arg name="model_weights" default="$(find-pkg-share lightnet_trt)/configs/lightNet-BDD100K-det-semaseg-1280x960.weights"/> -->
12 |   <!-- <arg name="width" default="1280"/> -->
13 |   <!-- <arg name="height" default="960"/> -->
14 | 
15 |   <arg name="model_cfg" default="$(find-pkg-share lightnet_trt)/configs/yolov8x-BDD100K-640x640-T4-lane4cls.cfg"/>
16 |   <arg name="model_weights" default="$(find-pkg-share lightnet_trt)/configs/yolov8x-BDD100K-640x640-T4-lane4cls-scratch-pseudo-finetune_last.weights"/>
17 |   <arg name="width" default="640"/>
18 |   <arg name="height" default="640"/>
19 | 
20 |   <group>
21 |     <node pkg="lightnet_trt" exec="lightnet_trt_node" name="lightnet_trt_node" output="screen">
22 |       <remap from="~/in/image" to="$(var input/image)"/>
23 |       <remap from="~/in/camera_info" to="$(var input/camera_info)"/>
24 |       <remap from="~/out/image" to="$(var output/objects)/debug/image"/>
25 |       <param name="model_cfg" value="$(var model_cfg)"/>
26 |       <param name="model_weights" value="$(var model_weights)"/>
27 |       <param name="width" value="$(var width)"/>
28 |       <param name="height" value="$(var height)"/>
29 |     </node>
30 |   </group>
31 | 
32 | </launch>


--------------------------------------------------------------------------------
/modules/calibrator.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | MIT License
  3 | 
  4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
  5 | 
  6 | Permission is hereby granted, free of charge, to any person obtaining a copy
  7 | of this software and associated documentation files (the "Software"), to deal
  8 | in the Software without restriction, including without limitation the rights
  9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 | copies of the Software, and to permit persons to whom the Software is
 11 | furnished to do so, subject to the following conditions:
 12 | 
 13 | The above copyright notice and this permission notice shall be included in all
 14 | copies or substantial portions of the Software.
 15 | 
 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 22 | SOFTWARE.
 23 | *
 24 | */
 25 | 
 26 | #include "calibrator.h"
 27 | #include <fstream>
 28 | #include <iostream>
 29 | #include <iterator>
 30 | #include <cuda_runtime.h>
 31 | #include <cuda.h>
 32 | Int8EntropyCalibrator::Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages,
 33 | 	const std::string& calibImagesPath,
 34 | 	const std::string& calibTableFilePath,
 35 | 	const uint64_t& inputSize, const uint32_t& inputH,
 36 | 	const uint32_t& inputW, const std::string& inputBlobName,
 37 | 	const std::string &s_net_type_) :
 38 |     m_BatchSize(batchSize),
 39 |     m_InputH(inputH),
 40 |     m_InputW(inputW),
 41 |     m_InputSize(inputSize),
 42 |     m_InputCount(batchSize * inputSize),
 43 |     m_InputBlobName(inputBlobName),
 44 |     m_CalibTableFilePath(calibTableFilePath),
 45 |     m_ImageIndex(0),
 46 | 	_s_net_type(s_net_type_)
 47 | {
 48 |     if (!fileExists(m_CalibTableFilePath, false))
 49 |     {
 50 |         m_ImageList = loadImageList(calibImages, calibImagesPath);
 51 |         m_ImageList.resize(static_cast<int>(m_ImageList.size() / m_BatchSize) * m_BatchSize);
 52 |         std::random_shuffle(m_ImageList.begin(), m_ImageList.end(),
 53 |                             [](int i) { return rand() % i; });
 54 |     }
 55 | 
 56 |     NV_CUDA_CHECK(cudaMalloc(&m_DeviceInput, m_InputCount * sizeof(float)));
 57 | }
 58 | 
 59 | Int8EntropyCalibrator::~Int8EntropyCalibrator() { NV_CUDA_CHECK(cudaFree(m_DeviceInput)); }
 60 | 
 61 | bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings)noexcept
 62 | {
 63 |     if (m_ImageIndex + m_BatchSize >= m_ImageList.size()) return false;
 64 | 
 65 |     // Load next batch
 66 |     std::vector<DsImage> dsImages(m_BatchSize);
 67 |     for (uint32_t j = m_ImageIndex; j < m_ImageIndex + m_BatchSize; ++j)
 68 |     {
 69 |         dsImages.at(j - m_ImageIndex) = DsImage(m_ImageList.at(j), _s_net_type, m_InputH, m_InputW);
 70 |     }
 71 |     m_ImageIndex += m_BatchSize;
 72 | 
 73 |     cv::Mat trtInput = blobFromDsImages(dsImages, m_InputH, m_InputW);
 74 | 
 75 |     NV_CUDA_CHECK(cudaMemcpy(m_DeviceInput, trtInput.ptr<float>(0), m_InputCount * sizeof(float),
 76 |                              cudaMemcpyHostToDevice));
 77 |     assert(!strcmp(names[0], m_InputBlobName.c_str()));
 78 |     bindings[0] = m_DeviceInput;
 79 |     return true;
 80 | }
 81 | 
 82 | const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) noexcept
 83 | {
 84 |     void* output;
 85 |     m_CalibrationCache.clear();
 86 |     assert(!m_CalibTableFilePath.empty());
 87 |     std::ifstream input(m_CalibTableFilePath, std::ios::binary | std::ios::in);
 88 |     input >> std::noskipws;
 89 |     if (m_ReadCache && input.good())
 90 |         std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
 91 |                   std::back_inserter(m_CalibrationCache));
 92 | 
 93 |     length = m_CalibrationCache.size();
 94 |     if (length)
 95 |     {
 96 |         std::cout << "Using cached calibration table to build the engine" << std::endl;
 97 |         output = &m_CalibrationCache[0];
 98 |     }
 99 | 
100 |     else
101 |     {
102 |         std::cout << "New calibration table will be created to build the engine" << std::endl;
103 |         output = nullptr;
104 |     }
105 | 
106 |     return output;
107 | }
108 | 
109 | void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length) noexcept
110 | {
111 |     assert(!m_CalibTableFilePath.empty());
112 |     std::ofstream output(m_CalibTableFilePath, std::ios::binary);
113 |     output.write(reinterpret_cast<const char*>(cache), length);
114 |     output.close();
115 | }


--------------------------------------------------------------------------------
/modules/calibrator.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | MIT License
 3 | 
 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | *
24 | */
25 | #ifndef _CALIBRATOR_H_
26 | #define _CALIBRATOR_H_
27 | 
28 | #include "NvInfer.h"
29 | #include "ds_image.h"
30 | #include "trt_utils.h"
31 | 
32 | class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2
33 | {
34 | public:
35 |     Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages,
36 |                           const std::string& calibImagesPath, const std::string& calibTableFilePath,
37 |                           const uint64_t& inputSize, const uint32_t& inputH, const uint32_t& inputW,
38 | 							const std::string& inputBlobName,const std::string &s_net_type_);
39 |     virtual ~Int8EntropyCalibrator();
40 | 
41 |     int getBatchSize() const noexcept  override { return m_BatchSize; }
42 |     bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept  override;
43 |     const void* readCalibrationCache(size_t& length) noexcept  override;
44 |     void writeCalibrationCache(const void* cache, size_t length) noexcept  override;
45 | 
46 | private:
47 |     const uint32_t m_BatchSize;
48 |     const uint32_t m_InputH;
49 |     const uint32_t m_InputW;
50 |     const uint64_t m_InputSize;
51 |     const uint64_t m_InputCount;
52 |     const std::string m_InputBlobName;
53 | 	const std::string _s_net_type;
54 |     const std::string m_CalibTableFilePath{nullptr};
55 |     uint32_t m_ImageIndex;
56 |     bool m_ReadCache{true};
57 |     void* m_DeviceInput{nullptr};
58 |     std::vector<std::string> m_ImageList;
59 |     std::vector<char> m_CalibrationCache;
60 | };
61 | 
62 | #endif


--------------------------------------------------------------------------------
/modules/chunk.cu:
--------------------------------------------------------------------------------
  1 | #include <cmath>
  2 | #include <stdio.h>
  3 | #include <cassert>
  4 | #include <iostream>
  5 | #include "chunk.h"
  6 | #include <cuda_runtime.h>
  7 | #define NV_CUDA_CHECK(status)                                                                      \
  8 |     {                                                                                              \
  9 |         if (status != 0)                                                                           \
 10 |         {                                                                                          \
 11 |             std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \
 12 |                       << " at line " << __LINE__ << std::endl;                                     \
 13 |             abort();                                                                               \
 14 |         }                                                                                          \
 15 |     }
 16 | 
 17 | 
 18 | namespace nvinfer1
 19 | {
 20 | 	Chunk::Chunk()
 21 | 	{
 22 | 
 23 | 	}
 24 | 	Chunk::Chunk(const void* buffer, size_t size) 
 25 | 	{
 26 | 		assert(size == sizeof(_n_size_split));
 27 | 		_n_size_split = *reinterpret_cast<const int*>(buffer);
 28 | 	}
 29 | 	Chunk::~Chunk()
 30 | 	{
 31 | 
 32 | 	}
 33 | 	int Chunk::getNbOutputs() const noexcept
 34 | 	{
 35 | 		return 2;
 36 | 	}
 37 | 
 38 | 	Dims Chunk::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)noexcept
 39 | 	{
 40 | 		assert(nbInputDims == 1);
 41 | 		assert(index == 0 || index == 1);
 42 | 		return Dims3(inputs[0].d[0] / 2, inputs[0].d[1], inputs[0].d[2]);
 43 | 	}
 44 | 
 45 | 	int Chunk::initialize() noexcept
 46 | 	{
 47 | 		return 0;
 48 | 	}
 49 | 
 50 | 	void Chunk::terminate() noexcept
 51 | 	{
 52 | 	}
 53 | 
 54 | 	size_t Chunk::getWorkspaceSize(int maxBatchSize) const noexcept
 55 | 	{
 56 | 		return 0;
 57 | 	}
 58 | 	
 59 | 	/*int Chunk::enqueue(int batchSize,
 60 | 		const void* const* inputs,
 61 | 		void** outputs,
 62 | 		void* workspace,
 63 | 		cudaStream_t stream)noexcept*/
 64 | 
 65 | 	int Chunk::enqueue(int batchSize, 
 66 | 		const void* const* inputs,
 67 | 		void* const* outputs, 
 68 | 		void* workspace,
 69 | 		cudaStream_t stream) noexcept
 70 | 	{
 71 | 		//batch
 72 | 		for (int b = 0; b < batchSize; ++b)
 73 | 		{
 74 | 		printf("Chunk %d, %d\n", _n_size_split, _n_size_split/sizeof(float));
 75 | 		/*
 76 | 			NV_CUDA_CHECK(cudaMemcpyAsync((char*)outputs[0] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split/sizeof(float), _n_size_split, cudaMemcpyDeviceToDevice, stream));
 77 | 			NV_CUDA_CHECK(cudaMemcpyAsync((char*)outputs[1] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split/sizeof(float), _n_size_split, cudaMemcpyDeviceToDevice, stream));
 78 | 			*/
 79 | 			NV_CUDA_CHECK(cudaMemcpy((char*)outputs[0] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split/sizeof(float), _n_size_split, cudaMemcpyDeviceToDevice));
 80 | 			NV_CUDA_CHECK(cudaMemcpy((char*)outputs[1] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split/sizeof(float), _n_size_split, cudaMemcpyDeviceToDevice));			
 81 | 		}
 82 | 	//	NV_CUDA_CHECK(cudaMemcpy(outputs[0], inputs[0], _n_size_split, cudaMemcpyDeviceToDevice));
 83 | 	//	NV_CUDA_CHECK(cudaMemcpy(outputs[1], (void*)((char*)inputs[0] + _n_size_split), _n_size_split, cudaMemcpyDeviceToDevice));
 84 | 		return 0;
 85 | 	}
 86 | 
 87 | 	size_t Chunk::getSerializationSize() const noexcept
 88 | 	{
 89 | 		return sizeof(_n_size_split);
 90 | 	}
 91 | 
 92 | 	void Chunk::serialize(void *buffer)const noexcept
 93 | 	{
 94 | 		*reinterpret_cast<int*>(buffer) = _n_size_split;
 95 | 	}
 96 | 	
 97 | 	const char* Chunk::getPluginType()const noexcept
 98 | 	{
 99 | 		return "CHUNK_TRT";
100 | 	}
101 | 	const char* Chunk::getPluginVersion() const noexcept
102 | 	{	
103 | 		return "1.0";
104 | 	}
105 | 
106 | 	void Chunk::destroy() noexcept
107 | 	{
108 | 		delete this;
109 | 	}
110 | 	
111 | 	void Chunk::setPluginNamespace(const char* pluginNamespace) noexcept
112 | 	{
113 | 		_s_plugin_namespace = pluginNamespace;
114 | 	}
115 | 
116 | 	const char* Chunk::getPluginNamespace() const noexcept
117 | 	{
118 | 		return _s_plugin_namespace.c_str();
119 | 	}
120 | 
121 | 	DataType Chunk::getOutputDataType(int index,
122 | 		const nvinfer1::DataType* inputTypes,
123 | 		int nbInputs) const noexcept
124 | 	{
125 | 		assert(index == 0 || index == 1);
126 | 		return DataType::kFLOAT;
127 | 	}
128 | 
129 | 	bool Chunk::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept
130 | 	{
131 | 		return false;
132 | 	}
133 | 
134 | 	bool Chunk::canBroadcastInputAcrossBatch(int inputIndex) const noexcept
135 | 	{
136 | 		return false;
137 | 	}
138 | 
139 | 	void Chunk::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) {}
140 | 
141 | 	void Chunk::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
142 | 	{
143 | 		_n_size_split = in->dims.d[0] / 2 * in->dims.d[1] * in->dims.d[2] *sizeof(float);
144 | 				printf("Chunk in configuring %d, %d\n", _n_size_split, _n_size_split/sizeof(float));
145 | 	}
146 | 	void Chunk::detachFromContext() {}
147 | 
148 | 	bool Chunk::supportsFormat(DataType type, PluginFormat format) const noexcept
149 | 	{
150 | 		return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR);
151 | 	}
152 | 
153 | 	void Chunk::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept
154 | 	{
155 | 
156 | 	}
157 | 
158 | 	// Clone the plugin
159 | 	IPluginV2* Chunk::clone() const noexcept
160 | 	{
161 | 		Chunk *p = new Chunk();
162 | 		p->_n_size_split = _n_size_split;
163 | 		p->setPluginNamespace(_s_plugin_namespace.c_str());
164 | 		return p;
165 | 	}
166 | 
167 | 	//----------------------------
168 | 	PluginFieldCollection ChunkPluginCreator::_fc{};
169 | 	std::vector<PluginField> ChunkPluginCreator::_vec_plugin_attributes;
170 | 
171 | 	ChunkPluginCreator::ChunkPluginCreator()
172 | 	{
173 | 		_vec_plugin_attributes.clear();
174 | 		_fc.nbFields = _vec_plugin_attributes.size();
175 | 		_fc.fields = _vec_plugin_attributes.data();
176 | 	}
177 | 
178 | 	const char* ChunkPluginCreator::getPluginName() const noexcept
179 | 	{
180 | 		return "CHUNK_TRT";
181 | 	}
182 | 	
183 | 	const char* ChunkPluginCreator::getPluginVersion() const noexcept
184 | 	{
185 | 		return "1.0";
186 | 	}
187 | 
188 | 	const PluginFieldCollection* ChunkPluginCreator::getFieldNames()noexcept
189 | 	{
190 | 		return &_fc;
191 | 	}
192 | 
193 | 	IPluginV2* ChunkPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)noexcept
194 | 	{
195 | 		Chunk* obj = new Chunk();
196 | 		obj->setPluginNamespace(_s_name_space.c_str());
197 | 		return obj;
198 | 	}
199 | 
200 | 	IPluginV2* ChunkPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept
201 | 	{
202 | 		Chunk* obj = new Chunk(serialData,serialLength);
203 | 		obj->setPluginNamespace(_s_name_space.c_str());
204 | 		return obj;
205 | 	}
206 | 
207 | 	void ChunkPluginCreator::setPluginNamespace(const char* libNamespace)noexcept
208 | 	{
209 | 		_s_name_space = libNamespace;
210 | 	}
211 | 
212 | 	const char* ChunkPluginCreator::getPluginNamespace() const noexcept
213 | 	{
214 | 		return _s_name_space.c_str();
215 | 	}
216 | }//namespace nvinfer1
217 | 


--------------------------------------------------------------------------------
/modules/chunk.h:
--------------------------------------------------------------------------------
 1 | #ifndef CHUNK_H_
 2 | #define CHUNK_H_
 3 | 
 4 | //#include "NvInfer.h"
 5 | //#include "NvInferPlugin.h"
 6 | //#include "NvInferRuntimeCommon.h"
 7 | //#include <cuda_runtime.h>
 8 | //#include <iostream>
 9 | //#include <memory>
10 | //#include <sstream>
11 | //#include <string>
12 | //#include <cassert>
13 | //#include <vector>
14 | 
15 | #include <string>
16 | #include <vector>
17 | #include "NvInfer.h"
18 | 
19 | namespace nvinfer1
20 | {
21 | 	class Chunk : public IPluginV2
22 | 	{
23 | 	public:
24 | 		Chunk();
25 | 		Chunk(const void* buffer, size_t length);
26 | 		~Chunk();
27 | 		int getNbOutputs()const noexcept override;
28 | 		Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims)noexcept  override;
29 | 		int initialize()noexcept  override;
30 | 		void terminate()noexcept  override;
31 | 		size_t getWorkspaceSize(int maxBatchSize) const noexcept  override;
32 | 	//	int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)noexcept override;
33 | 
34 | 		int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace,
35 | 			cudaStream_t stream) noexcept override;
36 | 
37 | 		size_t getSerializationSize() const noexcept  override;
38 | 		void serialize(void* buffer) const noexcept  override;
39 | 		const char* getPluginType() const noexcept  override;
40 | 		const char* getPluginVersion() const noexcept  override;
41 | 		void destroy()noexcept  override;
42 | 		void setPluginNamespace(const char* pluginNamespace)noexcept  override;
43 | 		const char* getPluginNamespace() const noexcept  override;
44 | 		DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept;
45 | 		bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept;
46 | 		bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept;
47 | 		void attachToContext(
48 | 			cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator);
49 | 		void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput);
50 | 		void detachFromContext();
51 | 		bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const
52 | 		{
53 | 			return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
54 | 		}
55 | 		IPluginV2* clone() const noexcept  override;
56 | 		bool supportsFormat(DataType type, PluginFormat format) const noexcept override;
57 | 		void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override;
58 | 
59 | 	private:
60 | 		std::string _s_plugin_namespace;
61 | 		int _n_size_split;
62 | 	};
63 | 
64 | 	class ChunkPluginCreator : public IPluginCreator
65 | 	{
66 | 	public:
67 | 		ChunkPluginCreator();
68 | 		~ChunkPluginCreator() override = default;
69 | 		const char* getPluginName()const noexcept  override;
70 | 		const char* getPluginVersion() const noexcept  override;
71 | 		const PluginFieldCollection* getFieldNames()noexcept  override;
72 | 		IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc)noexcept  override;
73 | 		IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept  override;
74 | 		void setPluginNamespace(const char* libNamespace)noexcept  override;
75 | 		const char* getPluginNamespace() const noexcept  override;
76 | 	private:
77 | 		std::string _s_name_space;
78 | 		static PluginFieldCollection _fc;
79 | 		static std::vector<PluginField> _vec_plugin_attributes;
80 | 	};
81 | 
82 | }//nampespace nvinfer1
83 | 
84 | 
85 | #endif
86 | 
87 | 
88 | 


--------------------------------------------------------------------------------
/modules/class_detector.cpp:
--------------------------------------------------------------------------------
 1 | #include "class_detector.h"
 2 | #include "class_yolo_detector.hpp"
 3 | 
 4 | class Detector::Impl
 5 | {
 6 | public:
 7 | 	Impl() {}
 8 | 
 9 | 	~Impl(){}
10 | 
11 | 	YoloDectector _detector;
12 | };
13 | 
14 | Detector::Detector()
15 | {
16 | 	_impl = new Impl();
17 | }
18 | 
19 | Detector::~Detector()
20 | {
21 | 	if (_impl)
22 | 	{
23 | 		delete _impl;
24 | 		_impl = nullptr;
25 | 	}
26 | }
27 | 
28 | void Detector::init(const Config &config)
29 | {
30 | 	_impl->_detector.init(config);
31 | }
32 | 
33 | void Detector::dump_profiling(void)
34 | {
35 |   _impl->_detector.dump_profiling();
36 | }
37 | 
38 | void Detector::detect(const std::vector<cv::Mat> &mat_image, std::vector<BatchResult> &vec_batch_result, const bool cuda)
39 | {
40 |   _impl->_detector.detect(mat_image, vec_batch_result, cuda);
41 | }
42 | 
43 | void Detector::save_image(cv::Mat &img, const std::string &dir, const std::string &name)
44 | {
45 |   _impl->_detector.save_image(img, dir, name);
46 | }
47 | 
48 | void Detector::segment(const std::vector<cv::Mat> &mat_image, std::string filename)
49 | {
50 |   _impl->_detector.segment(mat_image, filename);
51 | }
52 | 
53 | void Detector::regress(const std::vector<cv::Mat> &mat_image, std::vector<BatchResult> &vec_batch_result, std::string filename)
54 | {
55 |   _impl->_detector.regress(mat_image, vec_batch_result, filename);
56 | }
57 | 
58 | void Detector::draw_BBox(cv::Mat &img,Result result)
59 | {
60 |   _impl->_detector.draw_BBox(img, result);
61 | }
62 | 


--------------------------------------------------------------------------------
/modules/class_detector.h:
--------------------------------------------------------------------------------
 1 | #ifndef CLASS_DETECTOR_H_
 2 | #define CLASS_DETECTOR_H_
 3 | 
 4 | #include "API.h"
 5 | #include <iostream>
 6 | #include <opencv2/opencv.hpp>
 7 | 
 8 | struct Result
 9 | {
10 | 	int		 id		= -1;
11 | 	float	 prob	= 0.f;
12 | 	cv::Rect rect;
13 | };
14 | 
15 | using BatchResult = std::vector<Result>;
16 | 
17 | enum ModelType
18 | {
19 | 	YOLOV3,
20 | 	YOLOV4,
21 | 	YOLOV5
22 | };
23 | 
24 | enum Precision
25 | {
26 | 	INT8 = 0,
27 | 	FP16,
28 | 	FP32
29 | };
30 | 
31 | struct Config
32 | {
33 | 	std::string file_model_cfg					= "configs/yolov4.cfg";
34 | 
35 | 	std::string file_model_weights				= "configs/yolov4.weights";
36 | 
37 | 	float detect_thresh							= 0.9;
38 | 
39 | 	ModelType	net_type						= YOLOV4;
40 | 
41 | 	Precision	inference_precison				= FP32;
42 | 	
43 | 	int	gpu_id									= 0;
44 | 
45 | 	std::string calibration_image_list_file_txt = "configs/calibration_images.txt";
46 |   int batch = 0;
47 |   int width = 0;
48 |   int height = 0;
49 |   int dla = -1;
50 | };
51 | 
52 | class API Detector
53 | {
54 |  public:
55 |  explicit Detector();
56 |  
57 |  ~Detector();
58 | 
59 |  void init(const Config &config);
60 |  void save_image(cv::Mat &img, const std::string &dir, const std::string &name);
61 |  void segment(const std::vector<cv::Mat> &mat_image, std::string filename);
62 |  void regress(const std::vector<cv::Mat> &mat_image, std::vector<BatchResult> &vec_batch_result, std::string filename);
63 |  void detect(const std::vector<cv::Mat> &mat_image, std::vector<BatchResult> &vec_batch_result, const bool cuda);
64 |  void dump_profiling(void);
65 |  void draw_BBox(cv::Mat &img, Result result); 
66 |  private:
67 |  
68 |  Detector(const Detector &);
69 |  const Detector &operator =(const Detector &);
70 |  class Impl;
71 |  Impl *_impl;
72 | };
73 | 
74 | #endif // !CLASS_QH_DETECTOR_H_
75 | 


--------------------------------------------------------------------------------
/modules/colormap.hpp:
--------------------------------------------------------------------------------
  1 | #define MAX_DISTANCE 120
  2 | const unsigned char jet_colormap[MAX_DISTANCE][3] = {
  3 |   {0, 0, 127},
  4 |   {0, 0, 137},
  5 |   {0, 0, 146},
  6 |   {0, 0, 156},
  7 |   {0, 0, 166},
  8 |   {0, 0, 176},
  9 |   {0, 0, 185},
 10 |   {0, 0, 195},
 11 |   {0, 0, 205},
 12 |   {0, 0, 215},
 13 |   {0, 0, 224},
 14 |   {0, 0, 234},
 15 |   {0, 0, 244},
 16 |   {0, 0, 254},
 17 |   {0, 0, 255},
 18 |   {0, 1, 255},
 19 |   {0, 9, 255},
 20 |   {0, 18, 255},
 21 |   {0, 26, 255},
 22 |   {0, 35, 255},
 23 |   {0, 43, 255},
 24 |   {0, 52, 255},
 25 |   {0, 61, 255},
 26 |   {0, 69, 255},
 27 |   {0, 78, 255},
 28 |   {0, 86, 255},
 29 |   {0, 95, 255},
 30 |   {0, 103, 255},
 31 |   {0, 112, 255},
 32 |   {0, 121, 255},
 33 |   {0, 129, 255},
 34 |   {0, 138, 255},
 35 |   {0, 146, 255},
 36 |   {0, 155, 255},
 37 |   {0, 163, 255},
 38 |   {0, 172, 255},
 39 |   {0, 181, 255},
 40 |   {0, 189, 255},
 41 |   {0, 198, 255},
 42 |   {0, 206, 255},
 43 |   {0, 215, 255},
 44 |   {0, 223, 251},
 45 |   {2, 232, 244},
 46 |   {9, 241, 237},
 47 |   {16, 249, 230},
 48 |   {23, 255, 223},
 49 |   {30, 255, 216},
 50 |   {36, 255, 209},
 51 |   {43, 255, 202},
 52 |   {50, 255, 195},
 53 |   {57, 255, 189},
 54 |   {64, 255, 182},
 55 |   {71, 255, 175},
 56 |   {78, 255, 168},
 57 |   {85, 255, 161},
 58 |   {92, 255, 154},
 59 |   {99, 255, 147},
 60 |   {106, 255, 140},
 61 |   {113, 255, 133},
 62 |   {119, 255, 126},
 63 |   {126, 255, 119},
 64 |   {133, 255, 113},
 65 |   {140, 255, 106},
 66 |   {147, 255, 99},
 67 |   {154, 255, 92},
 68 |   {161, 255, 85},
 69 |   {168, 255, 78},
 70 |   {175, 255, 71},
 71 |   {182, 255, 64},
 72 |   {189, 255, 57},
 73 |   {195, 255, 50},
 74 |   {202, 255, 43},
 75 |   {209, 255, 36},
 76 |   {216, 255, 30},
 77 |   {223, 255, 23},
 78 |   {230, 255, 16},
 79 |   {237, 255, 9},
 80 |   {244, 248, 2},
 81 |   {251, 240, 0},
 82 |   {255, 232, 0},
 83 |   {255, 224, 0},
 84 |   {255, 216, 0},
 85 |   {255, 208, 0},
 86 |   {255, 200, 0},
 87 |   {255, 192, 0},
 88 |   {255, 184, 0},
 89 |   {255, 176, 0},
 90 |   {255, 168, 0},
 91 |   {255, 161, 0},
 92 |   {255, 153, 0},
 93 |   {255, 145, 0},
 94 |   {255, 137, 0},
 95 |   {255, 129, 0},
 96 |   {255, 121, 0},
 97 |   {255, 113, 0},
 98 |   {255, 105, 0},
 99 |   {255, 97, 0},
100 |   {255, 89, 0},
101 |   {255, 81, 0},
102 |   {255, 73, 0},
103 |   {255, 65, 0},
104 |   {255, 57, 0},
105 |   {255, 49, 0},
106 |   {255, 41, 0},
107 |   {255, 34, 0},
108 |   {255, 26, 0},
109 |   {254, 18, 0},
110 |   {244, 10, 0},
111 |   {234, 2, 0},
112 |   {224, 0, 0},
113 |   {215, 0, 0},
114 |   {205, 0, 0},
115 |   {195, 0, 0},
116 |   {185, 0, 0},
117 |   {176, 0, 0},
118 |   {166, 0, 0},
119 |   {156, 0, 0},
120 |   {146, 0, 0},
121 |   {137, 0, 0},
122 |   {127, 0, 0}
123 | };
124 | 
125 | unsigned char magma_colormap[MAX_DISTANCE][3] = {
126 |   {251, 252, 191},
127 |   {251, 249, 187},
128 |   {252, 245, 183},
129 |   {252, 241, 179},
130 |   {252, 238, 176},
131 |   {252, 234, 172},
132 |   {252, 230, 168},
133 |   {253, 225, 163},
134 |   {253, 221, 159},
135 |   {253, 218, 156},
136 |   {253, 214, 152},
137 |   {253, 210, 149},
138 |   {253, 207, 146},
139 |   {254, 203, 142},
140 |   {254, 198, 137},
141 |   {254, 194, 134},
142 |   {254, 190, 131},
143 |   {254, 187, 128},
144 |   {254, 183, 125},
145 |   {254, 179, 123},
146 |   {254, 174, 118},
147 |   {254, 170, 116},
148 |   {254, 166, 113},
149 |   {253, 162, 111},
150 |   {253, 159, 108},
151 |   {253, 155, 106},
152 |   {253, 151, 104},
153 |   {252, 146, 101},
154 |   {252, 142, 99},
155 |   {251, 138, 98},
156 |   {251, 134, 96},
157 |   {250, 130, 95},
158 |   {250, 127, 94},
159 |   {249, 123, 93},
160 |   {248, 117, 92},
161 |   {247, 113, 91},
162 |   {246, 110, 91},
163 |   {245, 106, 91},
164 |   {243, 103, 91},
165 |   {242, 99, 92},
166 |   {239, 94, 93},
167 |   {238, 91, 94},
168 |   {236, 88, 95},
169 |   {234, 85, 96},
170 |   {231, 82, 98},
171 |   {229, 80, 99},
172 |   {226, 77, 101},
173 |   {222, 74, 103},
174 |   {220, 72, 105},
175 |   {217, 70, 106},
176 |   {214, 68, 108},
177 |   {211, 66, 109},
178 |   {208, 65, 111},
179 |   {203, 62, 113},
180 |   {200, 61, 114},
181 |   {197, 60, 116},
182 |   {194, 58, 117},
183 |   {190, 57, 118},
184 |   {187, 56, 119},
185 |   {184, 55, 120},
186 |   {179, 53, 122},
187 |   {176, 52, 123},
188 |   {172, 51, 123},
189 |   {169, 50, 124},
190 |   {166, 49, 125},
191 |   {163, 48, 126},
192 |   {159, 47, 126},
193 |   {154, 45, 127},
194 |   {151, 44, 127},
195 |   {148, 43, 128},
196 |   {145, 42, 128},
197 |   {141, 41, 128},
198 |   {138, 40, 129},
199 |   {133, 38, 129},
200 |   {130, 37, 129},
201 |   {127, 36, 129},
202 |   {124, 35, 129},
203 |   {121, 34, 129},
204 |   {118, 33, 129},
205 |   {115, 31, 129},
206 |   {110, 30, 129},
207 |   {107, 28, 128},
208 |   {104, 27, 128},
209 |   {101, 26, 128},
210 |   {97, 24, 127},
211 |   {94, 23, 127},
212 |   {90, 21, 126},
213 |   {87, 20, 125},
214 |   {83, 19, 124},
215 |   {80, 18, 123},
216 |   {77, 17, 122},
217 |   {74, 16, 121},
218 |   {71, 15, 119},
219 |   {66, 15, 116},
220 |   {62, 15, 114},
221 |   {59, 15, 111},
222 |   {55, 15, 108},
223 |   {52, 16, 104},
224 |   {48, 16, 101},
225 |   {45, 16, 96},
226 |   {40, 17, 89},
227 |   {37, 17, 85},
228 |   {34, 17, 80},
229 |   {31, 17, 75},
230 |   {28, 16, 70},
231 |   {26, 16, 65},
232 |   {22, 14, 58},
233 |   {20, 13, 53},
234 |   {17, 12, 49},
235 |   {15, 11, 44},
236 |   {13, 10, 40},
237 |   {11, 8, 36},
238 |   {9, 7, 31},
239 |   {6, 5, 25},
240 |   {4, 4, 21},
241 |   {3, 3, 17},
242 |   {2, 2, 13},
243 |   {1, 1, 9},
244 |   {0, 0, 6},
245 |   {0, 0, 3},
246 | };
247 | 


--------------------------------------------------------------------------------
/modules/detect.cu:
--------------------------------------------------------------------------------
  1 | //sys
  2 | #include <cmath>
  3 | #include <stdio.h>
  4 | #include <cassert>
  5 | #include <iostream>
  6 | #include <cuda_runtime.h>
  7 | #include <cuda.h>
  8 | #include <stdint.h>
  9 | #include <string.h>
 10 | //my
 11 | #include "detect.h"
 12 | 
 13 | #define NV_CUDA_CHECK(status)                                                                      \
 14 |     {                                                                                              \
 15 |         if (status != 0)                                                                           \
 16 |         {                                                                                          \
 17 |             std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \
 18 |                       << " at line " << __LINE__ << std::endl;                                     \
 19 |             abort();                                                                               \
 20 |         }                                                                                          \
 21 |     }
 22 | 
 23 | namespace nvinfer1
 24 | {
 25 | 	Detect::Detect()
 26 | 	{
 27 | 	}
 28 | 
 29 | 	Detect::Detect(const void* data, size_t length)
 30 | 	{
 31 | 		const char *d = reinterpret_cast<const char*>(data), *a = d;
 32 | 		read(d,_n_anchor);
 33 | 		read(d,_n_classes);
 34 | 		read(d,_n_grid_h);
 35 | 		read(d, _n_grid_w);
 36 | 		read(d, _n_output_size);
 37 | 		//printf("anchor:%d,classes:%d,gh:%d,gw:%d,size:%d\n", _n_anchor, _n_classes, _n_grid_h, _n_grid_w, _n_output_size);
 38 | 		assert(d == a + length);
 39 | 	}
 40 | 
 41 | 	Detect::Detect(const uint32_t n_anchor_, const uint32_t n_classes_,
 42 | 		const uint32_t n_grid_h_, const uint32_t n_grid_w_/*,
 43 | 		const uint32_t &n_stride_h_, const uint32_t &n_stride_w_*/):
 44 | 		_n_anchor(n_anchor_),
 45 | 		_n_classes(n_classes_),
 46 | 		_n_grid_h(n_grid_h_),
 47 | 		_n_grid_w(n_grid_w_)
 48 | 	{
 49 | 		_n_output_size = (5 + _n_classes)*_n_anchor*_n_grid_h*_n_grid_w;
 50 | 	}
 51 | 	Detect::~Detect()
 52 | 	{}
 53 | 
 54 | 	inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); }
 55 | 
 56 | 	__global__ void gpu_detect_layer(const float *input_,
 57 | 		float* output_,
 58 | 		const uint32_t n_grid_h_,
 59 | 		const uint32_t n_grid_w_,
 60 | 		const uint32_t n_classes_,
 61 | 		const uint32_t n_anchor_)
 62 | 	{
 63 | 		uint32_t x_id = blockIdx.x * blockDim.x + threadIdx.x;
 64 | 		uint32_t y_id = blockIdx.y * blockDim.y + threadIdx.y;
 65 | 		uint32_t z_id = blockIdx.z * blockDim.z + threadIdx.z;
 66 | 
 67 | 		if ((x_id >= n_grid_w_) || (y_id >= n_grid_h_) || (z_id >= n_anchor_))
 68 | 		{
 69 | 			return;
 70 | 		}
 71 | 		//	printf("grid_h:%d,grid_w:%d,class:%d,anchor:%d\n", n_grid_h_, n_grid_w_, n_classes_, n_anchor_);
 72 | 		const int numGridCells = n_grid_h_ * n_grid_w_;
 73 | 		const int bbindex = y_id * n_grid_w_ + x_id;
 74 | 
 75 | 		output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 0)]
 76 | 			= 2.f * sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 0)])-0.5f;
 77 | 
 78 | 		output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 1)]
 79 | 			= 2.f * sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 1)])-0.5f;
 80 | 
 81 | 		float w = 2.f * sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 2)]);
 82 | 		output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 2)]
 83 | 			= w*w;
 84 | 
 85 | 		float h = 2.f* sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 3)]);
 86 | 		output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 3)]
 87 | 			= h*h;
 88 | 
 89 | 		output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 4)]
 90 | 			= sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 4)]);
 91 | 		for (uint32_t i = 0; i < n_classes_; ++i)
 92 | 		{
 93 | 			output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + (5 + i))]
 94 | 				= sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + (5 + i))]);
 95 | 		}
 96 | 	}
 97 | 
 98 | 	cudaError_t cuda_detect_layer(const void* input_, 
 99 | 		void* output_,
100 | 		const uint32_t& batch_size_,
101 | 		const uint32_t& grid_h_,
102 | 		const uint32_t& grid_w_,
103 | 		const uint32_t& n_classes_,
104 | 		const uint32_t& n_anchor_,
105 | 		uint64_t n_output_size_, 
106 | 		cudaStream_t stream_)
107 | 	{
108 | 		dim3 threads_per_block(16, 16, 4);
109 | 		dim3 number_of_blocks((grid_w_ / threads_per_block.x) + 1,
110 | 			(grid_h_ / threads_per_block.y) + 1,
111 | 			(n_anchor_ / threads_per_block.z) + 1);
112 | 		for (int batch = 0; batch < batch_size_; ++batch)
113 | 		{
114 | 			gpu_detect_layer << <number_of_blocks, threads_per_block, 0, stream_ >> >(
115 | 				reinterpret_cast<const float*>(input_) + (batch * n_output_size_),
116 | 				reinterpret_cast<float*>(output_) + (batch * n_output_size_),
117 | 				grid_h_,
118 | 				grid_w_,
119 | 				n_classes_,
120 | 				n_anchor_);
121 | 		}
122 | 		return cudaGetLastError();
123 | 	}
124 | 
125 | 	int Detect::enqueue(int batchSize,
126 | 		const void* const* inputs,
127 | 		void* const* outputs,
128 | 		void* workspace,
129 | 		cudaStream_t stream) noexcept
130 | 	{
131 | 		NV_CUDA_CHECK(cuda_detect_layer(inputs[0], outputs[0], batchSize, _n_grid_h, _n_grid_w, _n_classes, _n_anchor, _n_output_size, stream));
132 | 		return 0;
133 | 	}
134 | 
135 | 
136 | 	bool Detect::supportsFormat(DataType type, PluginFormat format) const noexcept
137 | 	{
138 | 		return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR);
139 | 	}
140 | 
141 | 	void Detect::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept
142 | 	{
143 | 
144 | 	}
145 | 
146 | 	size_t Detect::getSerializationSize() const noexcept
147 | 	{
148 | 		return sizeof(_n_anchor) + sizeof(_n_classes) + sizeof(_n_grid_h) + sizeof(_n_grid_w)
149 | 			+ sizeof(_n_output_size);
150 | 	}
151 | 
152 | 	void Detect::serialize(void *buffer) const noexcept
153 | 	{
154 | 		char *d = static_cast<char*>(buffer), *a = d;
155 | 		write(d,_n_anchor);
156 | 		write(d, _n_classes);
157 | 		write(d, _n_grid_h);
158 | 		write(d, _n_grid_w);
159 | 		write(d, _n_output_size);
160 | 		assert(d == a + getSerializationSize());
161 | 	}
162 | 
163 | 	void Detect::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
164 | 	{
165 | 
166 | 	}
167 | 	IPluginV2* Detect::clone() const noexcept
168 | 	{
169 | 		Detect *p = new Detect(_n_anchor,_n_classes,_n_grid_h,_n_grid_w);
170 | 		p->setPluginNamespace(_s_plugin_namespace.c_str());
171 | 		return p;
172 | 	}
173 | 	 
174 | 
175 | 	//
176 | 	PluginFieldCollection DetectPluginCreator::_fc{};
177 | 	std::vector<PluginField> DetectPluginCreator::_vec_plugin_attributes;
178 | 
179 | 	DetectPluginCreator::DetectPluginCreator()
180 | 	{
181 | 		_vec_plugin_attributes.clear();
182 | 		_fc.nbFields = _vec_plugin_attributes.size();
183 | 		_fc.fields = _vec_plugin_attributes.data();
184 | 	}
185 | 
186 | 	const char* DetectPluginCreator::getPluginName() const noexcept
187 | 	{
188 | 		return "DETECT_TRT";
189 | 	}
190 | 
191 | 	const char* DetectPluginCreator::getPluginVersion() const noexcept
192 | 	{
193 | 		return "1.0";
194 | 	}
195 | 
196 | 	const PluginFieldCollection* DetectPluginCreator::getFieldNames() noexcept
197 | 	{
198 | 		return &_fc;
199 | 	}
200 | 
201 | 	IPluginV2* DetectPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) noexcept
202 | 	{
203 | 		Detect* obj = new Detect();
204 | 		obj->setPluginNamespace(_s_name_space.c_str());
205 | 		return obj;
206 | 	}
207 | 
208 | 	IPluginV2* DetectPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept
209 | 	{
210 | 		Detect* obj = new Detect(serialData, serialLength);
211 | 		obj->setPluginNamespace(_s_name_space.c_str());
212 | 		return obj;
213 | 	}
214 | 
215 | 	void DetectPluginCreator::setPluginNamespace(const char* libNamespace) noexcept
216 | 	{
217 | 		_s_name_space = libNamespace;
218 | 	}
219 | 
220 | 	const char* DetectPluginCreator::getPluginNamespace() const noexcept
221 | 	{
222 | 		return _s_name_space.c_str();
223 | 	}
224 | }//end namespace nvinfer1
225 | 


--------------------------------------------------------------------------------
/modules/detect.h:
--------------------------------------------------------------------------------
  1 | #ifndef _DETECT_H_
  2 | #define _DETECT_H_
  3 | 
  4 | #include <string>
  5 | #include <vector>
  6 | #include "NvInfer.h"
  7 | 
  8 | namespace nvinfer1
  9 | {
 10 | 	template <typename T>
 11 | 	void write(char*& buffer, const T& val)
 12 | 	{
 13 | 		*reinterpret_cast<T*>(buffer) = val;
 14 | 		buffer += sizeof(T);
 15 | 	}
 16 | 
 17 | 	template <typename T>
 18 | 	void read(const char*& buffer, T& val)
 19 | 	{
 20 | 		val = *reinterpret_cast<const T*>(buffer);
 21 | 		buffer += sizeof(T);
 22 | 	}
 23 | 
 24 | 	class Detect :public IPluginV2
 25 | 	{
 26 | 	public:
 27 | 		Detect();
 28 | 		Detect(const void* data, size_t length);
 29 | 		Detect(const uint32_t n_anchor_, const uint32_t _n_classes_,
 30 | 			const uint32_t n_grid_h_, const uint32_t n_grid_w_/*,
 31 | 			const uint32_t &n_stride_h_, const uint32_t &n_stride_w_*/);
 32 | 		~Detect();
 33 | 		int getNbOutputs()const noexcept override
 34 | 		{
 35 | 			return 1;
 36 | 		}
 37 | 		Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) noexcept  override
 38 | 		{
 39 | 			return inputs[0];
 40 | 		}
 41 | 		int initialize() noexcept  override
 42 | 		{
 43 | 			return 0;
 44 | 		}
 45 | 		void terminate() noexcept  override
 46 | 		{
 47 | 		}
 48 | 		size_t getWorkspaceSize(int maxBatchSize) const noexcept  override
 49 | 		{
 50 | 			return 0;
 51 | 		}
 52 | 		int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace,
 53 | 			cudaStream_t stream) noexcept override;
 54 | 
 55 | 		bool supportsFormat(DataType type, PluginFormat format) const noexcept override;
 56 | 		void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override;
 57 | 
 58 | 		size_t getSerializationSize() const noexcept  override;
 59 | 		void serialize(void* buffer) const noexcept  override;
 60 | 		const char* getPluginType() const noexcept  override
 61 | 		{
 62 | 			return "DETECT_TRT";
 63 | 		}
 64 | 		const char* getPluginVersion() const noexcept  override
 65 | 		{
 66 | 			return "1.0";
 67 | 		}
 68 | 		void destroy() noexcept  override
 69 | 		{
 70 | 			delete this;
 71 | 		}
 72 | 		void setPluginNamespace(const char* pluginNamespace) noexcept  override
 73 | 		{
 74 | 			_s_plugin_namespace = pluginNamespace;
 75 | 		}
 76 | 		const char* getPluginNamespace() const  noexcept override
 77 | 		{
 78 | 			return _s_plugin_namespace.c_str();
 79 | 		}
 80 | 		DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept
 81 | 		{
 82 | 			return DataType::kFLOAT;
 83 | 		}
 84 | 		bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept
 85 | 		{
 86 | 			return false;
 87 | 		}
 88 | 		bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept
 89 | 		{
 90 | 			return false;
 91 | 		}
 92 | 		void attachToContext(
 93 | 			cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
 94 | 		{}
 95 | 		void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) ;
 96 | 		void detachFromContext()
 97 | 		{}
 98 | 		bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const noexcept
 99 | 		{
100 | 			return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
101 | 		}
102 | 		IPluginV2* clone() const noexcept override;
103 | 	private:
104 | 		
105 | 		uint32_t _n_anchor;
106 | 		uint32_t _n_classes;
107 | 		uint32_t _n_grid_h;
108 | 		uint32_t _n_grid_w;
109 | 		//uint32_t _n_stride_h;
110 | 	//	uint32_t _n_stride_w;
111 | 		uint64_t _n_output_size;
112 | 		std::string _s_plugin_namespace;
113 | 	}; //end detect
114 | 
115 | 	class DetectPluginCreator : public IPluginCreator
116 | 	{
117 | 	public:
118 | 		DetectPluginCreator();
119 | 		~DetectPluginCreator() override = default;
120 | 		const char* getPluginName()const noexcept  override;
121 | 		const char* getPluginVersion() const  noexcept override;
122 | 		const PluginFieldCollection* getFieldNames() noexcept  override;
123 | 		IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) noexcept  override;
124 | 		IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept  override;
125 | 		void setPluginNamespace(const char* libNamespace)  noexcept override;
126 | 		const char* getPluginNamespace() const noexcept  override;
127 | 	private:
128 | 		std::string _s_name_space;
129 | 		static PluginFieldCollection _fc;
130 | 		static std::vector<PluginField> _vec_plugin_attributes;
131 | 	};//end detect creator
132 | 
133 | }//end namespace nvinfer1
134 | 
135 | 
136 | 
137 | #endif
138 | 


--------------------------------------------------------------------------------
/modules/ds_image.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | MIT License
  3 | 
  4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
  5 | 
  6 | Permission is hereby granted, free of charge, to any person obtaining a copy
  7 | of this software and associated documentation files (the "Software"), to deal
  8 | in the Software without restriction, including without limitation the rights
  9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 | copies of the Software, and to permit persons to whom the Software is
 11 | furnished to do so, subject to the following conditions:
 12 | 
 13 | The above copyright notice and this permission notice shall be included in all
 14 | copies or substantial portions of the Software.
 15 | 
 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 22 | SOFTWARE.
 23 | *
 24 | */
 25 | #include "ds_image.h"
 26 | #include <experimental/filesystem>
 27 | 
 28 | DsImage::DsImage() :
 29 |     m_Height(0),
 30 |     m_Width(0),
 31 |     m_XOffset(0),
 32 |     m_YOffset(0),
 33 |     m_ScalingFactor(0.0),
 34 |     m_RNG(cv::RNG(unsigned(std::time(0)))),
 35 |     m_ImageName()
 36 | {
 37 | }
 38 | 
 39 | DsImage::DsImage(const cv::Mat& mat_image_, const std::string &s_net_type_, const int& inputH, const int& inputW) :
 40 | 	m_Height(0),
 41 | 	m_Width(0),
 42 | 	m_XOffset(0),
 43 | 	m_YOffset(0),
 44 | 	m_ScalingFactor(0.0),
 45 | 	m_RNG(cv::RNG(unsigned(std::time(0)))),
 46 | 	m_ImageName()
 47 | {
 48 | 	m_OrigImage = mat_image_;
 49 | 	m_Height = m_OrigImage.rows;
 50 | 	m_Width = m_OrigImage.cols;
 51 | 	if (!m_OrigImage.data || m_OrigImage.cols <= 0 || m_OrigImage.rows <= 0)
 52 | 	{
 53 | 		std::cout << "empty image !"<< std::endl;
 54 | 		assert(0);
 55 | 	}
 56 | 	if (m_OrigImage.channels() != 3)
 57 | 	{
 58 | 		std::cout << "Non RGB images are not supported "<< std::endl;
 59 | 		assert(0);
 60 | 	}
 61 | 	if ("yolov5" == s_net_type_)
 62 | 	{
 63 | 		// resize the DsImage with scale
 64 | 		float r = std::min(static_cast<float>(inputH) / static_cast<float>(m_Height), static_cast<float>(inputW) / static_cast<float>(m_Width));
 65 |         int resizeH = (std::round(m_Height*r));
 66 |         int resizeW = (std::round(m_Width*r));
 67 | 
 68 | 		// Additional checks for images with non even dims
 69 | 		if ((inputW - resizeW) % 2) resizeW--;
 70 | 		if ((inputH - resizeH) % 2) resizeH--;
 71 | 		assert((inputW - resizeW) % 2 == 0);
 72 | 		assert((inputH - resizeH) % 2 == 0);
 73 | 
 74 | 		m_XOffset = (inputW - resizeW) / 2;
 75 | 		m_YOffset = (inputH - resizeH) / 2;
 76 | 
 77 | 		assert(2 * m_XOffset + resizeW == inputW);
 78 | 		assert(2 * m_YOffset + resizeH == inputH);
 79 | 
 80 | 		// resizing
 81 | 		cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(resizeW, resizeH), 0, 0, cv::INTER_CUBIC);
 82 | 		// letterboxing
 83 | 		cv::copyMakeBorder(m_LetterboxImage, m_LetterboxImage, m_YOffset, m_YOffset, m_XOffset,
 84 | 			m_XOffset, cv::BORDER_CONSTANT, cv::Scalar(128, 128, 128));
 85 | 		//	cv::imwrite("letter.jpg", m_LetterboxImage);
 86 | 		// converting to RGB
 87 | 		//cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB);
 88 | 	}
 89 | 	else
 90 | 	{
 91 | 		cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(inputW, inputH), 0, 0, cv::INTER_CUBIC);
 92 | 		// converting to RGB
 93 | 		//cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB);
 94 | 	}
 95 | 	
 96 | }
 97 | DsImage::DsImage(const std::string& path, const std::string &s_net_type_, const int& inputH, const int& inputW) :
 98 | 	m_Height(0),
 99 | 	m_Width(0),
100 | 	m_XOffset(0),
101 | 	m_YOffset(0),
102 | 	m_ScalingFactor(0.0),
103 | 	m_RNG(cv::RNG(unsigned(std::time(0)))),
104 | 	m_ImageName()
105 | {
106 | 	m_ImageName = std::experimental::filesystem::path(path).stem().string();
107 | 	m_OrigImage = cv::imread(path, cv::IMREAD_UNCHANGED);
108 | 	m_Height = m_OrigImage.rows;
109 | 	m_Width = m_OrigImage.cols;
110 | 	if (!m_OrigImage.data || m_OrigImage.cols <= 0 || m_OrigImage.rows <= 0)
111 | 	{
112 | 		std::cout << "Unable to open image : " << path << std::endl;
113 | 		assert(0);
114 | 	}
115 | 
116 | 	if (m_OrigImage.channels() != 3)
117 | 	{
118 | 		std::cout << "Non RGB images are not supported : " << path << std::endl;
119 | 		assert(0);
120 | 	}
121 | 
122 | 	if ("yolov5" == s_net_type_)
123 | 	{
124 | 		// resize the DsImage with scale
125 | 		float dim = std::max(m_Height, m_Width);
126 | 		int resizeH = ((m_Height / dim) * inputH);
127 | 		int resizeW = ((m_Width / dim) * inputW);
128 | 		m_ScalingFactor = static_cast<float>(resizeH) / static_cast<float>(m_Height);
129 | 		float	m_ScalingFactorw = static_cast<float>(resizeW) / static_cast<float>(m_Width);
130 | 
131 | 		// Additional checks for images with non even dims
132 | 		if ((inputW - resizeW) % 2) resizeW--;
133 | 		if ((inputH - resizeH) % 2) resizeH--;
134 | 		assert((inputW - resizeW) % 2 == 0);
135 | 		assert((inputH - resizeH) % 2 == 0);
136 | 
137 | 		m_XOffset = (inputW - resizeW) / 2;
138 | 		m_YOffset = (inputH - resizeH) / 2;
139 | 
140 | 		assert(2 * m_XOffset + resizeW == inputW);
141 | 		assert(2 * m_YOffset + resizeH == inputH);
142 | 
143 | 		// resizing
144 | 		cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(resizeW, resizeH), 0, 0, cv::INTER_CUBIC);
145 | 		// letterboxing
146 | 		cv::copyMakeBorder(m_LetterboxImage, m_LetterboxImage, m_YOffset, m_YOffset, m_XOffset,
147 | 			m_XOffset, cv::BORDER_CONSTANT, cv::Scalar(128, 128, 128));
148 | 		//	cv::imwrite("letter.jpg", m_LetterboxImage);
149 | 		// converting to RGB
150 | 	//	cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB);
151 | 	}
152 | 	else
153 | 	{
154 | 		cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(inputW, inputH), 0, 0, cv::INTER_CUBIC);
155 | 		// converting to RGB
156 | 	//	cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB);
157 | 	}
158 | }
159 | 
160 | void DsImage::letterbox(const int& inputH, const int& inputW)
161 | {
162 | 	//m_OrigImage.copyTo(m_MarkedImage);
163 | 	m_Height = m_OrigImage.rows;
164 | 	m_Width = m_OrigImage.cols;
165 | 
166 | 	// resize the DsImage with scale
167 | 	float dim = std::max(m_Height, m_Width);
168 | 	int resizeH = ((m_Height / dim) * inputH);
169 | 	int resizeW = ((m_Width / dim) * inputW);
170 | 	m_ScalingFactor = static_cast<float>(resizeH) / static_cast<float>(m_Height);
171 | 	float	m_ScalingFactorw = static_cast<float>(resizeW) / static_cast<float>(m_Width);
172 | 
173 | 	// Additional checks for images with non even dims
174 | 	if ((inputW - resizeW) % 2) resizeW--;
175 | 	if ((inputH - resizeH) % 2) resizeH--;
176 | 	assert((inputW - resizeW) % 2 == 0);
177 | 	assert((inputH - resizeH) % 2 == 0);
178 | 
179 | 	m_XOffset = (inputW - resizeW) / 2;
180 | 	m_YOffset = (inputH - resizeH) / 2;
181 | 
182 | 	assert(2 * m_XOffset + resizeW == inputW);
183 | 	assert(2 * m_YOffset + resizeH == inputH);
184 | 
185 | 	// resizing
186 | 	cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(resizeW, resizeH), 0, 0, cv::INTER_CUBIC);
187 | 	// letterboxing
188 | 	cv::copyMakeBorder(m_LetterboxImage, m_LetterboxImage, m_YOffset, m_YOffset, m_XOffset,
189 | 		m_XOffset, cv::BORDER_CONSTANT, cv::Scalar(128, 128, 128));
190 | 	//	cv::imwrite("letter.jpg", m_LetterboxImage);
191 | 	// converting to RGB
192 | 	cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB);
193 | }
194 | 
195 | 
196 | void DsImage::addBBox(BBoxInfo box, const std::string& labelName)
197 | {
198 |     m_Bboxes.push_back(box);
199 |     const int x = box.box.x1;
200 |     const int y = box.box.y1;
201 |     const int w = box.box.x2 - box.box.x1;
202 |     const int h = box.box.y2 - box.box.y1;
203 |     const cv::Scalar color
204 |         = cv::Scalar(m_RNG.uniform(0, 255), m_RNG.uniform(0, 255), m_RNG.uniform(0, 255));
205 | 
206 |     cv::rectangle(m_MarkedImage, cv::Rect(x, y, w, h), color, 1);
207 |     const cv::Size tsize
208 |         = cv::getTextSize(labelName, cv::FONT_HERSHEY_COMPLEX_SMALL, 0.5, 1, nullptr);
209 |     cv::rectangle(m_MarkedImage, cv::Rect(x, y, tsize.width + 3, tsize.height + 4), color, -1);
210 |     cv::putText(m_MarkedImage, labelName.c_str(), cv::Point(x, y + tsize.height),
211 |                 cv::FONT_HERSHEY_COMPLEX_SMALL, 0.5, cv::Scalar(255, 255, 255), 1);
212 | }
213 | 
214 | void DsImage::showImage() const
215 | {
216 |     cv::namedWindow(m_ImageName);
217 |     cv::imshow(m_ImageName.c_str(), m_MarkedImage);
218 |     cv::waitKey(0);
219 | }
220 | 
221 | void DsImage::saveImageJPEG(const std::string& dirPath) const
222 | {
223 |     cv::imwrite(dirPath + m_ImageName + ".jpeg", m_MarkedImage);
224 | }
225 | std::string DsImage::exportJson() const
226 | {
227 |     if (m_Bboxes.empty()) return "";
228 |     std::stringstream json;
229 |     json.precision(2);
230 |     json << std::fixed;
231 |     for (uint32_t i = 0; i < m_Bboxes.size(); ++i)
232 |     {
233 |         json << "\n{\n";
234 |         json << "  \"image_id\"         : " << std::stoi(m_ImageName) << ",\n";
235 |         json << "  \"category_id\"      : " << m_Bboxes.at(i).classId << ",\n";
236 |         json << "  \"bbox\"             : ";
237 |         json << "[" << m_Bboxes.at(i).box.x1 << ", " << m_Bboxes.at(i).box.y1 << ", ";
238 |         json << m_Bboxes.at(i).box.x2 - m_Bboxes.at(i).box.x1 << ", "
239 |              << m_Bboxes.at(i).box.y2 - m_Bboxes.at(i).box.y1 << "],\n";
240 |         json << "  \"score\"            : " << m_Bboxes.at(i).prob << "\n";
241 |         if (i != m_Bboxes.size() - 1)
242 |             json << "},";
243 |         else
244 |             json << "}";
245 |     }
246 |     return json.str();
247 | }


--------------------------------------------------------------------------------
/modules/ds_image.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | MIT License
 3 | 
 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | *
24 | */
25 | #ifndef __IMAGE_H__
26 | #define __IMAGE_H__
27 | 
28 | #include "trt_utils.h"
29 | 
30 | struct BBoxInfo;
31 | 
32 | class DsImage
33 | {
34 | public:
35 |     DsImage();
36 |     DsImage(const std::string& path, const std::string &s_net_type_, const int& inputH, const int& inputW);
37 |     DsImage(const cv::Mat& mat_image_, const std::string &s_net_type_, const int& inputH, const int& inputW);
38 |     int getImageHeight() const { return m_Height; }
39 |     int getImageWidth() const { return m_Width; }
40 |     cv::Mat getLetterBoxedImage() const { return m_LetterboxImage; }
41 |     cv::Mat getOriginalImage() const { return m_OrigImage; }
42 |     std::string getImageName() const { return m_ImageName; }
43 |     void addBBox(BBoxInfo box, const std::string& labelName);
44 |     void showImage() const;
45 |     void saveImageJPEG(const std::string& dirPath) const;
46 |     std::string exportJson() const;
47 | 	void letterbox(const int& inputH, const int& inputW);
48 | private:
49 |     int m_Height;
50 |     int m_Width;
51 |     int m_XOffset;
52 |     int m_YOffset;
53 |     float m_ScalingFactor;
54 |     std::string m_ImagePath;
55 |     cv::RNG m_RNG;
56 |     std::string m_ImageName;
57 |     std::vector<BBoxInfo> m_Bboxes;
58 | 
59 |     // unaltered original Image
60 |     cv::Mat m_OrigImage;
61 |     // letterboxed Image given to the network as input
62 |     cv::Mat m_LetterboxImage;
63 |     // final image marked with the bounding boxes
64 |     cv::Mat m_MarkedImage;
65 | };
66 | 
67 | #endif
68 | 


--------------------------------------------------------------------------------
/modules/err.log:
--------------------------------------------------------------------------------
1 | make: *** ターゲットが指定されておらず, makefile も見つかりません.  中止.
2 | 


--------------------------------------------------------------------------------
/modules/hardswish.cu:
--------------------------------------------------------------------------------
  1 | //sys
  2 | #include <cmath>
  3 | #include <stdio.h>
  4 | #include <cassert>
  5 | #include <iostream>
  6 | #include <cuda_runtime.h>
  7 | #include <cuda.h>
  8 | #include <stdint.h>
  9 | #include <string.h>
 10 | //my
 11 | #include "hardswish.h"
 12 | 
 13 | #define NV_CUDA_CHECK(status)                                                                      \
 14 |     {                                                                                              \
 15 |         if (status != 0)                                                                           \
 16 |         {                                                                                          \
 17 |             std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \
 18 |                       << " at line " << __LINE__ << std::endl;                                     \
 19 |             abort();                                                                               \
 20 |         }                                                                                          \
 21 |     }
 22 | 
 23 | namespace nvinfer1
 24 | {
 25 | 	Hardswish::Hardswish()
 26 | 	{
 27 | 		cudaDeviceProp prop;
 28 | 		cudaGetDeviceProperties(&prop, 0);
 29 | 		_n_max_thread_pre_block = prop.maxThreadsPerBlock;
 30 | 	//	printf("Hardswish():%d\n", _n_max_thread_pre_block);
 31 | 	}
 32 | 
 33 | 	Hardswish::Hardswish(const void* data, size_t length)
 34 | 	{
 35 | 		const char *d = reinterpret_cast<const char*>(data), *a = d;
 36 | 		r(d, _n_max_thread_pre_block);
 37 | 		r(d, _n_output_size);
 38 | //		printf("r:threads:%d,size:%d\n", _n_max_thread_pre_block, _n_output_size);
 39 | 		assert(d == a + length);
 40 | 	}
 41 | 
 42 | 	Hardswish::~Hardswish()
 43 | 	{}
 44 | 	
 45 | 	__global__ void kernel_hardswish(const float *input_, float *output_, int n_data_size_)
 46 | 	{
 47 | 		int i = threadIdx.x + blockIdx.x * blockDim.x;
 48 | 		if (i >= n_data_size_)return;
 49 | 		if (input_[i] >= 3.0f)
 50 | 		{
 51 | 			output_[i] = input_[i];
 52 | 		}
 53 | 		else if (input_[i] <= -3.0f)
 54 | 		{
 55 | 			output_[i] = 0.0f;
 56 | 		}
 57 | 		else
 58 | 		{
 59 | 			output_[i] = input_[i] * (input_[i] + 3.0f) / 6.0f;
 60 | 		}
 61 | 	}
 62 | 
 63 | 	cudaError_t cuda_hardswish_layer(const void* input_,
 64 | 		void* output_,
 65 | 		const int n_batch_size_,
 66 | 		const int n_output_size_,
 67 | 		const int threads_,
 68 | 		cudaStream_t stream_)
 69 | 	{
 70 | 		int n_data_size = n_batch_size_ * n_output_size_;
 71 | //		printf("cuda_hardswish_layer:%d,size:%d\n", n_batch_size_, n_output_size_);
 72 | 		kernel_hardswish << <(n_data_size + threads_ -1)/threads_, threads_, 0, stream_ >> >(
 73 | 				reinterpret_cast<const float*>(input_),
 74 | 				reinterpret_cast<float*>(output_),
 75 | 				n_data_size);
 76 | 		return cudaGetLastError();
 77 | 	}
 78 | 
 79 | 	int Hardswish::enqueue(int batchSize,
 80 | 		const void* const* inputs,
 81 | 		void* const* outputs,
 82 | 		void* workspace,
 83 | 		cudaStream_t stream) noexcept
 84 | 	{
 85 | //		printf("batch_size:%d,output_size:%d,threads:%d\n", batchSize, _n_output_size, _n_max_thread_pre_block);
 86 | 		NV_CUDA_CHECK(cuda_hardswish_layer(inputs[0], outputs[0], batchSize, _n_output_size , _n_max_thread_pre_block, stream));
 87 | 		return 0;
 88 | 	}
 89 | 
 90 | 	size_t Hardswish::getSerializationSize() const noexcept
 91 | 	{
 92 | 		return sizeof(_n_max_thread_pre_block) +sizeof(_n_output_size);
 93 | 	}
 94 | 
 95 | 	void Hardswish::serialize(void *buffer) const noexcept
 96 | 	{
 97 | 		char *d = static_cast<char*>(buffer), *a = d;
 98 | 		w(d, _n_max_thread_pre_block);
 99 | 		w(d, _n_output_size);
100 | //		printf("serialize:%d,%d\n", _n_max_thread_pre_block, _n_output_size);
101 | 		assert(d == a + getSerializationSize());
102 | 	}
103 | 
104 | 
105 | 	bool Hardswish::supportsFormat(DataType type, PluginFormat format) const noexcept
106 | 	{
107 | 		return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR);
108 | 	}
109 | 
110 | 	void Hardswish::configureWithFormat(const Dims* inputDims, int nbInputs,
111 | 		const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept
112 | 	{
113 | 	
114 | 	}
115 | 
116 | 
117 | 	void Hardswish::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) noexcept
118 | 	{
119 | 		
120 | 		_n_output_size = in->dims.d[0] * in->dims.d[1] * in->dims.d[2];
121 | //		printf("configurePlugin:%d,%d,%d\n", in->dims.d[0], in->dims.d[1], in->dims.d[2]);
122 | 	}
123 | 	IPluginV2* Hardswish::clone() const noexcept
124 | 	{
125 | 		Hardswish *p = new Hardswish();
126 | 		p->setPluginNamespace(_s_plugin_namespace.c_str());
127 | 		p->_n_max_thread_pre_block = _n_max_thread_pre_block;
128 | 		p->_n_output_size = _n_output_size;
129 | //		printf("clone:%d,%d\n", _n_max_thread_pre_block, _n_output_size);
130 | 		return p;
131 | 	}
132 | 
133 | 
134 | 	//
135 | 	PluginFieldCollection HardswishPluginCreator::_fc{};
136 | 	std::vector<PluginField> HardswishPluginCreator::_vec_plugin_attributes;
137 | 
138 | 	HardswishPluginCreator::HardswishPluginCreator()
139 | 	{
140 | 		_vec_plugin_attributes.clear();
141 | 		_fc.nbFields = _vec_plugin_attributes.size();
142 | 		_fc.fields = _vec_plugin_attributes.data();
143 | 	}
144 | 
145 | 	const char* HardswishPluginCreator::getPluginName() const noexcept
146 | 	{
147 | 		return "HARDSWISH_TRT";
148 | 	}
149 | 
150 | 	const char* HardswishPluginCreator::getPluginVersion() const noexcept
151 | 	{
152 | 		return "1.0";
153 | 	}
154 | 
155 | 	const PluginFieldCollection* HardswishPluginCreator::getFieldNames() noexcept
156 | 	{
157 | 		return &_fc;
158 | 	}
159 | 
160 | 	IPluginV2* HardswishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) noexcept
161 | 	{
162 | 		Hardswish* obj = new Hardswish();
163 | 		obj->setPluginNamespace(_s_name_space.c_str());
164 | 		return obj;
165 | 	}
166 | 
167 | 	IPluginV2* HardswishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept
168 | 	{
169 | 		Hardswish* obj = new Hardswish(serialData, serialLength);
170 | 		obj->setPluginNamespace(_s_name_space.c_str());
171 | 		return obj;
172 | 	}
173 | 
174 | 	void HardswishPluginCreator::setPluginNamespace(const char* libNamespace) noexcept
175 | 	{
176 | 		_s_name_space = libNamespace;
177 | 	}
178 | 
179 | 	const char* HardswishPluginCreator::getPluginNamespace() const noexcept
180 | 	{
181 | 		return _s_name_space.c_str();
182 | 	}
183 | }//end namespace nvinfer1
184 | 


--------------------------------------------------------------------------------
/modules/hardswish.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef _HARDSWISH_H_
  3 | #define _HARDSWISH_H_ 
  4 | 
  5 | #include <string>
  6 | #include <vector>
  7 | #include "NvInfer.h"
  8 | 
  9 | namespace nvinfer1
 10 | {
 11 | 	template <typename T>
 12 | 	void w(char*& buffer, const T& val)
 13 | 	{
 14 | 		*reinterpret_cast<T*>(buffer) = val;
 15 | 		buffer += sizeof(T);
 16 | 	}
 17 | 
 18 | 	template <typename T>
 19 | 	void r(const char*& buffer, T& val)
 20 | 	{
 21 | 		val = *reinterpret_cast<const T*>(buffer);
 22 | 		buffer += sizeof(T);
 23 | 	}
 24 | 
 25 | 	class Hardswish :public IPluginV2
 26 | 	{
 27 | 	public:
 28 | 		Hardswish();
 29 | 		Hardswish(const void* data, size_t length);
 30 | 		~Hardswish();
 31 | 		int getNbOutputs()const noexcept  override
 32 | 		{
 33 | 			return 1;
 34 | 		}
 35 | 		Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) noexcept  override
 36 | 		{
 37 | 			return inputs[0];
 38 | 		}
 39 | 		int initialize() noexcept  override
 40 | 		{
 41 | 			return 0;
 42 | 		}
 43 | 		void terminate() noexcept  override
 44 | 		{
 45 | 		}
 46 | 		size_t getWorkspaceSize(int maxBatchSize) const noexcept  override
 47 | 		{
 48 | 			return 0;
 49 | 		}
 50 | 
 51 | 		bool supportsFormat(DataType type, PluginFormat format) const noexcept override;
 52 | 		void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override;
 53 | 
 54 | 		int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace,
 55 | 			cudaStream_t stream) noexcept override;
 56 | 
 57 | 		size_t getSerializationSize() const noexcept  override;
 58 | 		void serialize(void* buffer) const noexcept  override;
 59 | 		const char* getPluginType() const noexcept  override
 60 | 		{
 61 | 			return "HARDSWISH_TRT";
 62 | 		}
 63 | 		const char* getPluginVersion() const noexcept  override
 64 | 		{
 65 | 			return "1.0";
 66 | 		}
 67 | 		void destroy() noexcept  override
 68 | 		{
 69 | 			delete this;
 70 | 		}
 71 | 		void setPluginNamespace(const char* pluginNamespace) noexcept  override
 72 | 		{
 73 | 			_s_plugin_namespace = pluginNamespace;
 74 | 		}
 75 | 		const char* getPluginNamespace() const noexcept  override
 76 | 		{
 77 | 			return _s_plugin_namespace.c_str();
 78 | 		}
 79 | 		DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept
 80 | 		{
 81 | 			return DataType::kFLOAT;
 82 | 		}
 83 | 		bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept
 84 | 		{
 85 | 			return false;
 86 | 		}
 87 | 		bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept
 88 | 		{
 89 | 			return false;
 90 | 		}
 91 | 		void attachToContext(
 92 | 			cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept
 93 | 		{}
 94 | 		void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) noexcept;
 95 | 		void detachFromContext()  noexcept
 96 | 		{}
 97 | 		bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const noexcept
 98 | 		{
 99 | 			return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
100 | 		}
101 | 		IPluginV2* clone() const  noexcept override;
102 | 	private:
103 | 
104 | 		uint32_t _n_max_thread_pre_block;
105 | 		uint32_t _n_output_size;
106 | 		std::string _s_plugin_namespace;
107 | 	}; //end detect
108 | 
109 | 	class HardswishPluginCreator : public IPluginCreator
110 | 	{
111 | 	public:
112 | 		HardswishPluginCreator();
113 | 		~HardswishPluginCreator() override = default;
114 | 		const char* getPluginName()const noexcept  override;
115 | 		const char* getPluginVersion() const  noexcept override;
116 | 		const PluginFieldCollection* getFieldNames() noexcept  override;
117 | 		IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) noexcept  override;
118 | 		IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept  override;
119 | 		void setPluginNamespace(const char* libNamespace) noexcept  override;
120 | 		const char* getPluginNamespace() const noexcept  override;
121 | 	private:
122 | 		std::string _s_name_space;
123 | 		static PluginFieldCollection _fc;
124 | 		static std::vector<PluginField> _vec_plugin_attributes;
125 | 	};//end detect creator
126 | 
127 | }//end namespace nvinfer1
128 | 
129 | 
130 | 
131 | #endif


--------------------------------------------------------------------------------
/modules/kernel.cu:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <cuda.h>
  3 | #include <cuda_runtime.h>
  4 | #include <stdint.h>
  5 | #include <stdio.h>
  6 | #include <string.h>
  7 | 
  8 | inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); }
  9 | 
 10 | __global__ void gpuYoloLayerV3(const float* input, float* output, const uint32_t grid_h_,
 11 | 								const uint32_t grid_w_, const uint32_t numOutputClasses,
 12 |                                const uint32_t numBBoxes)
 13 | {
 14 |     uint32_t x_id = blockIdx.x * blockDim.x + threadIdx.x;
 15 |     uint32_t y_id = blockIdx.y * blockDim.y + threadIdx.y;
 16 |     uint32_t z_id = blockIdx.z * blockDim.z + threadIdx.z;
 17 | 
 18 |     if ((x_id >= grid_w_) || (y_id >= grid_h_) || (z_id >= numBBoxes))
 19 |     {
 20 |         return;
 21 |     }
 22 | 
 23 |     const int numGridCells = grid_h_ * grid_w_;
 24 |     const int bbindex = y_id * grid_w_ + x_id;
 25 | 
 26 |     output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]
 27 |         = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]);
 28 | 
 29 |     output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]
 30 |         = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]);
 31 | 
 32 |     output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]
 33 |         = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]);
 34 | 
 35 |     output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]
 36 |         = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]);
 37 | 
 38 |     output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]
 39 |         = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
 40 | 
 41 |     for (uint32_t i = 0; i < numOutputClasses; ++i)
 42 |     {
 43 |         output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]
 44 |             = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]);
 45 |     }
 46 | }
 47 | 
 48 | 
 49 | //Scaled YOLOv4 format
 50 | //https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982
 51 | __global__ void gpuYoloV4Layer(const float* input, float* output, const uint32_t grid_h_,
 52 | 								const uint32_t grid_w_, const uint32_t numOutputClasses,
 53 |                                const uint32_t numBBoxes, float scale_x_y)
 54 | {
 55 |     uint32_t x_id = blockIdx.x * blockDim.x + threadIdx.x;
 56 |     uint32_t y_id = blockIdx.y * blockDim.y + threadIdx.y;
 57 |     uint32_t z_id = blockIdx.z * blockDim.z + threadIdx.z;
 58 | 
 59 |     if ((x_id >= grid_w_) || (y_id >= grid_h_) || (z_id >= numBBoxes))
 60 |     {
 61 |         return;
 62 |     }
 63 | 
 64 |     const int numGridCells = grid_h_ * grid_w_;
 65 |     const int bbindex = y_id * grid_w_ + x_id;
 66 |     //bx
 67 |     scale_x_y = 1.0;
 68 |     output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]
 69 |       = scale_x_y * (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]) - 0.5 * (scale_x_y -1);
 70 |     //by
 71 |     output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]
 72 |         = scale_x_y * (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]) - 0.5 * (scale_x_y -1);
 73 |     //bw
 74 |     output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]
 75 |         = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]) * 4;
 76 |     //bh
 77 |     output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]
 78 |         = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]) * 4;
 79 | 
 80 |     output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]
 81 |         = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
 82 | 
 83 |     for (uint32_t i = 0; i < numOutputClasses; ++i)
 84 |     {
 85 |         output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]
 86 |             = (input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]);
 87 |     }
 88 | }
 89 | 
 90 | 
 91 | cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint32_t& batchSize, 
 92 | 							const uint32_t& n_grid_h_,const uint32_t& n_grid_w_,
 93 |                             const uint32_t& numOutputClasses, const uint32_t& numBBoxes,
 94 |                             uint64_t outputSize, const uint32_t &new_coords, cudaStream_t stream)
 95 | {
 96 |     dim3 threads_per_block(16, 16, 4);
 97 |     dim3 number_of_blocks((n_grid_w_ / threads_per_block.x) + 1,
 98 |                           (n_grid_h_ / threads_per_block.y) + 1,
 99 |                           (numBBoxes / threads_per_block.z) + 1);
100 | //    printf("YOLOv3 (V4) \n");
101 |     for (int batch = 0; batch < batchSize; ++batch)
102 |     {
103 |       if (new_coords) {
104 |             gpuYoloV4Layer<<<number_of_blocks, threads_per_block, 0, stream>>>(     
105 |             reinterpret_cast<const float*>(input) + (batch * outputSize),
106 |             reinterpret_cast<float*>(output) + (batch * outputSize), n_grid_h_, n_grid_w_, numOutputClasses,
107 |             numBBoxes, 2.0);     
108 |       } else {
109 |             gpuYoloLayerV3<<<number_of_blocks, threads_per_block, 0, stream>>>(
110 |             reinterpret_cast<const float*>(input) + (batch * outputSize),
111 |             reinterpret_cast<float*>(output) + (batch * outputSize), n_grid_h_, n_grid_w_, numOutputClasses,
112 |             numBBoxes);     	
113 |       }
114 |       
115 |     }
116 |     return cudaGetLastError();
117 | }
118 | 
119 | 
120 | cudaError_t cudaYoloV4Layer(const void* input, void* output, const uint32_t& batchSize, 
121 | 							const uint32_t& n_grid_h_,const uint32_t& n_grid_w_,
122 |                             const uint32_t& numOutputClasses, const uint32_t& numBBoxes,
123 |                             uint64_t outputSize, const uint32_t &new_coords, const float scale_x_y, cudaStream_t stream)
124 | {
125 |     dim3 threads_per_block(16, 16, 4);
126 |     dim3 number_of_blocks((n_grid_w_ / threads_per_block.x) + 1,
127 |                           (n_grid_h_ / threads_per_block.y) + 1,
128 |                           (numBBoxes / threads_per_block.z) + 1);
129 | 			  //    printf("Scaled YOLOv4 \n");
130 |     for (int batch = 0; batch < batchSize; ++batch)
131 |     {
132 |       gpuYoloV4Layer<<<number_of_blocks, threads_per_block, 0, stream>>>(
133 |             reinterpret_cast<const float*>(input) + (batch * outputSize),
134 |             reinterpret_cast<float*>(output) + (batch * outputSize), n_grid_h_, n_grid_w_, numOutputClasses,
135 |             numBBoxes, scale_x_y);
136 |     }
137 |     return cudaGetLastError();
138 | }
139 | 


--------------------------------------------------------------------------------
/modules/mish.cu:
--------------------------------------------------------------------------------
  1 | #include <cmath>
  2 | #include <stdio.h>
  3 | #include <cassert>
  4 | #include <iostream>
  5 | #include "mish.h"
  6 | 
  7 | namespace nvinfer1
  8 | {
  9 |     MishPlugin::MishPlugin()
 10 |     {
 11 |     }
 12 | 
 13 |     MishPlugin::~MishPlugin()
 14 |     {
 15 |     }
 16 | 
 17 |     // create the plugin at runtime from a byte stream
 18 |     MishPlugin::MishPlugin(const void* data, size_t length)
 19 |     {
 20 |         assert(length == sizeof(input_size_));
 21 |         input_size_ = *reinterpret_cast<const int*>(data);
 22 |     }
 23 | 
 24 |     void MishPlugin::serialize(void* buffer) const noexcept
 25 |     {
 26 |         *reinterpret_cast<int*>(buffer) = input_size_;
 27 |     }
 28 | 
 29 |     size_t MishPlugin::getSerializationSize() const noexcept
 30 |     {  
 31 |         return sizeof(input_size_);
 32 |     }
 33 | 
 34 |     int MishPlugin::initialize()noexcept
 35 |     { 
 36 |         return 0;
 37 |     }
 38 | 
 39 | 	bool MishPlugin::supportsFormat(DataType type, PluginFormat format) const noexcept
 40 | 	{
 41 | 		return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR);
 42 | 	}
 43 | 
 44 | 	void MishPlugin::configureWithFormat(const Dims* inputDims, int nbInputs,
 45 | 		const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept
 46 | 	{
 47 | 
 48 | 	}
 49 | 
 50 |     Dims MishPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)noexcept
 51 |     {
 52 |         assert(nbInputDims == 1);
 53 |         assert(index == 0);
 54 |         input_size_ = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2];
 55 |         // Output dimensions
 56 |         return Dims3(inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]);
 57 |     }
 58 | 
 59 |     // Set plugin namespace
 60 |     void MishPlugin::setPluginNamespace(const char* pluginNamespace)noexcept
 61 |     {
 62 |         mPluginNamespace = pluginNamespace;
 63 |     }
 64 | 
 65 |     const char* MishPlugin::getPluginNamespace() const noexcept
 66 |     {
 67 |         return mPluginNamespace;
 68 |     }
 69 | 
 70 |     // Return the DataType of the plugin output at the requested index
 71 |     DataType MishPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept
 72 |     {
 73 |         return DataType::kFLOAT;
 74 |     }
 75 | 
 76 |     // Return true if output tensor is broadcast across a batch.
 77 |     bool MishPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept
 78 |     {
 79 |         return false;
 80 |     }
 81 | 
 82 |     // Return true if plugin can use input that is broadcast across batch without replication.
 83 |     bool MishPlugin::canBroadcastInputAcrossBatch(int inputIndex) const noexcept
 84 |     {
 85 |         return false;
 86 |     }
 87 | 
 88 |     void MishPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)noexcept
 89 |     {
 90 |     }
 91 | 
 92 |     // Attach the plugin object to an execution context and grant the plugin the access to some context resource.
 93 |     void MishPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)noexcept
 94 |     {
 95 |     }
 96 | 
 97 |     // Detach the plugin object from its execution context.
 98 |     void MishPlugin::detachFromContext()noexcept {}
 99 | 
100 |     const char* MishPlugin::getPluginType() const noexcept
101 |     {
102 |         return "Mish_TRT";
103 |     }
104 | 
105 |     const char* MishPlugin::getPluginVersion() const noexcept
106 |     {
107 |         return "1";
108 |     }
109 | 
110 |     void MishPlugin::destroy()noexcept
111 |     {
112 |         delete this;
113 |     }
114 | 
115 |     // Clone the plugin
116 |     IPluginV2* MishPlugin::clone() const noexcept
117 |     {
118 |         MishPlugin *p = new MishPlugin();
119 |         p->input_size_ = input_size_;
120 |         p->setPluginNamespace(mPluginNamespace);
121 |         return p;
122 |     }
123 | 
124 |     __device__ float tanh_activate_kernel(float x){return (2/(1 + expf(-2*x)) - 1);}
125 | 
126 |     __device__ float softplus_kernel(float x, float threshold = 20) 
127 | 	{
128 |         if (x > threshold) return x;                // too large
129 |         else if (x < -threshold) return expf(x);    // too small
130 |         return logf(expf(x) + 1);
131 |     }
132 | 
133 |     __global__ void mish_kernel(const float *input, float *output, int num_elem) 
134 | 	{
135 | 
136 |         int idx = threadIdx.x + blockDim.x * blockIdx.x;
137 |         if (idx >= num_elem) return;
138 | 
139 |         //float t = exp(input[idx]);
140 |         //if (input[idx] > 20.0) {
141 |         //    t *= t;
142 |         //    output[idx] = (t - 1.0) / (t + 1.0);
143 |         //} else {
144 |         //    float tt = t * t;
145 |         //    output[idx] = (tt + 2.0 * t) / (tt + 2.0 * t + 2.0);
146 |         //}
147 |         //output[idx] *= input[idx];
148 |         output[idx] = input[idx] * tanh_activate_kernel(softplus_kernel(input[idx]));
149 |     }
150 | 
151 |     void MishPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize)
152 | 	{
153 |         int block_size = thread_count_;
154 |         int grid_size = (input_size_ * batchSize + block_size - 1) / block_size;
155 |         mish_kernel<<<grid_size, block_size, 0, stream>>>(inputs[0], output, input_size_ * batchSize);
156 |     }
157 | 
158 | 	int MishPlugin::enqueue(int batchSize,
159 | 		const void* const* inputs,
160 | 		void* const* outputs,
161 | 		void* workspace,
162 | 		cudaStream_t stream) noexcept 
163 | 	{
164 |         //assert(batchSize == 1);
165 |         //GPU
166 |         //CUDA_CHECK(cudaStreamSynchronize(stream));
167 |         forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, batchSize);
168 |         return 0;
169 |     }
170 | 
171 |     PluginFieldCollection MishPluginCreator::mFC{};
172 |     std::vector<PluginField> MishPluginCreator::mPluginAttributes;
173 | 
174 |     MishPluginCreator::MishPluginCreator()
175 |     {
176 |         mPluginAttributes.clear();
177 | 
178 |         mFC.nbFields = mPluginAttributes.size();
179 |         mFC.fields = mPluginAttributes.data();
180 |     }
181 | 
182 |     const char* MishPluginCreator::getPluginName() const noexcept
183 |     {
184 |             return "Mish_TRT";
185 |     }
186 | 
187 |     const char* MishPluginCreator::getPluginVersion() const noexcept
188 |     {
189 |             return "1";
190 |     }
191 | 
192 |     const PluginFieldCollection* MishPluginCreator::getFieldNames()noexcept
193 |     {
194 |             return &mFC;
195 |     }
196 | 
197 |     IPluginV2* MishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)noexcept
198 |     {
199 |         MishPlugin* obj = new MishPlugin();
200 |         obj->setPluginNamespace(mNamespace.c_str());
201 |         return obj;
202 |     }
203 | 
204 |     IPluginV2* MishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept
205 |     {
206 |         // This object will be deleted when the network is destroyed, which will
207 |         // call MishPlugin::destroy()
208 |         MishPlugin* obj = new MishPlugin(serialData, serialLength);
209 |         obj->setPluginNamespace(mNamespace.c_str());
210 |         return obj;
211 |     }
212 | 
213 | 
214 | 	void MishPluginCreator::setPluginNamespace(const char* libNamespace)noexcept
215 | 	{
216 | 		mNamespace = libNamespace;
217 | 	}
218 | 
219 | 	const char* MishPluginCreator::getPluginNamespace() const noexcept
220 | 	{
221 | 		return mNamespace.c_str();
222 | 	}
223 | 
224 | 
225 | }
226 | 
227 | 


--------------------------------------------------------------------------------
/modules/mish.h:
--------------------------------------------------------------------------------
  1 | #ifndef _MISH_PLUGIN_H
  2 | #define _MISH_PLUGIN_H
  3 | 
  4 | #include <string>
  5 | #include <vector>
  6 | #include "NvInfer.h"
  7 | 
  8 | 
  9 | //https://github.com/wang-xinyu/tensorrtx
 10 | namespace nvinfer1
 11 | {
 12 |     class MishPlugin: public IPluginV2
 13 |     {
 14 |         public:
 15 |             explicit MishPlugin();
 16 |             MishPlugin(const void* data, size_t length);
 17 | 
 18 |             ~MishPlugin();
 19 | 
 20 |             int getNbOutputs() const  noexcept override
 21 |             {
 22 |                 return 1;
 23 |             }
 24 | 
 25 |             Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) noexcept  override;
 26 | 
 27 |             int initialize() noexcept  override;
 28 | 
 29 |             virtual void terminate() noexcept  override {}
 30 | 
 31 |             virtual size_t getWorkspaceSize(int maxBatchSize) const noexcept  override { return 0;}
 32 | 
 33 |          //   virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream);
 34 | 			int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace,
 35 | 				cudaStream_t stream) noexcept override;
 36 | 			bool supportsFormat(DataType type, PluginFormat format) const noexcept override;
 37 | 			void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override;
 38 | 
 39 |             virtual size_t getSerializationSize() const noexcept  override;
 40 | 
 41 |             virtual void serialize(void* buffer) const noexcept  override;
 42 | 
 43 |             bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const noexcept {
 44 |                 return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
 45 |             }
 46 | 
 47 |             const char* getPluginType() const noexcept  override;
 48 | 
 49 |             const char* getPluginVersion() const noexcept  override;
 50 | 
 51 |             void destroy()  noexcept override;
 52 | 
 53 |             IPluginV2* clone() const noexcept  override;
 54 | 
 55 |             void setPluginNamespace(const char* pluginNamespace) noexcept  override;
 56 | 
 57 |             const char* getPluginNamespace() const  noexcept override;
 58 | 
 59 |             DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept;
 60 | 
 61 |             bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept;
 62 | 
 63 |             bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept;
 64 | 
 65 |             void attachToContext(
 66 |                     cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)noexcept;
 67 | 
 68 |             void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)noexcept;
 69 | 
 70 |             void detachFromContext()noexcept;
 71 | 
 72 |             int input_size_;
 73 |         private:
 74 |             void forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize = 1);
 75 |             int thread_count_ = 256;
 76 |             const char* mPluginNamespace;
 77 |     };
 78 | 
 79 |     class MishPluginCreator : public IPluginCreator
 80 |     {
 81 |         public:
 82 |             MishPluginCreator();
 83 | 
 84 |             ~MishPluginCreator() override = default;
 85 | 
 86 |             const char* getPluginName() const noexcept  override;
 87 | 
 88 |             const char* getPluginVersion() const  noexcept override;
 89 | 
 90 |             const PluginFieldCollection* getFieldNames() noexcept  override;
 91 | 
 92 |             IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc)  noexcept override;
 93 | 
 94 |             IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept  override;
 95 | 
 96 | 			void setPluginNamespace(const char* libNamespace) noexcept  override;
 97 | 
 98 | 			const char* getPluginNamespace() const noexcept  override;
 99 | 
100 |         private:
101 |             std::string mNamespace;
102 |             static PluginFieldCollection mFC;
103 |             static std::vector<PluginField> mPluginAttributes;
104 |     };
105 | }
106 | #endif 
107 | 


--------------------------------------------------------------------------------
/modules/plugin_factory.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #include "plugin_factory.h"
  4 | #include "trt_utils.h"
  5 | 
  6 | 
  7 | /******* Yolo Layer V3 *******/
  8 | /*****************************/
  9 | namespace nvinfer1
 10 | {
 11 |   YoloLayer::YoloLayer()
 12 |   {}
 13 | 
 14 |   YoloLayer::YoloLayer(const void* data, size_t length)
 15 |   {
 16 |     const char *d = static_cast<const char*>(data), *a = d;
 17 |     re(d, m_NumBoxes);
 18 |     re(d, m_NumClasses);
 19 |     re(d, _n_grid_h);
 20 |     re(d, _n_grid_w);
 21 |     re(d, m_OutputSize);
 22 |     re(d, m_new_coords);		
 23 |     assert(d = a + length);
 24 |   }
 25 |   void YoloLayer::serialize(void* buffer)const noexcept
 26 |   {
 27 |     char *d = static_cast<char*>(buffer), *a = d;
 28 |     wr(d, m_NumBoxes);
 29 |     wr(d, m_NumClasses);
 30 |     wr(d, _n_grid_h);
 31 |     wr(d, _n_grid_w);
 32 |     wr(d, m_OutputSize);
 33 |     wr(d, m_new_coords);		
 34 |     assert(d == a + getSerializationSize());
 35 |     printf("Serialize V3\n");
 36 |   }
 37 | 
 38 |   bool YoloLayer::supportsFormat(DataType type, PluginFormat format) const noexcept
 39 |   {
 40 |     return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR);
 41 |   }
 42 | 
 43 |   void YoloLayer::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept
 44 |   {
 45 | 
 46 |   }
 47 | 
 48 |   IPluginV2* YoloLayer::clone() const noexcept
 49 |   {
 50 |     YoloLayer *p = new YoloLayer(m_NumBoxes,m_NumClasses,_n_grid_h,_n_grid_w, m_new_coords);
 51 |     p->setPluginNamespace(_s_plugin_namespace.c_str());
 52 |     return p;
 53 |   }
 54 | 
 55 |   YoloLayer::YoloLayer(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_, const uint32_t &grid_w_, const uint32_t& new_coords) :
 56 |     m_NumBoxes(numBoxes),
 57 |     m_NumClasses(numClasses),
 58 |     _n_grid_h(grid_h_),
 59 |     _n_grid_w(grid_w_),
 60 |     m_new_coords(new_coords)
 61 |   {
 62 |     assert(m_NumBoxes > 0);
 63 |     assert(m_NumClasses > 0);
 64 |     assert(_n_grid_h > 0);
 65 |     assert(_n_grid_w > 0);
 66 |     m_OutputSize = _n_grid_h * _n_grid_w * (m_NumBoxes * (4 + 1 + m_NumClasses));
 67 |   }
 68 | 
 69 |   int YoloLayer::getNbOutputs() const noexcept { return 1; }
 70 | 
 71 |   nvinfer1::Dims YoloLayer::getOutputDimensions(int index, const nvinfer1::Dims* inputs,
 72 | 						int nbInputDims) noexcept
 73 |   {
 74 |     assert(index == 0);
 75 |     assert(nbInputDims == 1);
 76 |     return inputs[0];
 77 |   }
 78 | 
 79 |   //void YoloLayerV3::configure(const nvinfer1::Dims* inputDims, int nbInputs,
 80 |   //                            const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize) noexcept
 81 |   //{
 82 |   //    assert(nbInputs == 1);
 83 |   //    assert(inputDims != nullptr);
 84 |   //}
 85 | 
 86 |   int YoloLayer::initialize() noexcept { return 0; }
 87 | 
 88 |   void YoloLayer::terminate() noexcept {}
 89 | 
 90 |   size_t YoloLayer::getWorkspaceSize(int maxBatchSize) const noexcept
 91 |   {
 92 |     return 0;
 93 |   }
 94 | 
 95 |   int YoloLayer::enqueue(int batchSize,
 96 | 			 const void* const* inputs,
 97 | 			 void* const* outputs,
 98 | 			 void* workspace,
 99 | 			 cudaStream_t stream) noexcept
100 |   {
101 |     NV_CUDA_CHECK(cudaYoloLayerV3(inputs[0], outputs[0], batchSize, _n_grid_h, _n_grid_w, m_NumClasses,
102 | 				  m_NumBoxes, m_OutputSize, m_new_coords, stream));
103 |     return 0;
104 |   }
105 | 
106 |   size_t YoloLayer::getSerializationSize()const noexcept
107 |   {
108 |     return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(_n_grid_w) + sizeof(_n_grid_h) + sizeof(m_OutputSize) + sizeof(m_new_coords);
109 |   }
110 | 
111 | 
112 | 
113 | 
114 |   PluginFieldCollection YoloLayerPluginCreator::mFC{};
115 |   std::vector<PluginField> YoloLayerPluginCreator::mPluginAttributes;
116 | 
117 |   YoloLayerPluginCreator::YoloLayerPluginCreator()
118 |   {
119 |     mPluginAttributes.clear();
120 | 
121 |     mFC.nbFields = mPluginAttributes.size();
122 |     mFC.fields = mPluginAttributes.data();
123 | 
124 |   }
125 | 
126 |   const char* YoloLayerPluginCreator::getPluginName() const noexcept
127 |   {
128 |     return "YOLO_TRT";
129 |   }
130 | 
131 |   const char* YoloLayerPluginCreator::getPluginVersion() const noexcept
132 |   {
133 |     return "1.0";
134 |   }
135 | 
136 |   const PluginFieldCollection* YoloLayerPluginCreator::getFieldNames()noexcept
137 |   {
138 |     return &mFC;
139 |   }
140 | 
141 |   IPluginV2* YoloLayerPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)noexcept
142 |   {
143 | 
144 |     YoloLayer* obj = new YoloLayer();
145 |     obj->setPluginNamespace(mNamespace.c_str());
146 |     return obj;
147 |   }
148 | 
149 |   IPluginV2* YoloLayerPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept
150 |   {
151 |     // This object will be deleted when the network is destroyed, which will
152 |     // call MishPlugin::destroy()
153 |     YoloLayer* obj = new YoloLayer(serialData, serialLength);
154 |     obj->setPluginNamespace(mNamespace.c_str());
155 |     return obj;
156 |   }
157 | 
158 | 
159 |   void YoloLayerPluginCreator::setPluginNamespace(const char* libNamespace)noexcept
160 |   {
161 |     mNamespace = libNamespace;
162 |   }
163 | 
164 |   const char* YoloLayerPluginCreator::getPluginNamespace() const noexcept
165 |   {
166 |     return mNamespace.c_str();
167 |   }
168 | }
169 | 
170 | namespace nvinfer1
171 | {
172 |   //Scaled YOLOv4
173 |   YoloV4Layer::YoloV4Layer()
174 |   {}
175 | 
176 |   YoloV4Layer::YoloV4Layer(const void* data, size_t length)
177 |   {
178 |     const char *d = static_cast<const char*>(data), *a = d;
179 |     re(d, m_NumBoxes);
180 |     re(d, m_NumClasses);
181 |     re(d, _n_grid_h);
182 |     re(d, _n_grid_w);
183 |     re(d, m_OutputSize);
184 |     re(d, m_new_coords);  		
185 |     re(d, m_scale_x_y);		
186 |     assert(d = a + length);
187 |   }
188 |   void YoloV4Layer::serialize(void* buffer)const noexcept
189 |   {
190 |     char *d = static_cast<char*>(buffer), *a = d;
191 |     wr(d, m_NumBoxes);
192 |     wr(d, m_NumClasses);
193 |     wr(d, _n_grid_h);
194 |     wr(d, _n_grid_w);
195 |     wr(d, m_OutputSize);
196 |     wr(d, m_new_coords);				
197 |     wr(d, m_scale_x_y);		
198 |     assert(d == a + getSerializationSize());
199 |   }
200 | 
201 |   bool YoloV4Layer::supportsFormat(DataType type, PluginFormat format) const noexcept
202 |   {
203 |     return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR);
204 |   }
205 | 
206 |   void YoloV4Layer::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept
207 |   {
208 | 
209 |   }
210 | 
211 |   IPluginV2* YoloV4Layer::clone() const noexcept
212 |   {
213 |     YoloV4Layer *p = new YoloV4Layer(m_NumBoxes,m_NumClasses,_n_grid_h,_n_grid_w, m_new_coords, m_scale_x_y);
214 |     p->setPluginNamespace(_s_plugin_namespace.c_str());
215 |     return p;
216 |   }
217 | 
218 |   YoloV4Layer::YoloV4Layer(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_, const uint32_t &grid_w_, const uint32_t &new_coords, const float& scale_x_y) :
219 |     m_NumBoxes(numBoxes),
220 |     m_NumClasses(numClasses),
221 |     _n_grid_h(grid_h_),
222 |     _n_grid_w(grid_w_),
223 |     m_scale_x_y(scale_x_y),
224 |     m_new_coords(new_coords)		
225 |   {
226 |     assert(m_NumBoxes > 0);
227 |     assert(m_NumClasses > 0);
228 |     assert(_n_grid_h > 0);
229 |     assert(_n_grid_w > 0);
230 |     assert(m_scale_x_y > 0);		
231 |     m_OutputSize = _n_grid_h * _n_grid_w * (m_NumBoxes * (4 + 1 + m_NumClasses));
232 |   }
233 | 
234 |   int YoloV4Layer::getNbOutputs() const noexcept { return 1; }
235 | 
236 |   nvinfer1::Dims YoloV4Layer::getOutputDimensions(int index, const nvinfer1::Dims* inputs,
237 | 						  int nbInputDims) noexcept
238 |   {
239 |     assert(index == 0);
240 |     assert(nbInputDims == 1);
241 |     return inputs[0];
242 |   }
243 | 
244 |   //void YoloV4LayerV3::configure(const nvinfer1::Dims* inputDims, int nbInputs,
245 |   //                            const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize) noexcept
246 |   //{
247 |   //    assert(nbInputs == 1);
248 |   //    assert(inputDims != nullptr);
249 |   //}
250 | 
251 |   int YoloV4Layer::initialize() noexcept { return 0; }
252 | 
253 |   void YoloV4Layer::terminate() noexcept {}
254 | 
255 |   size_t YoloV4Layer::getWorkspaceSize(int maxBatchSize) const noexcept
256 |   {
257 |     return 0;
258 |   }
259 | 
260 |   int YoloV4Layer::enqueue(int batchSize,
261 | 			   const void* const* inputs,
262 | 			   void* const* outputs,
263 | 			   void* workspace,
264 | 			   cudaStream_t stream) noexcept
265 |   {
266 |     NV_CUDA_CHECK(cudaYoloV4Layer(inputs[0], outputs[0], batchSize, _n_grid_h, _n_grid_w, m_NumClasses,
267 | 				  m_NumBoxes, m_OutputSize, m_new_coords, m_scale_x_y, stream));
268 |     return 0;
269 |   }
270 | 
271 |   size_t YoloV4Layer::getSerializationSize()const noexcept
272 |   {
273 |     printf("Serialize V4\n");
274 |     return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(_n_grid_w) + sizeof(_n_grid_h) + sizeof(m_OutputSize) + sizeof(m_scale_x_y) + sizeof(m_new_coords);
275 |   }
276 | 
277 | 
278 | 
279 | 
280 |   PluginFieldCollection YoloV4LayerPluginCreator::mFC{};
281 |   std::vector<PluginField> YoloV4LayerPluginCreator::mPluginAttributes;
282 | 
283 |   YoloV4LayerPluginCreator::YoloV4LayerPluginCreator()
284 |   {
285 |     mPluginAttributes.clear();
286 | 
287 |     mFC.nbFields = mPluginAttributes.size();
288 |     mFC.fields = mPluginAttributes.data();
289 | 
290 |   }
291 | 
292 |   const char* YoloV4LayerPluginCreator::getPluginName() const noexcept
293 |   {
294 |     return "YOLOV4_TRT";
295 |   }
296 | 
297 |   const char* YoloV4LayerPluginCreator::getPluginVersion() const noexcept
298 |   {
299 |     return "1.0";
300 |   }
301 | 
302 |   const PluginFieldCollection* YoloV4LayerPluginCreator::getFieldNames()noexcept
303 |   {
304 |     return &mFC;
305 |   }
306 | 
307 |   IPluginV2* YoloV4LayerPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)noexcept
308 |   {
309 | 
310 |     YoloV4Layer* obj = new YoloV4Layer();
311 |     obj->setPluginNamespace(mNamespace.c_str());
312 |     return obj;
313 |   }
314 | 
315 |   IPluginV2* YoloV4LayerPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept
316 |   {
317 |     // This object will be deleted when the network is destroyed, which will
318 |     // call MishPlugin::destroy()
319 |     YoloV4Layer* obj = new YoloV4Layer(serialData, serialLength);
320 |     obj->setPluginNamespace(mNamespace.c_str());
321 |     return obj;
322 |   }
323 | 
324 | 
325 |   void YoloV4LayerPluginCreator::setPluginNamespace(const char* libNamespace)noexcept
326 |   {
327 |     mNamespace = libNamespace;
328 |   }
329 | 
330 |   const char* YoloV4LayerPluginCreator::getPluginNamespace() const noexcept
331 |   {
332 |     return mNamespace.c_str();
333 |   }
334 | 
335 | 
336 | }
337 | 


--------------------------------------------------------------------------------
/modules/plugin_factory.h:
--------------------------------------------------------------------------------
  1 | /**
  2 | MIT License
  3 | 
  4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
  5 | 
  6 | Permission is hereby granted, free of charge, to any person obtaining a copy
  7 | of this software and associated documentation files (the "Software"), to deal
  8 | in the Software without restriction, including without limitation the rights
  9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 | copies of the Software, and to permit persons to whom the Software is
 11 | furnished to do so, subject to the following conditions:
 12 | 
 13 | The above copyright notice and this permission notice shall be included in all
 14 | copies or substantial portions of the Software.
 15 | 
 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 22 | SOFTWARE.
 23 | *
 24 | */
 25 | 
 26 | #ifndef __PLUGIN_LAYER_H__
 27 | #define __PLUGIN_LAYER_H__
 28 | 
 29 | #include <cassert>
 30 | #include <cstring>
 31 | #include <cudnn.h>
 32 | #include <iostream>
 33 | #include <memory>
 34 | #include <vector>
 35 | #include "NvInfer.h"
 36 | 
 37 | #define NV_CUDA_CHECK(status)                                                                      \
 38 |     {                                                                                              \
 39 |         if (status != 0)                                                                           \
 40 |         {                                                                                          \
 41 |             std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \
 42 |                       << " at line " << __LINE__ << std::endl;                                     \
 43 |             abort();                                                                               \
 44 |         }                                                                                          \
 45 |     }
 46 | 
 47 | // Forward declaration of cuda kernels
 48 | cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint32_t& batchSize,
 49 | 	const uint32_t& n_grid_h_, const uint32_t& n_grid_w_,
 50 | 	const uint32_t& numOutputClasses, const uint32_t& numBBoxes,
 51 | 			    uint64_t outputSize, const uint32_t& m_new_coords, cudaStream_t stream);
 52 | 
 53 | cudaError_t cudaYoloV4Layer(const void* input, void* output, const uint32_t& batchSize, 
 54 | 							const uint32_t& n_grid_h_,const uint32_t& n_grid_w_,
 55 |                             const uint32_t& numOutputClasses, const uint32_t& numBBoxes,
 56 |                             uint64_t outputSize, const uint32_t& m_new_coords, const float scale_x_y, cudaStream_t stream);
 57 | //class PluginFactory : public nvinfer1::IPluginFactory
 58 | //{
 59 | //
 60 | //public:
 61 | //    PluginFactory();
 62 | //    nvinfer1::IPlugin* createPlugin(const char* layerName, const void* serialData,
 63 | //                                    size_t serialLength);
 64 | //    bool isPlugin(const char* name);
 65 | //    void destroy();
 66 | //
 67 | //private:
 68 | //    static const int m_MaxLeakyLayers = 72;
 69 | //    static const int m_ReorgStride = 2;
 70 | //    static constexpr float m_LeakyNegSlope = 0.1f;
 71 | //    static const int m_NumBoxes = 5;
 72 | //    static const int m_NumCoords = 4;
 73 | //    static const int m_NumClasses = 80;
 74 | //    static const int m_MaxYoloLayers = 3;
 75 | //    int m_LeakyReLUCount = 0;
 76 | //    int m_YoloLayerCount = 0;
 77 | //    nvinfer1::plugin::RegionParameters m_RegionParameters{m_NumBoxes, m_NumCoords, m_NumClasses,
 78 | //                                                          nullptr};
 79 | //
 80 | //    struct INvPluginDeleter
 81 | //    {
 82 | //        void operator()(nvinfer1::plugin::INvPlugin* ptr)
 83 | //        {
 84 | //            if (ptr)
 85 | //            {
 86 | //                ptr->destroy();
 87 | //            }
 88 | //        }
 89 | //    };
 90 | //    struct IPluginDeleter
 91 | //    {
 92 | //        void operator()(nvinfer1::IPlugin* ptr)
 93 | //        {
 94 | //            if (ptr)
 95 | //            {
 96 | //                ptr->terminate();
 97 | //            }
 98 | //        }
 99 | //    };
100 | //    typedef std::unique_ptr<nvinfer1::plugin::INvPlugin, INvPluginDeleter> unique_ptr_INvPlugin;
101 | //    typedef std::unique_ptr<nvinfer1::IPlugin, IPluginDeleter> unique_ptr_IPlugin;
102 | //
103 | //    unique_ptr_INvPlugin m_ReorgLayer;
104 | //    unique_ptr_INvPlugin m_RegionLayer;
105 | //    unique_ptr_INvPlugin m_LeakyReLULayers[m_MaxLeakyLayers];
106 | //    unique_ptr_IPlugin m_YoloLayers[m_MaxYoloLayers];
107 | //};
108 | namespace nvinfer1
109 | {
110 | 	template <typename T>
111 | 	void wr(char*& buffer, const T& val)
112 | 	{
113 | 		*reinterpret_cast<T*>(buffer) = val;
114 | 		buffer += sizeof(T);
115 | 	}
116 | 
117 | 	template <typename T>
118 | 	void re(const char*& buffer, T& val)
119 | 	{
120 | 		val = *reinterpret_cast<const T*>(buffer);
121 | 		buffer += sizeof(T);
122 | 	}
123 | 
124 | 	class YoloLayer : public IPluginV2
125 | 	{
126 | 	public:
127 | 		explicit YoloLayer();
128 | 		YoloLayer(const void* data, size_t length);
129 | 	  YoloLayer(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_, const uint32_t &grid_w_, const uint32_t &new_coords);
130 | 		int getNbOutputs() const noexcept override;
131 | 		nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
132 | 			int nbInputDims)noexcept override;
133 | 		/*void configure(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims,
134 | 					   int nbOutputs, int maxBatchSize)noexcept override;*/
135 | 
136 | 					   /*void configure(const nvinfer1::Dims* inputDims, int nbInputs,
137 | 						   const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize)noexcept override;*/
138 | 
139 | 		int initialize()noexcept override;
140 | 		void terminate()noexcept override;
141 | 		size_t getWorkspaceSize(int maxBatchSize) const noexcept override;
142 | 		
143 | 		int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace,
144 | 			cudaStream_t stream) noexcept override;
145 | 
146 | 		size_t getSerializationSize() const noexcept  override;
147 | 		void serialize(void* buffer) const noexcept  override;
148 | 		
149 | 		const char* getPluginType() const noexcept  override
150 | 		{
151 | 			return "YOLO_TRT";
152 | 		}
153 | 		bool supportsFormat(DataType type, PluginFormat format) const noexcept override;
154 | 		void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override;
155 | 
156 | 		const char* getPluginVersion() const noexcept  override
157 | 		{
158 | 			return "1.0";
159 | 		}
160 | 
161 | 		void setPluginNamespace(const char* pluginNamespace) noexcept  override
162 | 		{
163 | 			_s_plugin_namespace = pluginNamespace;
164 | 		}
165 | 		const char* getPluginNamespace() const  noexcept override
166 | 		{
167 | 			return _s_plugin_namespace.c_str();
168 | 		}
169 | 		void destroy() noexcept  override
170 | 		{
171 | 			delete this;
172 | 		}
173 | 		IPluginV2* clone() const noexcept override;
174 | 	private:
175 | 	
176 | 		std::string _s_plugin_namespace;
177 | 		uint32_t m_NumBoxes;
178 | 		uint32_t m_NumClasses;
179 | 		uint32_t m_GridSize;
180 | 		uint64_t m_OutputSize;
181 | 		uint32_t _n_grid_h;
182 | 		uint32_t _n_grid_w;
183 |      	        uint32_t m_new_coords;  
184 | 	};
185 | 
186 | 
187 | 
188 | 	class YoloLayerPluginCreator : public IPluginCreator
189 | 	{
190 | 	public:
191 | 		YoloLayerPluginCreator();
192 | 
193 | 		~YoloLayerPluginCreator() override = default;
194 | 
195 | 		const char* getPluginName() const noexcept  override;
196 | 
197 | 		const char* getPluginVersion() const  noexcept override;
198 | 
199 | 		const PluginFieldCollection* getFieldNames() noexcept  override;
200 | 
201 | 		IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc)  noexcept override;
202 | 
203 | 		IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept  override;
204 | 
205 | 		void setPluginNamespace(const char* libNamespace) noexcept  override;
206 | 
207 | 		const char* getPluginNamespace() const noexcept  override;
208 | 
209 | 	private:
210 | 		std::string mNamespace;
211 | 		static PluginFieldCollection mFC;
212 | 		static std::vector<PluginField> mPluginAttributes;
213 | 	};
214 | 
215 | 
216 |   //Scaled YOLOv4
217 | 	class YoloV4Layer : public IPluginV2
218 | 	{
219 | 	public:
220 | 		explicit YoloV4Layer();
221 | 		YoloV4Layer(const void* data, size_t length);
222 | 	  YoloV4Layer(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_, const uint32_t &grid_w_, const uint32_t &new_coords, const float& scale_x_y);
223 | 		int getNbOutputs() const noexcept override;
224 | 		nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
225 | 			int nbInputDims)noexcept override;
226 | 		/*void configure(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims,
227 | 					   int nbOutputs, int maxBatchSize)noexcept override;*/
228 | 
229 | 					   /*void configure(const nvinfer1::Dims* inputDims, int nbInputs,
230 | 						   const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize)noexcept override;*/
231 | 
232 | 		int initialize()noexcept override;
233 | 		void terminate()noexcept override;
234 | 		size_t getWorkspaceSize(int maxBatchSize) const noexcept override;
235 | 		
236 | 		int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace,
237 | 			cudaStream_t stream) noexcept override;
238 | 
239 | 		size_t getSerializationSize() const noexcept  override;
240 | 		void serialize(void* buffer) const noexcept  override;
241 | 		
242 | 		const char* getPluginType() const noexcept  override
243 | 		{
244 | 			return "YOLO_TRT";
245 | 		}
246 | 		bool supportsFormat(DataType type, PluginFormat format) const noexcept override;
247 | 		void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override;
248 | 
249 | 		const char* getPluginVersion() const noexcept  override
250 | 		{
251 | 			return "1.0";
252 | 		}
253 | 
254 | 		void setPluginNamespace(const char* pluginNamespace) noexcept  override
255 | 		{
256 | 			_s_plugin_namespace = pluginNamespace;
257 | 		}
258 | 		const char* getPluginNamespace() const  noexcept override
259 | 		{
260 | 			return _s_plugin_namespace.c_str();
261 | 		}
262 | 		void destroy() noexcept  override
263 | 		{
264 | 			delete this;
265 | 		}
266 | 		IPluginV2* clone() const noexcept override;
267 | 	private:
268 | 	
269 | 		std::string _s_plugin_namespace;
270 | 		uint32_t m_NumBoxes;
271 | 		uint32_t m_NumClasses;
272 | 		uint32_t m_GridSize;
273 | 		uint64_t m_OutputSize;
274 | 		uint32_t _n_grid_h;
275 | 		uint32_t _n_grid_w;
276 |       	        uint32_t m_new_coords;
277 | 		float m_scale_x_y;	  
278 | 	  
279 | 	};
280 | 
281 | 
282 | 
283 | 	class YoloV4LayerPluginCreator : public IPluginCreator
284 | 	{
285 | 	public:
286 | 		YoloV4LayerPluginCreator();
287 | 
288 | 		~YoloV4LayerPluginCreator() override = default;
289 | 
290 | 		const char* getPluginName() const noexcept  override;
291 | 
292 | 		const char* getPluginVersion() const  noexcept override;
293 | 
294 | 		const PluginFieldCollection* getFieldNames() noexcept  override;
295 | 
296 | 		IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc)  noexcept override;
297 | 
298 | 		IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept  override;
299 | 
300 | 		void setPluginNamespace(const char* libNamespace) noexcept  override;
301 | 
302 | 		const char* getPluginNamespace() const noexcept  override;
303 | 
304 | 	private:
305 | 		std::string mNamespace;
306 | 		static PluginFieldCollection mFC;
307 | 		static std::vector<PluginField> mPluginAttributes;
308 | 	};  
309 | }
310 | #endif // __PLUGIN_LAYER_H__
311 | 


--------------------------------------------------------------------------------
/modules/preprocess.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <algorithm>
 4 | #include "preprocess.h"
 5 | 
 6 | #define BLOCK 512
 7 | 
 8 | dim3 cuda_gridsize(size_t n){
 9 |     size_t k = (n-1) / BLOCK + 1;
10 |     size_t x = k;
11 |     size_t y = 1;
12 |     if(x > 65535){
13 |         x = ceil(sqrt(k));
14 |         y = (n-1)/(x*BLOCK) + 1;
15 |     }
16 |     dim3 d;
17 |     d.x = x;
18 |     d.y = y;
19 |     d.z = 1;
20 |     return d;
21 | }
22 | 
23 | 
24 | __global__ void SimpleblobFromImageKernel(int N, float* dst_img, unsigned char* src_img, 
25 | 				       int dst_h, int dst_w, int src_h, int src_w, 
26 | 				    float stride_h, float stride_w, float norm)
27 | {
28 |   int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
29 | 
30 |   if (index >= N) return;
31 |   int chan = 3; 
32 |   int w = index % dst_w;
33 |   int h = index / dst_w;
34 |   float centroid_h, centroid_w;
35 |   int c;
36 |   centroid_h = stride_h * (float)(h + 0.5); 
37 |   centroid_w = stride_w * (float)(w + 0.5);
38 |   int src_h_idx = lroundf(centroid_h)-1;
39 |   int src_w_idx = lroundf(centroid_w)-1;
40 |   if (src_h_idx<0){src_h_idx=0;}
41 |   if (src_w_idx<0){src_w_idx=0;}  
42 |   index = chan * src_w_idx + chan* src_w * src_h_idx;
43 | 
44 |   for (c = 0; c < chan; c++) {
45 |     int dst_index = w + (dst_w*h) + (dst_w*dst_h*c);              
46 |     dst_img[dst_index] = (float)src_img[index+c]*norm;
47 |   }
48 | }
49 | 
50 | void blobFromImageGpu(float *dst, unsigned char*src, int d_w, int d_h, int d_c,
51 | 			 int s_w, int s_h, int s_c, float norm, cudaStream_t stream)
52 | {
53 |   int N =  d_w * d_h;
54 |   float stride_h = (float)s_h / (float)d_h;
55 |   float stride_w = (float)s_w / (float)d_w;
56 | 
57 |   SimpleblobFromImageKernel<<<cuda_gridsize(N), BLOCK, 0, stream>>>(N, dst, src, 
58 | 							      d_h, d_w,
59 | 							      s_h, s_w,
60 | 							      stride_h, stride_w, norm);
61 | 
62 | }
63 | 
64 | 


--------------------------------------------------------------------------------
/modules/preprocess.h:
--------------------------------------------------------------------------------
 1 | #include <cuda.h>
 2 | #include <cuda_runtime.h>
 3 | #include <curand.h>
 4 | #include <cublas_v2.h>
 5 | #include <cuda_runtime_api.h>
 6 | 
 7 | 
 8 | void blobFromImageGpu(float *dst, unsigned char*src, int d_w, int d_h, int d_c,
 9 | 		      int s_w, int s_h, int s_c, float norm, cudaStream_t stream);
10 | 


--------------------------------------------------------------------------------
/modules/profiler.cpp:
--------------------------------------------------------------------------------
  1 | //#include "profiler.h"
  2 | #include "profiler.h"
  3 | #include <iomanip>
  4 | 
  5 | using namespace nvinfer1;
  6 | 
  7 | 
  8 | SimpleProfiler::SimpleProfiler(std::string name,
  9 |                                const std::vector<SimpleProfiler>& src_profilers) :
 10 |     m_name(name)
 11 | {
 12 |   float total_time = 0.0;
 13 |   m_index = 0;
 14 |   for (const auto& src_profiler : src_profilers) {
 15 |     for (const auto& rec : src_profiler.m_profile) {
 16 |       auto it = m_profile.find(rec.first);
 17 | 
 18 |       if (it == m_profile.end()) {
 19 | 	m_profile.insert(rec);
 20 |       } else {
 21 | 	it->second.time += rec.second.time;
 22 | 	it->second.count += rec.second.count;
 23 | 	total_time += rec.second.time;
 24 |       }
 25 |     }
 26 |   }
 27 | }
 28 | 
 29 | void SimpleProfiler::reportLayerTime(const char* layerName, float ms) noexcept
 30 | {
 31 | 
 32 |   m_profile[layerName].count++;
 33 |   m_profile[layerName].time += ms;
 34 |   if (m_profile[layerName].min_time == -1.0) {
 35 |     m_profile[layerName].min_time = ms;
 36 |     m_profile[layerName].index = m_index;
 37 |     m_index++;    
 38 |   } else if (m_profile[layerName].min_time > ms) {
 39 |     m_profile[layerName].min_time = ms;
 40 |   }
 41 | }
 42 | 
 43 | void SimpleProfiler::setProfDict(nvinfer1::ILayer *layer) noexcept
 44 | {
 45 |   std::string name = layer->getName();
 46 |   auto t = layer->getType();
 47 |   m_layer_dict[name];
 48 |   m_layer_dict[name].type = layer->getType();
 49 |   if (layer->getType() == nvinfer1::LayerType::kCONVOLUTION) {
 50 |     nvinfer1::IConvolutionLayer* conv =  (nvinfer1::IConvolutionLayer*)layer;
 51 |     nvinfer1::ITensor* in = layer->getInput(0);
 52 |     nvinfer1::Dims dim_in = in->getDimensions();
 53 |     nvinfer1::ITensor* out = layer->getOutput(0);
 54 |     nvinfer1::Dims dim_out = out->getDimensions();
 55 |     nvinfer1::Dims k_dims = conv->getKernelSizeNd();
 56 |     nvinfer1::Dims s_dims = conv->getStrideNd();
 57 |     int groups = conv->getNbGroups();
 58 |     int stride = s_dims.d[0];
 59 |     int kernel = k_dims.d[0];
 60 |     m_layer_dict[name].in_c = dim_in.d[0];
 61 |     m_layer_dict[name].out_c = dim_out.d[0];
 62 |     m_layer_dict[name].w= dim_in.d[2];
 63 |     m_layer_dict[name].h = dim_in.d[1];
 64 |     m_layer_dict[name].k = kernel;;
 65 |     m_layer_dict[name].stride = stride;
 66 |     m_layer_dict[name].groups = groups;    
 67 |     
 68 |   }
 69 | }
 70 | 
 71 | LayerInfo *SimpleProfiler::getILayerFromName(std::string name) noexcept
 72 | {
 73 |   LayerInfo *linfo = NULL;
 74 |   /*
 75 |   if (m_layer_dict.find(name) != m_layer_dict.end()) {
 76 |     linfo = &m_layer_dict[name];
 77 |   }
 78 |   */
 79 |   for (auto it = m_layer_dict.begin(); it != m_layer_dict.end(); it++) {
 80 |     std::string key = it->first;
 81 |     if (!name.find(key)) {
 82 |       linfo = &m_layer_dict[key];
 83 |     }
 84 |   }
 85 |   return linfo;
 86 | }
 87 | 
 88 | std::ostream& operator<<(std::ostream& out, SimpleProfiler& value)
 89 | {
 90 |   out << "========== " << value.m_name << " profile ==========" << std::endl;
 91 |   float totalTime = 0;
 92 |   std::string layerNameStr = "Operation";
 93 | 
 94 |   int maxLayerNameLength = static_cast<int>(layerNameStr.size());
 95 |   for (const auto& elem : value.m_profile)
 96 |     {
 97 |       totalTime += elem.second.time;
 98 |       maxLayerNameLength = std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
 99 |     }
100 | 
101 |   auto old_settings = out.flags();
102 |   auto old_precision = out.precision();
103 |   // Output header
104 |   {
105 |     out << std::setw(maxLayerNameLength) << layerNameStr << " ";
106 |     out << std::setw(12) << "Runtime, "
107 | 	<< "%"
108 | 	<< " ";
109 |     out << std::setw(12) << "Invocations"
110 | 	<< " ";
111 |     out << std::setw(12) << "Runtime[ms]" ;
112 |     out << std::setw(12) << "Min Runtime[ms]" << std::endl;	
113 |   }
114 |   int index = value.m_index;
115 |   for (int i = 0; i < index; i++) {
116 |     for (const auto& elem : value.m_profile){
117 | 	if (elem.second.index == i) {
118 | 	  out << i << ":";
119 | 	  out << std::setw(maxLayerNameLength) << elem.first << ",";
120 | 	  out << std::setw(12) << std::fixed << std::setprecision(1) << (elem.second.time * 100.0F / totalTime) << "%"
121 | 	      << ",";
122 | 	  out << std::setw(12) << elem.second.count << ",";
123 | 	  out << std::setw(12) << std::fixed << std::setprecision(2) << elem.second.time << ", ";
124 | 	  out << std::setw(12) << std::fixed << std::setprecision(2) << elem.second.min_time << std::endl;
125 | 	}
126 |       }
127 |   }
128 |   out.flags(old_settings);
129 |   out.precision(old_precision);
130 |   out << "========== " << value.m_name << " total runtime = " << totalTime << " ms ==========" << std::endl;
131 |   out << "Conv Profile" << std::endl;
132 |   out << "index, w, h, in_c, out_c, k, stride, groups, mintime[us], name" << std::endl;
133 | 
134 |   for (int i = 0; i < index; i++) {
135 |     for (const auto& elem : value.m_profile){
136 | 	if (elem.second.index == i) {
137 | 	  LayerInfo* linfo  = value.getILayerFromName(elem.first);
138 | 	  if (linfo) {
139 | 	    if (linfo->type == nvinfer1::LayerType::kCONVOLUTION) {
140 | 	      out <<  std::setw(4) << std::fixed << i << ", ";
141 | 	      out <<  std::setw(4) << std::fixed << linfo->w << ", ";
142 | 	      out <<  std::setw(4) << std::fixed << linfo->h << ", ";
143 | 	      out <<  std::setw(4) << std::fixed << linfo->in_c << ", ";
144 | 	      out <<  std::setw(4) << std::fixed << linfo->out_c << ", ";
145 | 	      out <<  std::setw(4) << std::fixed << linfo->k << ", ";
146 | 	      out <<  std::setw(4) << std::fixed << linfo->stride << ", ";
147 | 	      out <<  std::setw(4) << std::fixed << linfo->groups << ", "; 	      	      	      	      
148 | 	    
149 | 	  
150 | 	      
151 | 	      out  <<  std::setw(10) << std::setprecision(4) << elem.second.min_time *1000 << ", ";
152 | 	      out  <<  std::setw(6) << elem.first << "," << std::endl;
153 | 	    }
154 | 	  }
155 | 	}
156 |     }
157 |   }
158 | 
159 |   std::vector<int> chan{8, 16, 24, 32, 48, 64, 80, 96, 128, 160, 192, 256, 320, 384};
160 |   //k=1 conv
161 |   int kernel = 1;
162 |   out << "###Conv Kernel(1)" <<std::endl;
163 |   out << "     ," << std::setw(4);  
164 |   for (int out_c : chan) {    
165 |     out <<  std::setw(10) << ","  << out_c;
166 |   }
167 |   out << std::endl;
168 |   for (int in_c : chan) {
169 |     out << "in_c, " << std::setw(4) << std::fixed << in_c << ", ";
170 |     for (int out_c : chan) {
171 |       bool flg  = false;
172 |       for (int i = 0; i < index && flg == false; i++) {
173 | 	for (const auto& elem : value.m_profile){
174 | 	  if (elem.second.index == i) {
175 | 	    LayerInfo* linfo  = value.getILayerFromName(elem.first);
176 | 	    if (linfo && linfo->k == kernel && in_c == linfo->in_c && out_c == linfo->out_c) {
177 | 	      if (linfo->type == nvinfer1::LayerType::kCONVOLUTION) {
178 | 		out  <<  std::setw(10) << std::setprecision(4) << elem.second.min_time *1000;
179 | 		flg = true;
180 | 		break;
181 | 	      }
182 | 	    }
183 | 	  }
184 | 	}
185 |       }
186 |       out << ", ";      
187 |     }
188 |     out   << std::endl;    
189 |   }
190 |   kernel = 3;
191 |   out << "###Conv Kernel(" << kernel << ")" <<std::endl;
192 |   out << "     ," << std::setw(4);  
193 |   for (int out_c : chan) {    
194 |     out <<  std::setw(10) << ","  << out_c;
195 |   }
196 |   out << std::endl;
197 |   for (int in_c : chan) {
198 |     out << "in_c, " << std::setw(4) << std::fixed << in_c << ", ";
199 |     for (int out_c : chan) {
200 |       bool flg  = false;
201 |       for (int i = 0; i < index && flg == false; i++) {
202 | 	for (const auto& elem : value.m_profile){
203 | 	  if (elem.second.index == i) {
204 | 	    LayerInfo* linfo  = value.getILayerFromName(elem.first);
205 | 	    if (linfo && linfo->k == kernel && in_c == linfo->in_c && out_c == linfo->out_c) {
206 | 	      if (linfo->type == nvinfer1::LayerType::kCONVOLUTION) {
207 | 		out  <<  std::setw(10) << std::setprecision(4) << elem.second.min_time *1000;
208 | 		flg = true;
209 | 		break;
210 | 	      }
211 | 	    }
212 | 	  }
213 | 	}
214 |       }
215 |       out << ", ";      
216 |     }
217 |     out   << std::endl;    
218 |   }
219 | 
220 |   kernel = 5;
221 |   out << "###Conv Kernel(" << kernel << ")" <<std::endl;
222 |   out << "     ," << std::setw(4);  
223 |   for (int out_c : chan) {    
224 |     out <<  std::setw(10) << ","  << out_c;
225 |   }
226 |   out << std::endl;
227 |   for (int in_c : chan) {
228 |     out << "in_c, " << std::setw(4) << std::fixed << in_c << ", ";
229 |     for (int out_c : chan) {
230 |       bool flg  = false;
231 |       for (int i = 0; i < index && flg == false; i++) {
232 | 	for (const auto& elem : value.m_profile){
233 | 	  if (elem.second.index == i) {
234 | 	    LayerInfo* linfo  = value.getILayerFromName(elem.first);
235 | 	    if (linfo && linfo->k == kernel && in_c == linfo->in_c && out_c == linfo->out_c) {
236 | 	      if (linfo->type == nvinfer1::LayerType::kCONVOLUTION) {
237 | 		out  <<  std::setw(10) << std::setprecision(4) << elem.second.min_time *1000;
238 | 		flg = true;
239 | 		break;
240 | 	      }
241 | 	    }
242 | 	  }
243 | 	}
244 |       }
245 |       out << ", ";      
246 |     }
247 |     out   << std::endl;    
248 |   }
249 |   kernel = 7;
250 |   out << "###Conv Kernel(" << kernel << ")" <<std::endl;
251 |   out << "     ," << std::setw(4);  
252 |   for (int out_c : chan) {    
253 |     out <<  std::setw(10) << ","  << out_c;
254 |   }
255 |   out << std::endl;
256 |   for (int in_c : chan) {
257 |     out << "in_c, " << std::setw(4) << std::fixed << in_c << ", ";
258 |     for (int out_c : chan) {
259 |       bool flg  = false;
260 |       for (int i = 0; i < index && flg == false; i++) {
261 | 	for (const auto& elem : value.m_profile){
262 | 	  if (elem.second.index == i) {
263 | 	    LayerInfo* linfo  = value.getILayerFromName(elem.first);
264 | 	    if (linfo && linfo->k == kernel && in_c == linfo->in_c && out_c == linfo->out_c) {
265 | 	      if (linfo->type == nvinfer1::LayerType::kCONVOLUTION) {
266 | 		out  <<  std::setw(10) << std::setprecision(4) << elem.second.min_time *1000;
267 | 		flg = true;
268 | 		break;
269 | 	      }
270 | 	    }
271 | 	  }
272 | 	}
273 |       }
274 |       out << ", ";      
275 |     }
276 |     out   << std::endl;    
277 |   }         
278 |   return out;
279 | }
280 | 
281 | 
282 | 


--------------------------------------------------------------------------------
/modules/profiler.h:
--------------------------------------------------------------------------------
 1 | #ifndef YOLO_PROFILER_H
 2 | #define YOLO_PROFILER_H
 3 | 
 4 | #include <NvInfer.h>
 5 | #include <vector>
 6 | #include <string>
 7 | #include <iostream>
 8 | #include <map>
 9 | 
10 | struct LayerInfo
11 | {
12 |   int in_c;
13 |   int out_c;
14 |   int w;
15 |   int h;
16 |   int k;      
17 |   int stride;
18 |   int groups;
19 |   nvinfer1::LayerType type;
20 | };
21 | 
22 | 
23 | class SimpleProfiler : public nvinfer1::IProfiler
24 | {
25 | public:
26 |     struct Record
27 |     {
28 |       float time{0};
29 |       int count{0};
30 |       float min_time{-1.0};
31 |       int index;
32 |     };
33 |     /*
34 |      *  Construct a profiler
35 |      *  name:           Name of the profiler
36 |      *  src_profilers:  Optionally initialize profiler with data of one or more other profilers
37 |      *                  This is usefull for aggregating results of different profilers
38 |      *                  Aggregation will sum all runtime periods and all invokations for each reported
39 |      *                  layer of all given profilers
40 |      */
41 |     SimpleProfiler(std::string name,
42 |         const std::vector<SimpleProfiler>& src_profilers = std::vector<SimpleProfiler>());
43 | 
44 |     void reportLayerTime(const char* layerName, float ms) noexcept override;
45 | 
46 |     void setProfDict(nvinfer1::ILayer *layer) noexcept;
47 |     LayerInfo *getILayerFromName(std::string name) noexcept;
48 |     friend std::ostream& operator<<(std::ostream& out, SimpleProfiler& value);
49 | 
50 | private:
51 |   std::string m_name;
52 |   std::map<std::string, Record> m_profile;
53 |   int m_index;
54 |   std::map<std::string, LayerInfo>m_layer_dict;
55 | };
56 | 
57 | 
58 | 
59 | #endif /* JETNET_PROFILER_H */
60 | 


--------------------------------------------------------------------------------
/modules/yolo.h:
--------------------------------------------------------------------------------
  1 | /**
  2 | MIT License
  3 | 
  4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
  5 | 
  6 | Permission is hereby granted, free of charge, to any person obtaining a copy
  7 | of this software and associated documentation files (the "Software"), to deal
  8 | in the Software without restriction, including without limitation the rights
  9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 | copies of the Software, and to permit persons to whom the Software is
 11 | furnished to do so, subject to the following conditions:
 12 | 
 13 | The above copyright notice and this permission notice shall be included in all
 14 | copies or substantial portions of the Software.
 15 | 
 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 22 | SOFTWARE.
 23 | *
 24 | */
 25 | 
 26 | #ifndef _YOLO_H_
 27 | #define _YOLO_H_
 28 | 
 29 | #include "calibrator.h"
 30 | //#include "plugin_factory.h"
 31 | #include "trt_utils.h"
 32 | 
 33 | #include "NvInfer.h"
 34 | #include "NvInferPlugin.h"
 35 | #include "NvInferRuntimeCommon.h"
 36 | #include "cuda_runtime_api.h"
 37 | #include <algorithm>
 38 | #include <cmath>
 39 | #include <stdint.h>
 40 | #include <string>
 41 | #include <vector>
 42 | #include "class_timer.hpp"
 43 | #include "opencv2/opencv.hpp"
 44 | #include "detect.h"
 45 | #include "logging.h"
 46 | #include "profiler.h"
 47 | 
 48 | /**
 49 |  * Holds all the file paths required to build a network.
 50 |  */
 51 | struct NetworkInfo
 52 | {
 53 |   std::string networkType;
 54 |   std::string configFilePath;
 55 |   std::string wtsFilePath;
 56 |   std::string labelsFilePath;
 57 |   std::string precision;
 58 |   std::string deviceType;
 59 |   std::string calibrationTablePath;
 60 |   std::string enginePath;
 61 |   std::string inputBlobName;
 62 |   std::string data_path;
 63 |   int batch;
 64 |   int width;
 65 |   int height;
 66 |   int dla;
 67 | };
 68 | 
 69 | /**
 70 |  * Holds information about runtime inference params.
 71 |  */
 72 | struct InferParams
 73 | {
 74 |     bool printPerfInfo;
 75 |     bool printPredictionInfo;
 76 |     std::string calibImages;
 77 |     std::string calibImagesPath;
 78 |     float probThresh;
 79 |     float nmsThresh;
 80 |     bool prof;
 81 | };
 82 | 
 83 | /**
 84 |  * Holds information about an output tensor of the yolo network.
 85 |  */
 86 | struct TensorInfo
 87 | {
 88 |   std::string blobName;
 89 |   uint32_t stride{0};
 90 |   uint32_t stride_h{0};
 91 |   uint32_t stride_w{0};
 92 |   uint32_t gridSize{0};
 93 |   uint32_t grid_h{ 0 };
 94 |   uint32_t grid_w{ 0 };
 95 |   uint32_t numClasses{0};
 96 |   uint32_t numBBoxes{0};
 97 |   uint64_t volume{0};
 98 |   std::vector<uint32_t> masks;
 99 |   std::vector<float> anchors;
100 |   int bindingIndex{-1};
101 |   float* hostBuffer{nullptr};
102 |   bool segmenter{false};
103 |   bool regression{false};  
104 |   std::vector<uint32_t> colormap;
105 |   std::vector<std::string> names;
106 |   bool depth;
107 | };
108 | 
109 | 
110 | class Yolo
111 | {
112 | public:
113 |   std::string getNetworkType() const { return m_NetworkType; }
114 |   float getNMSThresh() const { return m_NMSThresh; }
115 |   std::string getClassName(const int& label) const { return m_ClassNames.at(label); }
116 |   int getClassId(const int& label) const { return m_ClassIds.at(label); }
117 |   uint32_t getInputH() const { return m_InputH; }
118 |   uint32_t getInputW() const { return m_InputW; }
119 |   uint32_t getNumClasses() const { return static_cast<uint32_t>(m_ClassNames.size()); }
120 |   bool isPrintPredictions() const { return m_PrintPredictions; }
121 |   bool isPrintPerfInfo() const { return m_PrintPerfInfo; }
122 |   void preprocess_gpu(unsigned char *src, int w, int h);
123 |   void doInference(const unsigned char* input, const uint32_t batchSize);
124 |   void doProfiling(const unsigned char* input, const uint32_t batchSize);  
125 |   std::vector<BBoxInfo> decodeDetections(const int& imageIdx,
126 | 					 const int& imageH,
127 | 					 const int& imageW);
128 |   std::vector<cv::Mat> apply_argmax(const int& imageIdx);
129 |   std::vector<cv::Mat> get_colorlbl(std::vector<cv::Mat> &argmax);
130 |   std::vector<cv::Mat> get_depthmap(std::vector<cv::Mat> &argmax) ;
131 |   std::vector<cv::Mat> get_depthmap_from_logistic(const int& imageIdx) ;
132 |   cv::Mat get_bev_from_lidar(cv::Mat &rangeImg, cv::Mat &seg);  
133 |   void get_backprojection(const int& imageIdx, int im_w, int im_h, cv::Mat &seg, std::vector<BBoxInfo> &binfos, cv::Mat &bev);
134 |   cv::Mat get_heightmap(const int& imageIdx, int im_w, int im_h);  
135 |   void get_filtered_bev_from_logistic(const int& imageIdx, int im_w, int im_h, cv::Mat &seg, std::vector<BBoxInfo> &binfos, cv::Mat &bev);
136 |   void visualize_vidar_with_pcl(const int& imageIdx, int im_w, int im_h, cv::Mat &seg);
137 |   uint32_t *get_detection_colormap(void);
138 |   std::vector<std::string> get_detection_names(int id);
139 |   void print_profiling();  
140 |     virtual ~Yolo();
141 | 
142 | protected:
143 |     Yolo( const NetworkInfo& networkInfo, const InferParams& inferParams);
144 |     std::string m_EnginePath;
145 |     const std::string m_NetworkType;
146 |     const std::string m_ConfigFilePath;
147 |     const std::string m_WtsFilePath;
148 |     const std::string m_LabelsFilePath;
149 |     const std::string m_Precision;
150 |     const std::string m_DeviceType;
151 |     const std::string m_CalibImages;
152 |     const std::string m_CalibImagesFilePath;
153 |     std::string m_CalibTableFilePath;
154 |     const std::string m_InputBlobName;
155 |     std::vector<TensorInfo> m_OutputTensors;
156 |     std::vector<std::map<std::string, std::string>> m_configBlocks;
157 |     uint32_t m_InputH;
158 |     uint32_t m_InputW;
159 |     uint32_t m_InputC;
160 |     uint64_t m_InputSize;
161 | 	uint32_t _n_classes = 0;
162 | 	float _f_depth_multiple = 0;
163 | 	float _f_width_multiple = 0;
164 |     const float m_ProbThresh;
165 |     const float m_NMSThresh;
166 |     std::vector<std::string> m_ClassNames;
167 |     // Class ids for coco benchmarking
168 |     const std::vector<int> m_ClassIds{
169 |         1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
170 |         22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
171 |         46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
172 |         67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90};
173 |     const bool m_PrintPerfInfo;
174 |     const bool m_PrintPredictions;
175 |     // TRT specific members
176 | 	//Logger glogger;
177 |     uint32_t m_BatchSize = 1;
178 |     nvinfer1::INetworkDefinition* m_Network;
179 |     nvinfer1::IBuilder* m_Builder ;
180 |     nvinfer1::IHostMemory* m_ModelStream;
181 |     nvinfer1::ICudaEngine* m_Engine;
182 |     nvinfer1::IExecutionContext* m_Context;
183 |     std::vector<void*> m_DeviceBuffers;
184 |     int m_InputBindingIndex;
185 |     cudaStream_t m_CudaStream;
186 | 
187 |     SimpleProfiler m_model_profiler;
188 |     SimpleProfiler m_host_profiler;
189 |     int m_dla;
190 |     unsigned char* m_h_img;
191 |     unsigned char* m_d_img;
192 |   
193 |     //PluginFactory* m_PluginFactory;
194 |    // std::unique_ptr<YoloTinyMaxpoolPaddingFormula> m_TinyMaxpoolPaddingFormula;
195 | 
196 |     virtual std::vector<BBoxInfo> decodeTensor(const int imageIdx, const int imageH,
197 |                                                const int imageW, const TensorInfo& tensor)
198 |         = 0;
199 | 
200 |     inline void addBBoxProposal(const float bx, const float by, const float bw, const float bh,
201 |                                 const uint32_t stride, const float scalingFactor, const float xOffset,
202 |                                 const float yOffset, const int maxIndex, const float maxProb,
203 | 		const uint32_t 	image_w, const uint32_t image_h,
204 |                                 std::vector<BBoxInfo>& binfo)
205 |     {
206 |         BBoxInfo bbi;
207 |         bbi.box = convertBBoxNetRes(bx, by, bw, bh, stride, m_InputW, m_InputH);
208 |         if ((bbi.box.x1 > bbi.box.x2) || (bbi.box.y1 > bbi.box.y2))
209 |         {
210 |             return;
211 |         }
212 |       //  convertBBoxImgRes(scalingFactor, m_InputW,m_InputH,image_w,image_h, bbi.box);
213 |         bbi.label = maxIndex;
214 |         bbi.prob = maxProb;
215 |         bbi.classId = getClassId(maxIndex);
216 |         binfo.push_back(bbi);
217 |     }
218 | 
219 | 	void calcuate_letterbox_message(const int m_InputH, const int m_InputW,
220 | 		const int imageH, const int imageW,
221 | 		float &sh,float &sw,
222 | 		int &xOffset,int &yOffset)
223 | 	{
224 |         float r = std::min(static_cast<float>(m_InputH) / static_cast<float>(imageH), static_cast<float>(m_InputW) / static_cast<float>(imageW));
225 |         int resizeH = (std::round(imageH*r));
226 |         int resizeW = (std::round(imageW*r));
227 | 
228 | 		sh = r;
229 | 		sw = r;
230 | 		if ((m_InputW - resizeW) % 2) resizeW--;
231 | 		if ((m_InputH - resizeH) % 2) resizeH--;
232 | 		assert((m_InputW - resizeW) % 2 == 0);
233 | 		assert((m_InputH - resizeH) % 2 == 0);
234 | 		xOffset = (m_InputW - resizeW) / 2;
235 | 		 yOffset = (m_InputH - resizeH) / 2;
236 | 	}
237 | 	BBox convert_bbox_res(const float& bx, const float& by, const float& bw, const float& bh,
238 | 		const uint32_t& stride_h_, const uint32_t& stride_w_, const uint32_t& netW, const uint32_t& netH)
239 | 	{
240 | 		BBox b;
241 | 		// Restore coordinates to network input resolution
242 | 		float x = bx * stride_w_;
243 | 		float y = by * stride_h_;
244 | 
245 | 		b.x1 = x - bw / 2;
246 | 		b.x2 = x + bw / 2;
247 | 
248 | 		b.y1 = y - bh / 2;
249 | 		b.y2 = y + bh / 2;
250 | 
251 | 		b.x1 = clamp(b.x1, 0, netW);
252 | 		b.x2 = clamp(b.x2, 0, netW);
253 | 		b.y1 = clamp(b.y1, 0, netH);
254 | 		b.y2 = clamp(b.y2, 0, netH);
255 | 
256 | 		return b;
257 | 	}
258 | 
259 | 	inline void cvt_box(const float sh,
260 | 		const float sw,
261 | 		const float xOffset,
262 | 		const float yOffset,
263 | 		BBox& bbox)
264 | 	{
265 | 		//// Undo Letterbox
266 | 		bbox.x1 -= xOffset;
267 | 		bbox.x2 -= xOffset;
268 | 		bbox.y1 -= yOffset;
269 | 		bbox.y2 -= yOffset;
270 | 		//// Restore to input resolution
271 | 		bbox.x1 /= sw;
272 | 		bbox.x2 /= sw;
273 | 		bbox.y1 /= sh;
274 | 		bbox.y2 /= sh;
275 | 	}
276 | 
277 | 	inline void add_bbox_proposal(const float bx, const float by, const float bw, const float bh,
278 | 		const uint32_t stride_h_, const uint32_t stride_w_, const float scaleH, const float scaleW, const float xoffset_, const float yoffset, const int maxIndex, const float maxProb,
279 | 		const uint32_t 	image_w, const uint32_t image_h,
280 | 		std::vector<BBoxInfo>& binfo)
281 | 	{
282 | 		BBoxInfo bbi;
283 | 		bbi.box = convert_bbox_res(bx, by, bw, bh, stride_h_, stride_w_, m_InputW, m_InputH);
284 | 		if ((bbi.box.x1 > bbi.box.x2) || (bbi.box.y1 > bbi.box.y2))
285 | 		{
286 | 			return;
287 | 		}
288 | 		if ("yolov5" == m_NetworkType)
289 | 		{
290 | 			cvt_box(scaleH, scaleW, xoffset_, yoffset, bbi.box);
291 | 		}
292 | 		else
293 | 		{
294 | 			bbi.box.x1 = ((float)bbi.box.x1 / (float)m_InputW)*(float)image_w;
295 | 			bbi.box.y1 = ((float)bbi.box.y1 / (float)m_InputH)*(float)image_h;
296 | 			bbi.box.x2 = ((float)bbi.box.x2 / (float)m_InputW)*(float)image_w;
297 | 			bbi.box.y2 = ((float)bbi.box.y2 / (float)m_InputH)*(float)image_h;
298 | 		}
299 | 		
300 | 		bbi.label = maxIndex;
301 | 		bbi.prob = maxProb;
302 | 		bbi.classId = getClassId(maxIndex);
303 | 		binfo.push_back(bbi);
304 | 	};
305 | private:
306 |     Logger m_Logger;
307 |     void createYOLOEngine(const nvinfer1::DataType dataType = nvinfer1::DataType::kFLOAT,
308 |                           Int8EntropyCalibrator* calibrator = nullptr);
309 |     std::vector<std::map<std::string, std::string>> parseConfigFile(const std::string cfgFilePath);
310 |     void parseConfigBlocks();
311 |     void allocateBuffers();
312 |     bool verifyYoloEngine();
313 |     void destroyNetworkUtils(std::vector<nvinfer1::Weights>& trtWeights);
314 |     void writePlanFileToDisk();
315 | 
316 | private:
317 | 	Timer _timer;
318 | 
319 | 	int _n_yolo_ind = 0;
320 | };
321 | 
322 | #define GRID_H 600
323 | #define GRID_W 400
324 | #endif // _YOLO_H_
325 | 


--------------------------------------------------------------------------------
/modules/yolo_config_parser.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | MIT License
 3 | 
 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | *
24 | */
25 | 
26 | #ifndef _YOLO_CONFIG_PARSER_
27 | #define _YOLO_CONFIG_PARSER_
28 | 
29 | #include "yolo.h"
30 | 
31 | #include <ctime>
32 | #include <gflags/gflags.h>
33 | 
34 | // Init to be called at the very beginning to verify all config params are valid
35 | void yoloConfigParserInit(int argc, char** argv);
36 | 
37 | NetworkInfo getYoloNetworkInfo();
38 | NetworkInfo getYoloNetworkInfo1();
39 | NetworkInfo getYoloNetworkInfo2();
40 | std::string getDirectoryPath(void);
41 | std::string getVideoPath(void);
42 | int getCameraID(void);
43 | //InferParams getYoloInferParams();
44 | //uint64_t getSeed();
45 | //std::string getNetworkType();
46 | //std::string getPrecision();
47 | //std::string getTestImages();
48 | //std::string getTestImagesPath();
49 | //bool getDecode();
50 | //bool getDoBenchmark();
51 | //bool getViewDetections();
52 | bool getSaveDetections();
53 | std::string getSaveDetectionsPath();
54 | //uint32_t getBatchSize();
55 | //bool getShuffleTestSet();
56 | bool
57 | get_dont_show_flg(void);
58 | bool
59 | get_prof_flg(void);
60 | std::vector<std::string>
61 | get_names(void);
62 | std::string
63 | get_dump_path(void);
64 | double
65 | get_score_thresh(void);
66 | bool
67 | get_multi_precision_flg(void);
68 | bool
69 | get_cuda_flg(void);
70 | std::string
71 | get_output_path(void);
72 | std::string
73 | get_target_label(void);
74 | bool
75 | get_lidar_flg(void);
76 | std::string
77 | get_depth_colormap(void);
78 | #endif //_YOLO_CONFIG_PARSER_
79 | 


--------------------------------------------------------------------------------
/modules/yoloplugin_lib.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | MIT License
  3 | 
  4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
  5 | 
  6 | Permission is hereby granted, free of charge, to any person obtaining a copy
  7 | of this software and associated documentation files (the "Software"), to deal
  8 | in the Software without restriction, including without limitation the rights
  9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 | copies of the Software, and to permit persons to whom the Software is
 11 | furnished to do so, subject to the following conditions:
 12 | 
 13 | The above copyright notice and this permission notice shall be included in all
 14 | copies or substantial portions of the Software.
 15 | 
 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 22 | SOFTWARE.
 23 | *
 24 | */
 25 | 
 26 | #include "yoloplugin_lib.h"
 27 | #include "yolo_config_parser.h"
 28 | #include "yolov2.h"
 29 | #include "yolov3.h"
 30 | 
 31 | #include <iomanip>
 32 | #include <chrono>
 33 | 
 34 | static void decodeBatchDetections(const YoloPluginCtx* ctx, std::vector<YoloPluginOutput*>& outputs)
 35 | {
 36 |     for (uint32_t p = 0; p < ctx->batchSize; ++p)
 37 |     {
 38 |         YoloPluginOutput* out = new YoloPluginOutput;
 39 |         std::vector<BBoxInfo> binfo = ctx->inferenceNetwork->decodeDetections(
 40 |             p, ctx->initParams.processingHeight, ctx->initParams.processingWidth);
 41 | 		std::vector<BBoxInfo> remaining;
 42 | 		/*std::vector<BBoxInfo> remaining = nmsAllClasses(
 43 | 			ctx->inferenceNetwork->getNMSThresh(), binfo, ctx->inferenceNetwork->getNumClasses(),);*/
 44 |         out->numObjects = remaining.size();
 45 |         assert(out->numObjects <= MAX_OBJECTS_PER_FRAME);
 46 |         for (uint32_t j = 0; j < remaining.size(); ++j)
 47 |         {
 48 |             BBoxInfo b = remaining.at(j);
 49 |             YoloPluginObject obj;
 50 |             obj.left = static_cast<int>(b.box.x1);
 51 |             obj.top = static_cast<int>(b.box.y1);
 52 |             obj.width = static_cast<int>(b.box.x2 - b.box.x1);
 53 |             obj.height = static_cast<int>(b.box.y2 - b.box.y1);
 54 |             strcpy(obj.label, ctx->inferenceNetwork->getClassName(b.label).c_str());
 55 |             out->object[j] = obj;
 56 | 
 57 |             if (ctx->inferParams.printPredictionInfo)
 58 |             {
 59 |                 printPredictions(b, ctx->inferenceNetwork->getClassName(b.label));
 60 |             }
 61 |         }
 62 |         outputs.at(p) = out;
 63 |     }
 64 | }
 65 | 
 66 | static void dsPreProcessBatchInput(const std::vector<cv::Mat*>& cvmats, cv::Mat& batchBlob,
 67 |                                    const int& processingHeight, const int& processingWidth,
 68 |                                    const int& inputH, const int& inputW)
 69 | {
 70 | 
 71 |     std::vector<cv::Mat> batch_images(
 72 |         cvmats.size(), cv::Mat(cv::Size(processingWidth, processingHeight), CV_8UC3));
 73 |     for (uint32_t i = 0; i < cvmats.size(); ++i)
 74 |     {
 75 |         cv::Mat imageResize, imageBorder, inputImage;
 76 |         inputImage = *cvmats.at(i);
 77 |         int maxBorder = std::max(inputImage.size().width, inputImage.size().height);
 78 | 
 79 |         assert((maxBorder - inputImage.size().height) % 2 == 0);
 80 |         assert((maxBorder - inputImage.size().width) % 2 == 0);
 81 | 
 82 |         int yOffset = (maxBorder - inputImage.size().height) / 2;
 83 |         int xOffset = (maxBorder - inputImage.size().width) / 2;
 84 | 
 85 |         // Letterbox and resize to maintain aspect ratio
 86 |         cv::copyMakeBorder(inputImage, imageBorder, yOffset, yOffset, xOffset, xOffset,
 87 |                            cv::BORDER_CONSTANT, cv::Scalar(127.5, 127.5, 127.5));
 88 |         cv::resize(imageBorder, imageResize, cv::Size(inputW, inputH), 0, 0, cv::INTER_CUBIC);
 89 |         batch_images.at(i) = imageResize;
 90 |     }
 91 | 
 92 |     batchBlob = cv::dnn::blobFromImages(batch_images, 1.0, cv::Size(inputW, inputH),
 93 |                                         cv::Scalar(0.0, 0.0, 0.0), false);
 94 | }
 95 | 
 96 | YoloPluginCtx* YoloPluginCtxInit(YoloPluginInitParams* initParams, size_t batchSize)
 97 | {
 98 |     char** gArgV = new char*[2];
 99 |     gArgV[0] = new char[64];
100 |     gArgV[1] = new char[512];
101 |     strcpy(gArgV[0], "yolo_plugin_ctx");
102 |     strcpy(gArgV[1], std::string("--flagfile=" + initParams->configFilePath).c_str());
103 | //    yoloConfigParserInit(2, gArgV);
104 | 
105 |     YoloPluginCtx* ctx = new YoloPluginCtx;
106 |     ctx->initParams = *initParams;
107 |     ctx->batchSize = batchSize;
108 | 	ctx->networkInfo;// = getYoloNetworkInfo();
109 | 	ctx->inferParams;// = getYoloInferParams();
110 | 	uint32_t configBatchSize;// = getBatchSize();
111 | 
112 |     // Check if config batchsize matches buffer batch size in the pipeline
113 |     if (ctx->batchSize != configBatchSize)
114 |     {
115 |         std::cerr
116 |             << "WARNING: Batchsize set in config file overriden by pipeline. New batchsize is "
117 |             << ctx->batchSize << std::endl;
118 |         int npos = ctx->networkInfo.wtsFilePath.find(".weights");
119 |         assert(npos != std::string::npos
120 |                && "wts file file not recognised. File needs to be of '.weights' format");
121 |         std::string dataPath = ctx->networkInfo.wtsFilePath.substr(0, npos);
122 |         ctx->networkInfo.enginePath = dataPath + "-" + ctx->networkInfo.precision + "-batch"
123 |             + std::to_string(ctx->batchSize) + ".engine";
124 |     }
125 | 
126 |     if ((ctx->networkInfo.networkType == "yolov2")
127 |         || (ctx->networkInfo.networkType == "yolov2-tiny"))
128 |     {
129 |         ctx->inferenceNetwork = new YoloV2(ctx->networkInfo, ctx->inferParams);
130 |     }
131 |     else if ((ctx->networkInfo.networkType == "yolov3")
132 |              || (ctx->networkInfo.networkType == "yolov3-tiny"))
133 |     {
134 |         ctx->inferenceNetwork = new YoloV3( ctx->networkInfo, ctx->inferParams);
135 |     }
136 |     else
137 |     {
138 |         std::cerr << "ERROR: Unrecognized network type " << ctx->networkInfo.networkType
139 |                   << std::endl;
140 |         std::cerr << "Network Type has to be one among the following : yolov2, yolov2-tiny, yolov3 "
141 |                      "and yolov3-tiny"
142 |                   << std::endl;
143 |         return nullptr;
144 |     }
145 | 
146 |     delete[] gArgV;
147 |     return ctx;
148 | }
149 | 
150 | std::vector<YoloPluginOutput*> YoloPluginProcess(YoloPluginCtx* ctx, std::vector<cv::Mat*>& cvmats)
151 | {
152 |     assert((cvmats.size() <= ctx->batchSize) && "Image batch size exceeds TRT engines batch size");
153 |     std::vector<YoloPluginOutput*> outputs = std::vector<YoloPluginOutput*>(cvmats.size(), nullptr);
154 |     cv::Mat preprocessedImages;
155 |     std::chrono::duration<double> preElapsed, inferElapsed, postElapsed;
156 | 	std::chrono::time_point<std::chrono::steady_clock> preStart, preEnd, inferStart, inferEnd, postStart, postEnd;
157 | 
158 |     if (cvmats.size() > 0)
159 |     {
160 | //		preStart = std::chrono::high_resolution_clock::now();
161 |         dsPreProcessBatchInput(cvmats, preprocessedImages, ctx->initParams.processingWidth,
162 |                                ctx->initParams.processingHeight, ctx->inferenceNetwork->getInputH(),
163 |                                ctx->inferenceNetwork->getInputW());
164 | //		preEnd = std::chrono::high_resolution_clock::now();
165 | 
166 | //		inferStart = std::chrono::high_resolution_clock::now();
167 |         ctx->inferenceNetwork->doInference(preprocessedImages.data, cvmats.size());
168 |   //      inferEnd = std::chrono::high_resolution_clock::now();
169 | 
170 | //		postStart = std::chrono::high_resolution_clock::now();
171 |         decodeBatchDetections(ctx, outputs);
172 | //		postEnd = std::chrono::high_resolution_clock::now();
173 |     }
174 | 
175 |     // Perf calc
176 |     if (ctx->inferParams.printPerfInfo)
177 |     {
178 |         preElapsed
179 |             = ((preEnd - preStart) + (preEnd - preStart) / 1000000.0)
180 |             * 1000;
181 |         inferElapsed = ((inferEnd - inferStart)
182 |                         + (inferEnd - inferStart) / 1000000.0)
183 |             * 1000;
184 |         postElapsed = ((postEnd - postStart)
185 |                        + (postEnd - postStart) / 1000000.0)
186 |             * 1000;
187 | 
188 |         ctx->inferTime += inferElapsed.count();
189 |         ctx->preTime += preElapsed.count();
190 |         ctx->postTime += postElapsed.count();
191 |         ctx->imageCount += cvmats.size();
192 |     }
193 |     return outputs;
194 | }
195 | 
196 | void YoloPluginCtxDeinit(YoloPluginCtx* ctx)
197 | {
198 |     if (ctx->inferParams.printPerfInfo)
199 |     {
200 |         std::cout << "Yolo Plugin Perf Summary " << std::endl;
201 |         std::cout << "Batch Size : " << ctx->batchSize << std::endl;
202 |         std::cout << std::fixed << std::setprecision(4)
203 |                   << "PreProcess : " << ctx->preTime / ctx->imageCount
204 |                   << " ms Inference : " << ctx->inferTime / ctx->imageCount
205 |                   << " ms PostProcess : " << ctx->postTime / ctx->imageCount << " ms Total : "
206 |                   << (ctx->preTime + ctx->postTime + ctx->inferTime) / ctx->imageCount
207 |                   << " ms per Image" << std::endl;
208 |     }
209 | 
210 |     delete ctx->inferenceNetwork;
211 |     delete ctx;
212 | }
213 | 


--------------------------------------------------------------------------------
/modules/yoloplugin_lib.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | MIT License
 3 | 
 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | *
24 | */
25 | #ifndef __YOLOPLUGIN_LIB__
26 | #define __YOLOPLUGIN_LIB__
27 | 
28 | //#include <glib.h>
29 | 
30 | #include "calibrator.h"
31 | #include "trt_utils.h"
32 | #include "yolo.h"
33 | 
34 | //#ifdef __cplusplus
35 | //extern "C" {
36 | //#endif
37 | 
38 | #define MAX_OBJECTS_PER_FRAME 32
39 | typedef struct YoloPluginCtx YoloPluginCtx;
40 | typedef struct YoloPluginOutput YoloPluginOutput;
41 | // Init parameters structure as input, required for instantiating yoloplugin_lib
42 | typedef struct
43 | {
44 |     // Width at which frame/object will be scaled
45 |     int processingWidth;
46 |     // height at which frame/object will be scaled
47 |     int processingHeight;
48 |     // Flag to indicate whether operating on crops of full frame
49 |     int fullFrame;
50 |     // Plugin config file
51 |     std::string configFilePath;
52 | } YoloPluginInitParams;
53 | 
54 | struct YoloPluginCtx
55 | {
56 |     YoloPluginInitParams initParams;
57 |     NetworkInfo networkInfo;
58 |     InferParams inferParams;
59 |     Yolo* inferenceNetwork;
60 | 
61 |     // perf vars
62 |     float inferTime = 0.0, preTime = 0.0, postTime = 0.0;
63 |     uint32_t batchSize = 0;
64 |     uint64_t imageCount = 0;
65 | };
66 | 
67 | // Detected/Labelled object structure, stores bounding box info along with label
68 | typedef struct
69 | {
70 |     int left;
71 |     int top;
72 |     int width;
73 |     int height;
74 |     char label[64];
75 | } YoloPluginObject;
76 | 
77 | // Output data returned after processing
78 | struct YoloPluginOutput
79 | {
80 |     int numObjects;
81 |     YoloPluginObject object[MAX_OBJECTS_PER_FRAME];
82 | };
83 | 
84 | // Initialize library context
85 | YoloPluginCtx* YoloPluginCtxInit(YoloPluginInitParams* initParams, size_t batchSize);
86 | 
87 | // Dequeue processed output
88 | std::vector<YoloPluginOutput*> YoloPluginProcess(YoloPluginCtx* ctx, std::vector<cv::Mat*>& cvmats);
89 | 
90 | // Deinitialize library context
91 | void YoloPluginCtxDeinit(YoloPluginCtx* ctx);
92 | 
93 | //#ifdef __cplusplus
94 | //}
95 | //#endif
96 | 
97 | #endif
98 | 


--------------------------------------------------------------------------------
/modules/yolov2.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 | MIT License
 3 | 
 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | *
24 | */
25 | 
26 | #include "yolov2.h"
27 | #include <algorithm>
28 | 
29 | YoloV2::YoloV2(const NetworkInfo& networkInfo,
30 |                const InferParams& inferParams) :
31 |     Yolo(networkInfo, inferParams){}
32 | 
33 | std::vector<BBoxInfo> YoloV2::decodeTensor(const int imageIdx, const int imageH, const int imageW,
34 |                                            const TensorInfo& tensor)
35 | {
36 |     float scalingFactor
37 |         = std::min(static_cast<float>(m_InputW) / imageW, static_cast<float>(m_InputH) / imageH);
38 |     float xOffset = (m_InputW - scalingFactor * imageW) / 2;
39 |     float yOffset = (m_InputH - scalingFactor * imageH) / 2;
40 | 
41 |     float* detections = &tensor.hostBuffer[imageIdx * tensor.volume];
42 | 
43 |     std::vector<BBoxInfo> binfo;
44 |     for (uint32_t y = 0; y < tensor.gridSize; y++)
45 |     {
46 |         for (uint32_t x = 0; x < tensor.gridSize; x++)
47 |         {
48 |             for (uint32_t b = 0; b < tensor.numBBoxes; b++)
49 |             {
50 |                 const float pw = tensor.anchors[2 * b];
51 |                 const float ph = tensor.anchors[2 * b + 1];
52 |                 const int numGridCells = tensor.gridSize * tensor.gridSize;
53 |                 const int bbindex = y * tensor.gridSize + x;
54 |                 const float bx
55 |                     = x + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 0)];
56 |                 const float by
57 |                     = y + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 1)];
58 |                 const float bw = pw
59 |                     * exp(detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)]);
60 |                 const float bh = ph
61 |                     * exp(detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)]);
62 | 
63 |                 const float objectness
64 |                     = detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 4)];
65 |                 float maxProb = 0.0f;
66 |                 int maxIndex = -1;
67 | 
68 |                 for (uint32_t i = 0; i < tensor.numClasses; i++)
69 |                 {
70 |                     float prob
71 |                         = detections[bbindex
72 |                                      + numGridCells * (b * (5 + tensor.numClasses) + (5 + i))];
73 | 
74 |                     if (prob > maxProb)
75 |                     {
76 |                         maxProb = prob;
77 |                         maxIndex = i;
78 |                     }
79 |                 }
80 | 
81 |                 maxProb = objectness * maxProb;
82 | 
83 |                 if (maxProb > m_ProbThresh)
84 |                 {
85 |                     addBBoxProposal(bx, by, bw, bh, tensor.stride, scalingFactor, xOffset, yOffset,
86 |                                     maxIndex, maxProb,imageW,imageH, binfo);
87 |                 }
88 |             }
89 |         }
90 |     }
91 |     return binfo;
92 | }
93 | 


--------------------------------------------------------------------------------
/modules/yolov2.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | MIT License
 3 | 
 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | *
24 | */
25 | 
26 | #ifndef _YOLO_V2_
27 | #define _YOLO_V2_
28 | 
29 | #include "yolo.h"
30 | 
31 | #include <stdint.h>
32 | #include <string>
33 | #include <vector>
34 | 
35 | class YoloV2 : public Yolo
36 | {
37 | public:
38 |     YoloV2( const NetworkInfo& networkInfo, const InferParams& inferParams);
39 | 
40 | private:
41 |     std::vector<BBoxInfo> decodeTensor(const int imageIdx, const int imageH, const int imageW,
42 |                                        const TensorInfo& tensor) override;
43 | };
44 | 
45 | #endif // _YOLO_V2_


--------------------------------------------------------------------------------
/modules/yolov3.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 | MIT License
 3 | 
 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | *
24 | */
25 | #include "yolov3.h"
26 | 
27 | YoloV3::YoloV3(const NetworkInfo& networkInfo,
28 |                const InferParams& inferParams) :
29 |     Yolo( networkInfo, inferParams){}
30 | 
31 | std::vector<BBoxInfo> YoloV3::decodeTensor(const int imageIdx,
32 | 										   const int imageH, 
33 | 										   const int imageW,
34 |                                            const TensorInfo& tensor)
35 | {
36 | 	float	scale_h = 1.f;
37 | 	float	scale_w = 1.f;
38 | 	int	xOffset = 0;
39 | 	int yOffset = 0;
40 | 
41 |     const float* detections = &tensor.hostBuffer[imageIdx * tensor.volume];
42 | 
43 |     std::vector<BBoxInfo> binfo;
44 |     for (uint32_t y = 0; y < tensor.grid_h; ++y)
45 |     {
46 |         for (uint32_t x = 0; x < tensor.grid_w; ++x)
47 |         {
48 |             for (uint32_t b = 0; b < tensor.numBBoxes; ++b)
49 |             {
50 |                 const float pw = tensor.anchors[tensor.masks[b] * 2];
51 |                 const float ph = tensor.anchors[tensor.masks[b] * 2 + 1];
52 | 
53 |                 const int numGridCells = tensor.grid_h * tensor.grid_w;
54 |                 const int bbindex = y * tensor.grid_w+ x;
55 |                 const float bx
56 |                     = x + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 0)];
57 | 
58 |                 const float by
59 |                     = y + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 1)];
60 |                 const float bw
61 |                     = pw * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)];
62 |                 const float bh
63 |                     = ph * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)];
64 | 
65 |                 const float objectness
66 |                     = detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 4)];
67 | 
68 |                 float maxProb = 0.0f;
69 |                 int maxIndex = -1;
70 | 
71 |                 for (uint32_t i = 0; i < tensor.numClasses; ++i)
72 |                 {
73 |                     float prob
74 |                         = (detections[bbindex
75 |                                       + numGridCells * (b * (5 + tensor.numClasses) + (5 + i))]);
76 | 
77 |                     if (prob > maxProb)
78 |                     {
79 |                         maxProb = prob;
80 |                         maxIndex = i;
81 |                     }
82 |                 }
83 |                 maxProb = objectness * maxProb;
84 | 
85 |                 if (maxProb > m_ProbThresh)
86 |                 {
87 | 					add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w, scale_h, scale_w, xOffset, yOffset, maxIndex, maxProb, imageW, imageH, binfo);
88 |                 }
89 |             }
90 |         }
91 |     }
92 |     return binfo;
93 | }
94 | 


--------------------------------------------------------------------------------
/modules/yolov3.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | MIT License
 3 | 
 4 | Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | *
24 | */
25 | 
26 | #ifndef _YOLO_V3_
27 | #define _YOLO_V3_
28 | 
29 | #include "yolo.h"
30 | 
31 | #include <stdint.h>
32 | #include <string>
33 | #include <vector>
34 | 
35 | class YoloV3 : public Yolo
36 | {
37 | public:
38 |     YoloV3(const NetworkInfo& networkInfo, const InferParams& inferParams);
39 | 
40 | private:
41 |     std::vector<BBoxInfo> decodeTensor(const int imageIdx, const int imageH, const int imageW,
42 |                                        const TensorInfo& tensor) override;
43 | };
44 | 
45 | #endif // _YOLO_V3_


--------------------------------------------------------------------------------
/modules/yolov4.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "yolov4.h"
 3 | 
 4 | YoloV4::YoloV4(	const NetworkInfo &network_info_,
 5 | 	const InferParams &infer_params_) :
 6 | 	Yolo(network_info_, infer_params_) {}
 7 | 
 8 | std::vector<BBoxInfo> YoloV4::decodeTensor(const int imageIdx, const int imageH, const int imageW, const TensorInfo& tensor)
 9 | {
10 | 	float	scale_h = 1.f;
11 | 	float	scale_w = 1.f;
12 | 	int	xOffset = 0;
13 | 	int yOffset = 0;
14 | 
15 | 	const float* detections = &tensor.hostBuffer[imageIdx * tensor.volume];
16 | 
17 | 	std::vector<BBoxInfo> binfo;
18 | 	float scale_x_y = 2.0;
19 | 	float offset =  0.5 * (scale_x_y-1.0);
20 | 	for (uint32_t y = 0; y < tensor.grid_h; ++y)
21 | 	{
22 | 		for (uint32_t x = 0; x < tensor.grid_w; ++x)
23 | 		{
24 | 			for (uint32_t b = 0; b < tensor.numBBoxes; ++b)
25 | 			{
26 | 				const int numGridCells = tensor.grid_h * tensor.grid_w;
27 | 				const int bbindex = y * tensor.grid_w + x;
28 | 			  
29 | 			        const float objectness
30 | 				  = detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 4)];
31 | 				if (objectness < m_ProbThresh) {
32 | 				  continue;
33 | 				}
34 | 
35 | 				const float pw = tensor.anchors[tensor.masks[b] * 2];
36 | 				const float ph = tensor.anchors[tensor.masks[b] * 2 + 1];
37 | 
38 | 				const float bx
39 | 					= x + scale_x_y * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 0)] - offset;
40 | 				const float by
41 | 					= y + scale_x_y * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 1)] - offset;
42 | 				const float bw
43 | 					= pw * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)]  * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)] * 4;
44 | 				const float bh
45 | 					= ph * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)] * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)] * 4;
46 | 
47 | 
48 | 				float maxProb = 0.0f;
49 | 				int maxIndex = -1;
50 | 
51 | 				for (uint32_t i = 0; i < tensor.numClasses; ++i)
52 | 				{
53 | 					float prob
54 | 						= (detections[bbindex
55 | 							+ numGridCells * (b * (5 + tensor.numClasses) + (5 + i))]);
56 | 
57 | 					if (prob > maxProb)
58 | 					{
59 | 						maxProb = prob;
60 | 						maxIndex = i;
61 | 					}
62 | 				}
63 | 				maxProb = objectness * maxProb;
64 | 
65 | 				if (maxProb > m_ProbThresh)
66 | 				{
67 | 					add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w, scale_h, scale_w, xOffset, yOffset, maxIndex, maxProb, imageW, imageH, binfo);
68 | 				}
69 | 			}
70 | 		}
71 | 	}
72 | 	return binfo;
73 | }
74 | 


--------------------------------------------------------------------------------
/modules/yolov4.h:
--------------------------------------------------------------------------------
 1 | #ifndef CLASS_YOLOV4_H_
 2 | #define CLASS_YOLOV4_H_
 3 | #include "yolo.h"
 4 | class YoloV4 :public Yolo
 5 | {
 6 | public:
 7 | 	YoloV4(
 8 | 		const NetworkInfo &network_info_,
 9 | 		const InferParams &infer_params_);
10 | private:
11 | 	std::vector<BBoxInfo> decodeTensor(const int imageIdx,
12 | 		const int imageH,
13 | 		const int imageW,
14 | 		const TensorInfo& tensor) override;
15 | };
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/modules/yolov5.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "yolov5.h"
 3 | 
 4 | 
 5 | YoloV5::YoloV5(
 6 | 	const NetworkInfo &network_info_,
 7 | 	const InferParams &infer_params_) :
 8 | 	Yolo( network_info_, infer_params_) {}
 9 | 
10 | std::vector<BBoxInfo> YoloV5::decodeTensor(const int imageIdx, const int imageH, const int imageW, const TensorInfo& tensor)
11 | {
12 | 	float	scale_h = 1.f;
13 | 	float	scale_w = 1.f;
14 | 	int	xOffset = 0;
15 | 	int yOffset = 0;
16 | 	calcuate_letterbox_message(m_InputH, m_InputW, imageH, imageW, scale_h, scale_w, xOffset, yOffset);
17 | 	const float* detections = &tensor.hostBuffer[imageIdx * tensor.volume];
18 | 
19 | 	std::vector<BBoxInfo> binfo;
20 | 	for (uint32_t y = 0; y < tensor.grid_h; ++y)
21 | 	{
22 | 		for (uint32_t x = 0; x < tensor.grid_w; ++x)
23 | 		{
24 | 			for (uint32_t b = 0; b < tensor.numBBoxes; ++b)
25 | 			{
26 | 				const float pw = tensor.anchors[tensor.masks[b] * 2];
27 | 				const float ph = tensor.anchors[tensor.masks[b] * 2 + 1];
28 | 
29 | 				const int numGridCells = tensor.grid_h * tensor.grid_w;
30 | 				const int bbindex = y * tensor.grid_w+ x;
31 | 				const float bx
32 | 					= x + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 0)];
33 | 
34 | 				const float by
35 | 					= y + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 1)];
36 | 				const float bw
37 | 					= pw * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)];
38 | 				const float bh
39 | 					= ph * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)];
40 | 
41 | 				const float objectness
42 | 					= detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 4)];
43 | 
44 | 				float maxProb = 0.0f;
45 | 				int maxIndex = -1;
46 | 
47 | 				for (uint32_t i = 0; i < tensor.numClasses; ++i)
48 | 				{
49 | 					float prob
50 | 						= (detections[bbindex
51 | 							+ numGridCells * (b * (5 + tensor.numClasses) + (5 + i))]);
52 | 
53 | 					if (prob > maxProb)
54 | 					{
55 | 						maxProb = prob;
56 | 						maxIndex = i;
57 | 					}
58 | 				}
59 | 				maxProb = objectness * maxProb;
60 | 
61 | 				if (maxProb > m_ProbThresh)
62 | 				{
63 | 					add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w, scale_h, scale_w,xOffset, yOffset, maxIndex, maxProb, imageW, imageH, binfo);
64 | 				}
65 | 			}
66 | 		}
67 | 	}
68 | 	return binfo;
69 | }
70 | 


--------------------------------------------------------------------------------
/modules/yolov5.h:
--------------------------------------------------------------------------------
 1 | #ifndef CLASS_YOLOV5_H_
 2 | #define CLASS_YOLOV5_H_
 3 | #include "yolo.h"
 4 | class YoloV5 :public Yolo
 5 | {
 6 | public:
 7 | 	YoloV5(
 8 | 		const NetworkInfo &network_info_,
 9 | 		const InferParams &infer_params_);
10 | 
11 | 	BBox convert_bbox_res(const float& bx, const float& by, const float& bw, const float& bh,
12 | 		const uint32_t& stride_h_, const uint32_t& stride_w_, const uint32_t& netW, const uint32_t& netH)
13 | 	{
14 | 		BBox b;
15 | 		// Restore coordinates to network input resolution
16 | 		float x = bx * stride_w_;
17 | 		float y = by * stride_h_;
18 | 
19 | 		b.x1 = x - bw / 2;
20 | 		b.x2 = x + bw / 2;
21 | 
22 | 		b.y1 = y - bh / 2;
23 | 		b.y2 = y + bh / 2;
24 | 
25 | 		b.x1 = clamp(b.x1, 0, netW);
26 | 		b.x2 = clamp(b.x2, 0, netW);
27 | 		b.y1 = clamp(b.y1, 0, netH);
28 | 		b.y2 = clamp(b.y2, 0, netH);
29 | 
30 | 		return b;
31 | 	}
32 | 
33 | 	
34 | private:
35 | 	std::vector<BBoxInfo> decodeTensor(const int imageIdx,
36 | 		const int imageH,
37 | 		const int imageW,
38 | 		const TensorInfo& tensor) override;
39 | };
40 | 
41 | #endif


--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 3 | <package format="3">
 4 |   <name>lightnet_trt</name>
 5 |   <version>0.0.0</version>
 6 |   <description>The ROS 2 package for lightNet-TRT</description>
 7 |   <maintainer email="koji.minoda@tier4.jp">Koji Minoda</maintainer>
 8 |   <license>Apache License 2.0</license>
 9 |   <author email="koji.minoda@tier4.jp">Koji Minoda</author>
10 | 
11 |   <buildtool_depend>ament_cmake_auto</buildtool_depend>
12 |   <buildtool_depend>autoware_cmake</buildtool_depend>
13 | 
14 |   <depend>sensor_msgs</depend>
15 |   <depend>rclcpp</depend>
16 |   <depend>cv_bridge</depend>
17 |   <depend>image_transport</depend>
18 |   <depend>rclcpp_components</depend>
19 |   <depend>tier4_autoware_utils</depend>
20 |   <depend>tf2_eigen</depend>
21 | 
22 |   <test_depend>ament_cmake_ros</test_depend>
23 |   <test_depend>ament_lint_auto</test_depend>
24 |   <test_depend>autoware_lint_common</test_depend>
25 | 
26 |   <export>
27 |     <build_type>ament_cmake</build_type>
28 |   </export>
29 | </package>
30 | 


--------------------------------------------------------------------------------
/samples/parallel_executor.cpp:
--------------------------------------------------------------------------------
  1 | #include "class_timer.hpp"
  2 | #include "class_detector.h"
  3 | #include "yolo_config_parser.h"
  4 | #include <memory>
  5 | #include <thread>
  6 | 
  7 | #include <filesystem>
  8 | #include <iostream>
  9 | #include <sys/stat.h>
 10 | #include <iostream>
 11 | #include <boost/filesystem.hpp>
 12 | #include <omp.h>
 13 | namespace fs = boost::filesystem;
 14 | 
 15 | template <typename ... Args>
 16 | std::string format(const std::string& fmt, Args ... args )
 17 | {
 18 |   size_t len = std::snprintf( nullptr, 0, fmt.c_str(), args ... );
 19 |   std::vector<char> buf(len + 1);
 20 |   std::snprintf(&buf[0], len + 1, fmt.c_str(), args ... );
 21 |   return std::string(&buf[0], &buf[0] + len);
 22 | }
 23 | 
 24 | void
 25 | write_prediction(std::string dumpPath, std::string filename, std::vector<std::string> names, std::vector<Result> objects, int width, int height)
 26 | {
 27 |   int pos = filename.find_last_of(".");
 28 |   std::string body = filename.substr(0, pos);
 29 |   std::string dstName = body + ".txt";
 30 |   std::ofstream writing_file;
 31 |   fs::path p = dumpPath;
 32 |   fs::create_directory(p);  
 33 |   p.append(dstName);
 34 |   writing_file.open(p.string(), std::ios::out);  
 35 |   for (const auto & object : objects) {
 36 |     const auto left = object.rect.x;
 37 |     const auto top = object.rect.y;
 38 |     const auto right = std::clamp(left + object.rect.width, 0, width);
 39 |     const auto bottom = std::clamp(top + object.rect.height, 0, height);
 40 |     std::string writing_text = format("%s %f %d %d %d %d", names[object.id].c_str(), object.prob, left, top, right, bottom);
 41 |     writing_file << writing_text << std::endl;
 42 |   }
 43 |   writing_file.close();
 44 | }
 45 | 
 46 | 
 47 | int main(int argc, char** argv)
 48 | {
 49 |   gflags::SetUsageMessage(
 50 | 			  "Usage : trt-yolo-app --flagfile=</path/to/config_file.txt> --<flag>=value ...");
 51 | 
 52 |   // parse config params
 53 |   yoloConfigParserInit(argc, argv);
 54 |   NetworkInfo yoloInfo = getYoloNetworkInfo();
 55 |   NetworkInfo yoloInfo1 = getYoloNetworkInfo1();  
 56 |   NetworkInfo yoloInfo2 = getYoloNetworkInfo2();  
 57 |   std::string directory = getDirectoryPath();
 58 |   std::string videoPath = getVideoPath();
 59 |   bool flg_save = getSaveDetections();
 60 |   std::string save_path = getSaveDetectionsPath();
 61 |   bool dont_show = get_dont_show_flg();
 62 |   const std::string dumpPath = get_dump_path();
 63 |   const bool cuda = get_cuda_flg();
 64 |   Config config;
 65 |   config.net_type = YOLOV4;
 66 |   config.file_model_cfg = yoloInfo.configFilePath;//"../configs/yolov7-tiny-relu-BDD100K-960x960-opt-params-mse-sparse.cfg";	
 67 |   config.file_model_weights = yoloInfo.wtsFilePath;//"../configs/yolov7-tiny-relu-BDD100K-960x960-opt-params-mse-sparse_last.weights";		
 68 |   config.calibration_image_list_file_txt = "../configs/calibration_images.txt";
 69 |   if (yoloInfo.precision == "kHALF") {
 70 |     config.inference_precison = FP16;
 71 |   }else if (yoloInfo.precision == "kINT8") {
 72 |     config.inference_precison = INT8;    
 73 |   } else {
 74 |     config.inference_precison = FP32;
 75 |   }
 76 |   config.detect_thresh = (float)get_score_thresh();
 77 |   config.batch =  yoloInfo.batch;
 78 |   config.width =  yoloInfo.width;
 79 |   config.height=  yoloInfo.height;
 80 |   config.dla = yoloInfo.dla;
 81 |   std::unique_ptr<Detector> detector(new Detector());
 82 |   detector->init(config);
 83 |   /*
 84 |   Config config1;
 85 |   config1.net_type = YOLOV4;
 86 |   config1.file_model_cfg = yoloInfo1.configFilePath;
 87 |   config1.file_model_weights = yoloInfo1.wtsFilePath;//"../configs/yolov7-tiny-relu-BDD100K-960x960-opt-params-mse-sparse_last.weights";		
 88 |   config1.calibration_image_list_file_txt = "../configs/calibration_images.txt";
 89 |   if (yoloInfo1.precision == "kHALF") {
 90 |     config1.inference_precison = FP16;
 91 |   }else if (yoloInfo1.precision == "kINT8") {
 92 |     config1.inference_precison = INT8;    
 93 |   } else {
 94 |     config1.inference_precison = FP32;
 95 |   }
 96 |   config1.detect_thresh = (float)get_score_thresh();
 97 |   config1.batch =  yoloInfo1.batch;
 98 |   config1.width =  yoloInfo1.width;
 99 |   config1.height=  yoloInfo1.height;
100 |   config1.dla = yoloInfo1.dla;
101 |   std::unique_ptr<Detector> detector1(new Detector());
102 |   detector1->init(config1);
103 |   */
104 |   
105 |   Config config2;
106 |   config2.net_type = YOLOV4;
107 |   config2.file_model_cfg = yoloInfo2.configFilePath;
108 |   config2.file_model_weights = yoloInfo2.wtsFilePath;//"../configs/yolov7-tiny-relu-BDD100K-960x960-opt-params-mse-sparse_last.weights";		
109 |   config2.calibration_image_list_file_txt = "../configs/calibration_images.txt";
110 |   if (yoloInfo2.precision == "kHALF") {
111 |     config2.inference_precison = FP16;
112 |   }else if (yoloInfo2.precision == "kINT8") {
113 |     config2.inference_precison = INT8;    
114 |   } else {
115 |     config2.inference_precison = FP32;
116 |   }
117 |   config2.detect_thresh = (float)get_score_thresh();
118 |   config2.batch =  yoloInfo2.batch;
119 |   config2.width =  yoloInfo2.width;
120 |   config2.height=  yoloInfo2.height;
121 |   config2.dla = yoloInfo2.dla;
122 |   std::unique_ptr<Detector> detector2(new Detector());
123 |   detector2->init(config2);
124 |   
125 |   std::vector<BatchResult> batch_res;
126 |   std::vector<BatchResult> batch_res1;
127 |   std::vector<BatchResult> batch_res2;  
128 |   Timer timer;
129 |   if (flg_save) {
130 |     fs::create_directory(save_path);
131 |     fs::path p = save_path;
132 |     p.append("detections");
133 |     fs::create_directory(p);
134 |   }
135 |   
136 |   
137 |   if (directory != "") {
138 |     for (const auto & file : std::filesystem::directory_iterator(directory)) {
139 |       std::cout << file.path() << std::endl;
140 |       cv::Mat src = cv::imread(file.path(), cv::IMREAD_UNCHANGED);
141 |       std::vector<cv::Mat> batch_img;
142 |       //      batch_img.push_back(src);
143 |       for (int b = 0; b < config.batch; b++) {
144 | 	batch_img.push_back(src);
145 |       }      
146 |       timer.reset();
147 | #pragma omp parallel sections
148 |       {
149 | #pragma omp section
150 | 	{
151 | 	  detector->detect(batch_img, batch_res, cuda);
152 | 	}
153 | #pragma omp section
154 | 	{
155 | 	  //detector1->detect(batch_img, batch_res, cuda);
156 | 	}	
157 | #pragma omp section
158 | 	{
159 | 	  detector2->detect(batch_img, batch_res2, cuda);
160 | 	}
161 |       }
162 |       detector->segment(batch_img);
163 |       //detector1->segment(batch_img);                
164 |       detector2->segment(batch_img);          
165 |       timer.out("detect");
166 | 
167 |       if (dumpPath != "not-specified") {
168 | 	fs::path p (file.path());
169 | 	std::string filename = p.filename().string();
170 | 	std::vector<std::string> names = get_names();
171 | 	write_prediction(dumpPath, filename, names, batch_res[0], src.cols, src.rows);
172 |       }      
173 |       //disp
174 |       if (dont_show == true) {
175 | 	continue;
176 |       }
177 |       for (int i=0;i<batch_img.size();++i) {
178 | 	for (const auto &r : batch_res[i]) {
179 | 	  detector->draw_BBox(batch_img[i], r);
180 | 	}
181 | 	/*
182 | 	for (const auto &r : batch_res1[i]) {
183 | 	  detector1->draw_BBox(batch_img[i], r);
184 | 	}
185 | 	*/	
186 | 	for (const auto &r : batch_res2[i]) {
187 | 	  detector2->draw_BBox(batch_img[i], r);	  
188 | 	}	
189 | 	cv::namedWindow("image" + std::to_string(i), cv::WINDOW_NORMAL);
190 | 	cv::imshow("image"+std::to_string(i), batch_img[i]);
191 | 	int k = cv::waitKey(0);
192 | 	if (k == 32) {
193 | 	  fs::path p (file.path());
194 | 	  std::string name = p.filename().string();
195 | 	  std::cout << "Save... " << name << std::endl;
196 | 	  detector->save_image(src, "log/", name);
197 | 	  cv::waitKey(0);
198 | 	}
199 |       }
200 |     }
201 |   } else if (videoPath != ""){
202 |     std::cout << videoPath << std::endl;
203 |     cv::VideoCapture video;
204 |     video.open(videoPath);    
205 |     cv::Mat frame;
206 |     int count = 0;
207 |     while (1) {
208 |       video >> frame;
209 |       if (frame.empty() == true) break;
210 | 
211 |       std::vector<cv::Mat> batch_img;
212 |       batch_img.push_back(frame);
213 |       timer.reset();
214 | #pragma omp parallel sections
215 |       {
216 | #pragma omp section
217 | 	{
218 | 	  detector->detect(batch_img, batch_res, cuda);
219 | 	}
220 | #pragma omp section
221 | 	{
222 | 	  //detector1->detect(batch_img, batch_res1, cuda);
223 | 	}	
224 | #pragma omp section
225 | 	{
226 | 	  detector2->detect(batch_img, batch_res2, cuda);
227 | 	}
228 |       }      
229 |       detector->segment(batch_img);
230 |       //      detector1->segment(batch_img);      
231 |       detector2->segment(batch_img);          
232 |       timer.out("detect");
233 | 
234 |       if (dont_show == true) {
235 | 	continue;
236 |       }      
237 |       //disp
238 |       for (int i=0;i<batch_img.size();++i) {
239 | 	for (const auto &r : batch_res[i]) {
240 | 	  detector->draw_BBox(batch_img[i], r);
241 | 	}
242 | 	/*
243 | 	for (const auto &r : batch_res1[i]) {
244 | 	  detector1->draw_BBox(batch_img[i], r);
245 | 	}
246 | 	*/
247 | 	for (const auto &r : batch_res2[i]) {
248 | 	  detector2->draw_BBox(batch_img[i], r);
249 | 	}		
250 | 	//cv::namedWindow("image" + std::to_string(i), cv::WINDOW_NORMAL);
251 | 	cv::namedWindow("image" + std::to_string(i));
252 | 	cv::imshow("image"+std::to_string(i), batch_img[i]);
253 | 
254 | 	if (flg_save) {
255 | 	  std::ostringstream sout;
256 | 	  sout << std::setfill('0') << std::setw(6) << count;	  
257 | 	  std::string name = "frame_" + sout.str() + ".png";
258 | 	  fs::path p = save_path;
259 | 	  p.append("detections");
260 | 	  detector->save_image(batch_img[i], p.string(), name);
261 | 	}
262 | 	if (i > 0) {
263 | 	  count++;
264 | 	}
265 |       }
266 |       count++;
267 |       if (cv::waitKey(1) == 'q') break;
268 |     }    
269 |   }
270 |   if (get_prof_flg()) {
271 |     detector->dump_profiling();
272 |   }
273 | }
274 | 


--------------------------------------------------------------------------------
/samples/sample_detector.cpp:
--------------------------------------------------------------------------------
  1 | #include "class_timer.hpp"
  2 | #include "class_detector.h"
  3 | #include "yolo_config_parser.h"
  4 | #include <memory>
  5 | #include <thread>
  6 | 
  7 | #include <filesystem>
  8 | #include <iostream>
  9 | #include <sys/stat.h>
 10 | #include <iostream>
 11 | #include <boost/filesystem.hpp>
 12 | 
 13 | namespace fs = boost::filesystem;
 14 | 
 15 | template <typename ... Args>
 16 | std::string format(const std::string& fmt, Args ... args )
 17 | {
 18 |   size_t len = std::snprintf( nullptr, 0, fmt.c_str(), args ... );
 19 |   std::vector<char> buf(len + 1);
 20 |   std::snprintf(&buf[0], len + 1, fmt.c_str(), args ... );
 21 |   return std::string(&buf[0], &buf[0] + len);
 22 | }
 23 | 
 24 | void
 25 | write_prediction(std::string dumpPath, std::string filename, std::vector<std::string> names, std::vector<Result> objects, int width, int height)
 26 | {
 27 |   int pos = filename.find_last_of(".");
 28 |   std::string body = filename.substr(0, pos);
 29 |   std::string dstName = body + ".txt";
 30 |   std::ofstream writing_file;
 31 |   fs::path p = dumpPath;
 32 |   fs::create_directory(p);  
 33 |   p.append(dstName);
 34 |   writing_file.open(p.string(), std::ios::out);
 35 |   for (const auto & object : objects) {
 36 |     const auto left = object.rect.x;
 37 |     const auto top = object.rect.y;
 38 |     const auto right = std::clamp(left + object.rect.width, 0, width);
 39 |     const auto bottom = std::clamp(top + object.rect.height, 0, height);
 40 |     std::string writing_text = format("%s %f %d %d %d %d", names[object.id].c_str(), object.prob, left, top, right, bottom);
 41 |     writing_file << writing_text << std::endl;
 42 |   }
 43 |   writing_file.close();
 44 | }
 45 | 
 46 | 
 47 | void
 48 | write_label(std::string outputPath, std::string filename, std::vector<std::string> names, std::vector<Result> objects, int width, int height)
 49 | {
 50 |   int pos = filename.find_last_of(".");
 51 |   std::string body = filename.substr(0, pos);
 52 |   std::string dstName = body + ".txt";
 53 |   std::ofstream writing_file;
 54 |   fs::path p = outputPath;
 55 |   fs::create_directory(p);  
 56 |   p.append(dstName);
 57 |   writing_file.open(p.string(), std::ios::out);
 58 |   std::cout << "Write" << p.string() << std::endl;
 59 |   for (const auto & object : objects) {
 60 |     const auto left = object.rect.x;
 61 |     const auto top = object.rect.y;
 62 |     const auto right = std::clamp(left + object.rect.width, 0, width);
 63 |     const auto bottom = std::clamp(top + object.rect.height, 0, height);
 64 |     double x = (left + right) / 2.0 / (double)width;
 65 |     double y = (top + bottom) / 2.0 / (double)height;
 66 |     double w = (right - left) / (double)width;
 67 |     double h = (bottom - top) / (double)height;    
 68 |     std::string writing_text = format("%d %f %f %f %f", object.id, x, y, w, h);
 69 |     writing_file << writing_text << std::endl;
 70 |   }
 71 |   writing_file.close();
 72 | }
 73 | 
 74 | 
 75 | int main(int argc, char** argv)
 76 | {
 77 |   gflags::SetUsageMessage(
 78 | 			  "Usage : trt-yolo-app --flagfile=</path/to/config_file.txt> --<flag>=value ...");
 79 | 
 80 |   // parse config params
 81 |   yoloConfigParserInit(argc, argv);
 82 |   NetworkInfo yoloInfo = getYoloNetworkInfo();
 83 |   std::string directory = getDirectoryPath();
 84 |   std::string videoPath = getVideoPath();
 85 |   int cam_id = getCameraID();
 86 |   bool flg_save = getSaveDetections();
 87 |   std::string save_path = getSaveDetectionsPath();
 88 |   bool dont_show = get_dont_show_flg();
 89 |   const std::string dumpPath = get_dump_path();
 90 |   const bool cuda = get_cuda_flg();
 91 |   const std::string target = get_target_label();
 92 |   const std::string outputPath = get_output_path();
 93 |   Config config;
 94 |   config.net_type = YOLOV4;
 95 |   config.file_model_cfg = yoloInfo.configFilePath;//"../configs/yolov7-tiny-relu-BDD100K-960x960-opt-params-mse-sparse.cfg";	
 96 |   config.file_model_weights = yoloInfo.wtsFilePath;//"../configs/yolov7-tiny-relu-BDD100K-960x960-opt-params-mse-sparse_last.weights";		
 97 |   config.calibration_image_list_file_txt = "../configs/calibration_images.txt";
 98 |   if (yoloInfo.precision == "kHALF") {
 99 |     config.inference_precison = FP16;
100 |   }else if (yoloInfo.precision == "kINT8") {
101 |     config.inference_precison = INT8;    
102 |   } else {
103 |     config.inference_precison = FP32;
104 |   }
105 |   config.detect_thresh = (float)get_score_thresh();
106 |   config.batch =  yoloInfo.batch;
107 |   config.width =  yoloInfo.width;
108 |   config.height=  yoloInfo.height;
109 |   config.dla = yoloInfo.dla;
110 |   std::unique_ptr<Detector> detector(new Detector());
111 |   detector->init(config);
112 |   std::vector<BatchResult> batch_res;
113 |   if (flg_save) {
114 |     fs::create_directory(save_path);
115 |     fs::path p = save_path;
116 |     p.append("detections");
117 |     fs::create_directory(p);
118 |   }
119 |   
120 |   
121 |   if (directory != "") {
122 |     for (const auto & file : std::filesystem::directory_iterator(directory)) {
123 |       std::cout << file.path() << std::endl;
124 |       //      cv::Mat src = cv::imread(file.path(), cv::IMREAD_UNCHANGED);
125 |       cv::Mat src = cv::imread(file.path());//, cv::IMREAD_COLOR);      
126 |       fs::path p (file.path());
127 |       std::string name = p.filename().string();
128 |       
129 |       std::vector<cv::Mat> batch_img;
130 |       //      batch_img.push_back(src);
131 |       for (int b = 0; b < config.batch; b++) {
132 | 	batch_img.push_back(src);
133 |       }      
134 |       detector->detect(batch_img, batch_res, cuda);
135 |       detector->segment(batch_img, name);
136 |       detector->regress(batch_img, batch_res, name);          
137 |       
138 |       if (dumpPath != "not-specified") {
139 | 	fs::path p (file.path());
140 | 	std::string filename = p.filename().string();
141 | 	std::vector<std::string> names = get_names();
142 | 	write_prediction(dumpPath, filename, names, batch_res[0], src.cols, src.rows);
143 |       }
144 |       if (outputPath != "not-specified") {
145 | 	fs::path p (file.path());
146 | 	std::string filename = p.filename().string();
147 | 	std::vector<std::string> names = get_names();
148 | 	write_label(outputPath, filename, names, batch_res[0], src.cols, src.rows);
149 |       }      
150 |       if (target != "") {
151 | 	std::vector<std::string> names = get_names();
152 | 	for (int i=0;i<batch_img.size();++i) {
153 | 	  for (const auto &r : batch_res[i]) {
154 | 	    int id = r.id;
155 | 	    if (names[id] == target) {
156 | 	      fs::path p = save_path;
157 | 	      p.append("target");
158 | 	      std::cout << "Find " << target << " -> save into " << p.string() << std::endl;
159 | 	      detector->save_image(batch_img[i], p.string(), name);	    
160 | 	    }
161 | 	  }
162 | 	}
163 |       }
164 |       //disp
165 |       for (int i=0;i<batch_img.size();++i) {
166 | 	for (const auto &r : batch_res[i]) {
167 | 	  //	  std::cout <<"batch "<<i<< " id:" << r.id << " prob:" << r.prob << " rect:" << r.rect << std::endl;
168 | 	  detector->draw_BBox(batch_img[i], r);
169 | 	}
170 | 	if (dont_show == true) {
171 | 	  continue;
172 | 	}	
173 | 	cv::namedWindow("image" + std::to_string(i), cv::WINDOW_NORMAL);
174 | 	cv::imshow("image"+std::to_string(i), batch_img[i]);
175 | 	int k = cv::waitKey(0);
176 | 	if (k == 32) {
177 | 	  cv::Mat tmp = cv::imread(file.path(), cv::IMREAD_UNCHANGED);
178 | 	  std::cout << "Save... " << name << std::endl;
179 | 	  detector->save_image(tmp, "log/", name);
180 | 	  cv::waitKey(0);
181 | 	}
182 |       }
183 |       if (flg_save) {
184 | 	fs::path p = save_path;
185 | 	p.append("detections");
186 | 	detector->save_image(batch_img[0], p.string(), name);
187 |       }            
188 |     }
189 |   } else if (videoPath != "" || cam_id != -1){
190 |     std::cout << videoPath << std::endl;
191 |     cv::VideoCapture video;
192 |     if (cam_id != -1) {
193 |       video.open(cam_id);
194 |     } else {
195 |       video.open(videoPath);
196 |     }
197 |     cv::Mat frame;
198 |     int count = 0;
199 |     cv::namedWindow("image" + std::to_string(0), cv::WINDOW_AUTOSIZE);
200 |     while (1) {
201 |       video >> frame;
202 |       if (frame.empty() == true) break;
203 | 
204 |       std::vector<cv::Mat> batch_img;
205 |       batch_img.push_back(frame);
206 | 
207 |       if (cam_id != -1 && flg_save) {      
208 | 	std::ostringstream sout;
209 | 	sout << std::setfill('0') << std::setw(6) << count;	  
210 | 	std::string name = "frame_" + sout.str() + ".jpg";
211 | 	fs::path p = save_path;	
212 | 	p.append("JPEGImages");
213 | 	fs::create_directory(p);
214 | 	std::cout << "Save... " << name << std::endl;
215 | 	detector->save_image(frame, p.string(), name);
216 |       }
217 |       
218 |       detector->detect(batch_img, batch_res, cuda);
219 |       detector->segment(batch_img, "");
220 |       detector->regress(batch_img, batch_res, "");          
221 | 
222 |       //disp
223 |       for (int i=0;i<batch_img.size();++i) {
224 | 	for (const auto &r : batch_res[i]) {
225 | 	  //	  std::cout <<"batch "<<i<< " id:" << r.id << " prob:" << r.prob << " rect:" << r.rect << std::endl;
226 | 	  detector->draw_BBox(batch_img[i], r);
227 | 	}
228 | 	if (!dont_show) {
229 | 	  //cv::namedWindow("image" + std::to_string(i), cv::WINDOW_NORMAL);
230 | 	  //cv::namedWindow("image" + std::to_string(i), cv::WINDOW_KEEPRATIO);
231 | 	  //cv::ResizeWindow("image" + std::to_string(i), 960, 640); 
232 | 	  cv::imshow("image"+std::to_string(i), batch_img[i]);
233 | 	}
234 | 	if (flg_save) {
235 | 	  std::ostringstream sout;
236 | 	  sout << std::setfill('0') << std::setw(6) << count;	  
237 | 	  std::string name = "frame_" + sout.str() + ".jpg";
238 | 	  fs::path p = save_path;
239 | 	  p.append("detections");
240 | 	  detector->save_image(batch_img[i], p.string(), name);
241 | 	}
242 | 	if (i > 0) {
243 | 	  count++;
244 | 	}
245 |       }
246 |       count++;
247 |       if (cv::waitKey(1) == 'q') break;
248 |     }    
249 |   }
250 |   if (get_prof_flg()) {
251 |     detector->dump_profiling();
252 |   }
253 | }
254 | 


--------------------------------------------------------------------------------
/src/lightnet_trt_core.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 TIER IV
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "lightnet_trt_core.hpp"
16 | 
17 | LightNetTensorRT::LightNetTensorRT(const ::Config &config)
18 | {
19 |   const bool cuda = get_cuda_flg();
20 | 
21 |   detector_ = std::make_unique<::Detector>();
22 |   detector_->init(config);
23 | }
24 | 
25 | void LightNetTensorRT::doInference(const std::vector<cv::Mat> & images, std::vector<cv::Mat> & masks)
26 | {
27 |   std::vector<BatchResult> batch_res;
28 |   detector_->detect(images, batch_res, cuda);
29 |   detector_->get_mask(masks);
30 | }
31 | 


--------------------------------------------------------------------------------
/src/lightnet_trt_core.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 TIER IV
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef LIGHTNET_TRT__LIGHTNET_TRT_CORE_HPP_
16 | #define LIGHTNET_TRT__LIGHTNET_TRT_CORE_HPP_
17 | 
18 | #include "class_detector.h"
19 | #include "yolo_config_parser.h"
20 | 
21 | #include <opencv2/opencv.hpp>
22 | 
23 | #include <vector>
24 | 
25 | class LightNetTensorRT
26 | {
27 | public:
28 |   LightNetTensorRT(const ::Config &config);
29 | 
30 |   void doInference(const std::vector<cv::Mat> & images, std::vector<cv::Mat> & masks);
31 | 
32 |   std::unique_ptr<Detector> detector_;
33 |   const bool cuda = false;
34 | };
35 | #endif  // LIGHTNET_TRT__LIGHTNET_TRT_CORE_HPP_
36 | 


--------------------------------------------------------------------------------
/src/lightnet_trt_node.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 TIER IV
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #include "lightnet_trt_node.hpp"
 16 | 
 17 | LightNetTensorRTNode::LightNetTensorRTNode(const rclcpp::NodeOptions &node_options)
 18 | : Node("tensorrt_lightnet", node_options),
 19 |   ipm_projector_(nullptr),
 20 |   base_frame_("base_link")
 21 | {
 22 |   transform_listener_ = std::make_shared<tier4_autoware_utils::TransformListener>(this);
 23 | 
 24 |   // Initialize a detector
 25 |   config_ = loadConfig();
 26 | 
 27 |   RCLCPP_INFO(this->get_logger(), "Start loading YOLO");
 28 |   RCLCPP_INFO(this->get_logger(), "Model Config: %s", config_.file_model_cfg.c_str());
 29 |   RCLCPP_INFO(this->get_logger(), "Model Weights: %s", config_.file_model_weights.c_str());
 30 |   RCLCPP_INFO(this->get_logger(), "Input size: (%d, %d)", config_.width, config_.height);
 31 |   lightnet_trt_ = std::make_unique<LightNetTensorRT>(config_);
 32 |   RCLCPP_INFO(this->get_logger(), "Finished loading YOLO");
 33 | 
 34 |   image_sub_ = image_transport::create_subscription(
 35 |     this, "~/in/image", [this](const sensor_msgs::msg::Image::ConstSharedPtr msg) { onImage(msg); }, "raw",
 36 |     rmw_qos_profile_sensor_data);
 37 |   camera_info_sub_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
 38 |     "~/in/camera_info", rclcpp::SensorDataQoS(),
 39 |     [this](const sensor_msgs::msg::CameraInfo::ConstSharedPtr msg) { onCameraInfo(msg); });
 40 | 
 41 |   image_pub_ = image_transport::create_publisher(this, "~/out/image");
 42 |   image_projected_pub_ = image_transport::create_publisher(this, "~/out/image_projected");
 43 | }
 44 | 
 45 | ::Config LightNetTensorRTNode::loadConfig()
 46 | {
 47 |   const std::string model_cfg = declare_parameter<std::string>("model_cfg");
 48 |   const std::string model_weights = declare_parameter<std::string>("model_weights");
 49 |   const int width = declare_parameter<int>("width");
 50 |   const int height = declare_parameter<int>("height");
 51 | 
 52 |   // Initialize a detector
 53 |   ::Config config;
 54 | 
 55 |   config.net_type = YOLOV4;
 56 |   config.file_model_cfg = model_cfg;
 57 |   config.file_model_weights = model_weights;
 58 |   config.inference_precison = FP32;
 59 |   config.batch = 1;
 60 |   config.width = width;
 61 |   config.height = height;
 62 |   config.dla = -1;
 63 | 
 64 |   return config;
 65 | }
 66 | 
 67 | void LightNetTensorRTNode::onCameraInfo(const sensor_msgs::msg::CameraInfo::ConstSharedPtr msg)
 68 | {
 69 |   if (ipm_projector_ != nullptr) return;
 70 |   const Eigen::Matrix3f intrinsic = get_intrinsic_matrix(*msg);
 71 |   const Eigen::Matrix3f intrinsic_resized = resize_intrinsic_matrix(intrinsic, config_.width, config_.height);
 72 | 
 73 |   const std::string camera_frame = msg->header.frame_id;
 74 |   geometry_msgs::msg::TransformStamped::ConstSharedPtr tf_base2cam_ptr =
 75 |     transform_listener_->getLatestTransform(base_frame_, camera_frame);
 76 |   if (!tf_base2cam_ptr) return;
 77 | 
 78 |   const Eigen::Matrix4f extrinsic = tf2::transformToEigen(tf_base2cam_ptr->transform).matrix().cast<float>();
 79 |   ipm_projector_ = std::make_unique<IPM>(intrinsic_resized, extrinsic);
 80 |   return;
 81 | }
 82 | 
 83 | void LightNetTensorRTNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
 84 | {
 85 |   RCLCPP_INFO(this->get_logger(), "Image received");
 86 |   cv_bridge::CvImagePtr in_image_ptr;
 87 |   in_image_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8);
 88 | 
 89 |   std::vector<cv::Mat> masks;
 90 |   lightnet_trt_->doInference({in_image_ptr->image}, masks);
 91 |   RCLCPP_INFO(this->get_logger(), "Inference succeeded");
 92 | 
 93 |   // Publish result
 94 |   cv_bridge::CvImage out_image;
 95 |   out_image.header = msg->header;
 96 |   out_image.encoding = sensor_msgs::image_encodings::BGR8;
 97 |   out_image.image = masks[0];
 98 |   image_pub_.publish(out_image.toImageMsg());
 99 | 
100 |   // Post processing
101 |   if (!ipm_projector_)
102 |   {
103 |     RCLCPP_WARN(this->get_logger(), "IPM projector is not initialized.");
104 |     return;
105 |   }
106 |   RCLCPP_WARN(this->get_logger(), "IPM projector running.");
107 | 
108 |   cv::Mat projected_mask;
109 |   ipm_projector_->run(masks[0], projected_mask);
110 | 
111 |   // Publish result
112 |   cv_bridge::CvImage out_projected_image;
113 |   out_projected_image.header = msg->header;
114 |   out_projected_image.encoding = sensor_msgs::image_encodings::BGR8;
115 |   out_projected_image.image = projected_mask;
116 |   image_projected_pub_.publish(out_projected_image.toImageMsg());
117 | }
118 | 
119 | #include "rclcpp_components/register_node_macro.hpp"
120 | RCLCPP_COMPONENTS_REGISTER_NODE(LightNetTensorRTNode)
121 | 


--------------------------------------------------------------------------------
/src/lightnet_trt_node.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 TIER IV
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef LIGHTNET_TRT__LIGHTNET_TRT_NODE_HPP_
16 | #define LIGHTNET_TRT__LIGHTNET_TRT_NODE_HPP_
17 | 
18 | #include "lightnet_trt_core.hpp"
19 | #include "utils.hpp"
20 | 
21 | #include <rclcpp/rclcpp.hpp>
22 | 
23 | #include <cv_bridge/cv_bridge.h>
24 | #include <image_transport/image_transport.hpp>
25 | #include <sensor_msgs/msg/image.hpp>
26 | #include <tf2_eigen/tf2_eigen.hpp>
27 | #include <tier4_autoware_utils/ros/transform_listener.hpp>
28 | 
29 | #include <optional>
30 | 
31 | class LightNetTensorRTNode : public rclcpp::Node
32 | {
33 | public:
34 |   explicit LightNetTensorRTNode(const rclcpp::NodeOptions &node_options);
35 | 
36 | private:
37 |   void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg);
38 |   void onCameraInfo(const sensor_msgs::msg::CameraInfo::ConstSharedPtr msg);
39 |   ::Config loadConfig();
40 | 
41 |   // ROS related variables
42 |   image_transport::Subscriber image_sub_;
43 |   rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr camera_info_sub_;
44 |   image_transport::Publisher image_pub_;
45 |   image_transport::Publisher image_projected_pub_;
46 |   std::shared_ptr<tier4_autoware_utils::TransformListener> transform_listener_;
47 | 
48 |   // Tools
49 |   std::unique_ptr<LightNetTensorRT> lightnet_trt_;
50 |   std::unique_ptr<IPM> ipm_projector_;
51 | 
52 |   // Misc
53 |   bool received_camera_info_;
54 |   const std::string base_frame_;
55 |   ::Config config_;
56 | };
57 | #endif  // LIGHTNET_TRT__LIGHTNET_TRT_NODE_HPP_
58 | 


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 TIER IV
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "lightnet_trt_node.hpp"
16 | 
17 | #include <memory>
18 | 
19 | int main(int argc, char ** argv)
20 | {
21 |   rclcpp::init(argc, argv);
22 |   rclcpp::NodeOptions node_options;
23 |   auto node = std::make_shared<LightNetTensorRTNode>(node_options);
24 | 
25 |   rclcpp::spin(node);
26 | 
27 |   return 0;
28 | }
29 | 


--------------------------------------------------------------------------------
/src/utils.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 TIER IV
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "utils.hpp"
16 | 
17 | Eigen::Matrix3f get_intrinsic_matrix(const sensor_msgs::msg::CameraInfo & camera_info)
18 | {
19 | 	Eigen::Matrix3f intrinsic_matrix;
20 | 	intrinsic_matrix << camera_info.k[0], camera_info.k[1], camera_info.k[2],
21 | 		camera_info.k[3], camera_info.k[4], camera_info.k[5],
22 | 		camera_info.k[6], camera_info.k[7], camera_info.k[8];
23 | 	return intrinsic_matrix;
24 | }
25 | 
26 | Eigen::Matrix3f resize_intrinsic_matrix(const Eigen::Matrix3f & intrinsic, const int resized_width, const int resized_height)
27 | {
28 | 	Eigen::Matrix3f resized_intrinsic;
29 | 	const float width_ratio = resized_width / 2.0 / intrinsic(0, 2);
30 | 	const float height_ratio = resized_height / 2.0 / intrinsic(1, 2);
31 | 	resized_intrinsic << intrinsic(0, 0) * width_ratio, 0, resized_width / 2,
32 | 		0, intrinsic(1, 1) * height_ratio, resized_height / 2,
33 | 		0, 0, 1;
34 | 	return resized_intrinsic;
35 | }
36 | 
37 | // TODO: Implement this in a smarter way
38 | IPM::IPM(const Eigen::Matrix3f & intrinsic, const Eigen::Matrix4f & extrinsic)
39 | {
40 | 	// Define points on the ground plane in the world coordinate system
41 | 	// 50 x 20 m rectangle
42 | 	const Eigen::Vector4f dst_p1{55, -10, 0, 1};
43 | 	const Eigen::Vector4f dst_p2{55, 10, 0, 1};
44 | 	const Eigen::Vector4f dst_p3{5, 10, 0, 1};
45 | 	const Eigen::Vector4f dst_p4{5, -10, 0, 1};
46 | 
47 | 	// Convert the points from the world coordinate system to the camera coordinate system
48 | 	const Eigen::Matrix4f extrinsic_inv = extrinsic.inverse();
49 | 	Eigen::Vector3f src_p1 = (extrinsic_inv * dst_p1).head<3>();
50 | 	Eigen::Vector3f src_p2 = (extrinsic_inv * dst_p2).head<3>();
51 | 	Eigen::Vector3f src_p3 = (extrinsic_inv * dst_p3).head<3>();
52 | 	Eigen::Vector3f src_p4 = (extrinsic_inv * dst_p4).head<3>();
53 | 
54 | 	// Project the points from 3D camera coordinate system to 2D image plane
55 | 	src_p1 = intrinsic * src_p1; src_p1 /= src_p1(2);
56 | 	src_p2 = intrinsic * src_p2; src_p2 /= src_p2(2);
57 | 	src_p3 = intrinsic * src_p3; src_p3 /= src_p3(2);
58 | 	src_p4 = intrinsic * src_p4; src_p4 /= src_p4(2);
59 | 
60 | 	// Create source and destination points for cv::getPerspectiveTransform
61 | 	const std::vector<cv::Point2f> src_pts = {
62 | 		cv::Point2f(src_p1(0), src_p1(1)),
63 | 		cv::Point2f(src_p2(0), src_p2(1)),
64 | 		cv::Point2f(src_p3(0), src_p3(1)),
65 | 		cv::Point2f(src_p4(0), src_p4(1))
66 | 	};
67 | 	const std::vector<cv::Point2f> dst_pts = {
68 | 		cv::Point2f(dst_p3(1) * 12 + 320, dst_p3(0) * 12),
69 | 		cv::Point2f(dst_p4(1) * 12 + 320, dst_p4(0) * 12),
70 | 		cv::Point2f(dst_p1(1) * 12 + 320, dst_p1(0) * 12),
71 | 		cv::Point2f(dst_p2(1) * 12 + 320, dst_p2(0) * 12)
72 | 	};
73 | 
74 | 	perspective_transform_ = cv::getPerspectiveTransform(src_pts, dst_pts);
75 | }
76 | 
77 | void IPM::run(const cv::Mat & img, cv::Mat & output_img)
78 | {
79 | 	cv::warpPerspective(img, output_img, perspective_transform_, cv::Size(640, 640));
80 | }


--------------------------------------------------------------------------------
/src/utils.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 TIER IV
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef LIGHTNET_TRT__UTILS_HPP_
16 | #define LIGHTNET_TRT__UTILS_HPP_
17 | 
18 | #include <opencv2/opencv.hpp>
19 | 
20 | #include <Eigen/Core>
21 | #include <Eigen/Geometry>
22 | 
23 | #include "sensor_msgs/msg/camera_info.hpp"
24 | #include "geometry_msgs/msg/transform.hpp"
25 | 
26 | Eigen::Matrix3f get_intrinsic_matrix(const sensor_msgs::msg::CameraInfo & camera_info);
27 | Eigen::Matrix3f resize_intrinsic_matrix(const Eigen::Matrix3f & intrinsic, const int resized_width, const int resized_height);
28 | 
29 | class IPM
30 | {
31 | public:
32 |   IPM(const Eigen::Matrix3f & intrinsic, const Eigen::Matrix4f & extrinsic);
33 |   void run(const cv::Mat & img, cv::Mat & output_img);
34 | private:
35 |   cv::Mat perspective_transform_;
36 | };
37 | 
38 | #endif  // LIGHTNET_TRT__UTILS_HPP_
39 | 


--------------------------------------------------------------------------------