├── CMakeLists.txt ├── CMakeLists.txt.user ├── Makefile ├── README.md ├── Utils.h ├── data ├── yolov3-tiny.cfg ├── yolov3.cfg ├── yolov4-tiny.cfg └── yolov4.cfg ├── logging.h ├── main.cpp ├── mish.cu ├── mish.h ├── trt_utils.cpp ├── trt_utils.h ├── yolo.cpp ├── yolo.h ├── yololayer.cpp ├── yololayer.cu └── yololayer.h /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | 3 | project(yolo) 4 | 5 | add_definitions(-std=c++11) 6 | 7 | option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) 8 | set(CMAKE_CXX_STANDARD 11) 9 | set(CMAKE_BUILD_TYPE Debug) 10 | 11 | find_package(CUDA REQUIRED) 12 | 13 | # fixed: variable was misspelled CUDA_NVCC_PLAGS, so these nvcc flags were never applied 14 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-std=c++11;-g;-G;-gencode;arch=compute_61;code=sm_61) 15 | 16 | include_directories(${PROJECT_SOURCE_DIR}/) 17 | 18 | include_directories(/usr/local/cuda/include) 19 | link_directories(/usr/local/cuda/lib64) 20 | 21 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED") 22 | 23 | cuda_add_library(yololayer SHARED ${PROJECT_SOURCE_DIR}/yololayer.cu ${PROJECT_SOURCE_DIR}/mish.cu) 24 | 25 | include_directories(/home/wh/program/TensorRT-7.0.0.11/include) 26 | link_directories(/home/wh/program/TensorRT-7.0.0.11/lib ) 27 | 28 | set(OpenCV_DIR /home/wh/program/opencv-4.2.0/build) 29 | find_package(OpenCV) 30 | # fixed: must expand the variable — the bare name was treated as a literal directory 31 | include_directories(${OpenCV_INCLUDE_DIRS}) 32 | 33 | 34 | add_executable(yolo main.cpp trt_utils.cpp yolo.cpp) 35 | target_link_libraries(yolo nvinfer nvinfer_plugin nvparsers cudart cublas stdc++fs yololayer ${OpenCV_LIBS}) 36 | 37 | add_definitions(-O2 -pthread) 38 | 39 | -------------------------------------------------------------------------------- /CMakeLists.txt.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | EnvironmentId 7 | {fb0098ed-9136-48fb-a11b-4f3ef4b526e4} 8 | 9 | 10 | ProjectExplorer.Project.ActiveTarget 11 | 0 12 | 13 | 14 |
ProjectExplorer.Project.EditorSettings 15 | 16 | true 17 | false 18 | true 19 | 20 | Cpp 21 | 22 | CppGlobal 23 | 24 | 25 | 26 | QmlJS 27 | 28 | QmlJSGlobal 29 | 30 | 31 | 2 32 | UTF-8 33 | false 34 | 4 35 | false 36 | 80 37 | true 38 | true 39 | 1 40 | true 41 | false 42 | 0 43 | true 44 | true 45 | 0 46 | 8 47 | true 48 | 1 49 | true 50 | true 51 | true 52 | false 53 | 54 | 55 | 56 | ProjectExplorer.Project.PluginSettings 57 | 58 | 59 | true 60 | 61 | 62 | 63 | ProjectExplorer.Project.Target.0 64 | 65 | Qt 5.14.2 (gcc_64) 66 | Qt 5.14.2 (gcc_64) 67 | {a69a9926-65a8-49d0-a70b-9fe5abf0939c} 68 | 0 69 | 0 70 | 0 71 | 72 | 73 | CMAKE_BUILD_TYPE:STRING=Debug 74 | CMAKE_CXX_COMPILER:STRING=%{Compiler:Executable:Cxx} 75 | CMAKE_C_COMPILER:STRING=%{Compiler:Executable:C} 76 | CMAKE_PREFIX_PATH:STRING=%{Qt:QT_INSTALL_PREFIX} 77 | QT_QMAKE_EXECUTABLE:STRING=%{Qt:qmakeExecutable} 78 | 79 | /home/wh/project/CUDA_learn/test_yololayer/deepstream_yolo/git_yolo/build-tensorrt-yolo-Qt_5_14_2_gcc_64-Debug 80 | 81 | 82 | 83 | 84 | all 85 | 86 | true 87 | CMakeProjectManager.MakeStep 88 | 89 | 1 90 | Build 91 | Build 92 | ProjectExplorer.BuildSteps.Build 93 | 94 | 95 | 96 | 97 | 98 | clean 99 | 100 | true 101 | CMakeProjectManager.MakeStep 102 | 103 | 1 104 | Clean 105 | Clean 106 | ProjectExplorer.BuildSteps.Clean 107 | 108 | 2 109 | false 110 | 111 | Debug 112 | CMakeProjectManager.CMakeBuildConfiguration 113 | 114 | 115 | 116 | CMAKE_BUILD_TYPE:STRING=Release 117 | CMAKE_CXX_COMPILER:STRING=%{Compiler:Executable:Cxx} 118 | CMAKE_C_COMPILER:STRING=%{Compiler:Executable:C} 119 | CMAKE_PREFIX_PATH:STRING=%{Qt:QT_INSTALL_PREFIX} 120 | QT_QMAKE_EXECUTABLE:STRING=%{Qt:qmakeExecutable} 121 | 122 | /home/wh/project/CUDA_learn/test_yololayer/deepstream_yolo/git_yolo/build-tensorrt-yolo-Qt_5_14_2_gcc_64-Release 123 | 124 | 125 | 126 | 127 | all 128 | 129 | true 130 | CMakeProjectManager.MakeStep 131 | 132 | 1 133 | Build 134 | Build 135 | ProjectExplorer.BuildSteps.Build 136 | 137 | 138 | 
139 | 140 | 141 | clean 142 | 143 | true 144 | CMakeProjectManager.MakeStep 145 | 146 | 1 147 | Clean 148 | Clean 149 | ProjectExplorer.BuildSteps.Clean 150 | 151 | 2 152 | false 153 | 154 | Release 155 | CMakeProjectManager.CMakeBuildConfiguration 156 | 157 | 158 | 159 | CMAKE_BUILD_TYPE:STRING=RelWithDebInfo 160 | CMAKE_CXX_COMPILER:STRING=%{Compiler:Executable:Cxx} 161 | CMAKE_C_COMPILER:STRING=%{Compiler:Executable:C} 162 | CMAKE_PREFIX_PATH:STRING=%{Qt:QT_INSTALL_PREFIX} 163 | QT_QMAKE_EXECUTABLE:STRING=%{Qt:qmakeExecutable} 164 | 165 | /home/wh/project/CUDA_learn/test_yololayer/deepstream_yolo/git_yolo/build-tensorrt-yolo-Qt_5_14_2_gcc_64-RelWithDebInfo 166 | 167 | 168 | 169 | 170 | all 171 | 172 | true 173 | CMakeProjectManager.MakeStep 174 | 175 | 1 176 | Build 177 | Build 178 | ProjectExplorer.BuildSteps.Build 179 | 180 | 181 | 182 | 183 | 184 | clean 185 | 186 | true 187 | CMakeProjectManager.MakeStep 188 | 189 | 1 190 | Clean 191 | Clean 192 | ProjectExplorer.BuildSteps.Clean 193 | 194 | 2 195 | false 196 | 197 | Release with Debug Information 198 | CMakeProjectManager.CMakeBuildConfiguration 199 | 200 | 201 | 202 | CMAKE_BUILD_TYPE:STRING=MinSizeRel 203 | CMAKE_CXX_COMPILER:STRING=%{Compiler:Executable:Cxx} 204 | CMAKE_C_COMPILER:STRING=%{Compiler:Executable:C} 205 | CMAKE_PREFIX_PATH:STRING=%{Qt:QT_INSTALL_PREFIX} 206 | QT_QMAKE_EXECUTABLE:STRING=%{Qt:qmakeExecutable} 207 | 208 | /home/wh/project/CUDA_learn/test_yololayer/deepstream_yolo/git_yolo/build-tensorrt-yolo-Qt_5_14_2_gcc_64-MinSizeRel 209 | 210 | 211 | 212 | 213 | all 214 | 215 | true 216 | CMakeProjectManager.MakeStep 217 | 218 | 1 219 | Build 220 | Build 221 | ProjectExplorer.BuildSteps.Build 222 | 223 | 224 | 225 | 226 | 227 | clean 228 | 229 | true 230 | CMakeProjectManager.MakeStep 231 | 232 | 1 233 | Clean 234 | Clean 235 | ProjectExplorer.BuildSteps.Clean 236 | 237 | 2 238 | false 239 | 240 | Minimum Size Release 241 | CMakeProjectManager.CMakeBuildConfiguration 242 | 243 | 4 244 | 245 
| 246 | 0 247 | Deploy 248 | Deploy 249 | ProjectExplorer.BuildSteps.Deploy 250 | 251 | 1 252 | ProjectExplorer.DefaultDeployConfiguration 253 | 254 | 1 255 | 256 | 257 | dwarf 258 | 259 | cpu-cycles 260 | 261 | 262 | 250 263 | 264 | -e 265 | cpu-cycles 266 | --call-graph 267 | dwarf,4096 268 | -F 269 | 250 270 | 271 | -F 272 | true 273 | 4096 274 | false 275 | false 276 | 1000 277 | 278 | true 279 | 280 | false 281 | false 282 | false 283 | false 284 | true 285 | 0.01 286 | 10 287 | true 288 | kcachegrind 289 | 1 290 | 25 291 | 292 | 1 293 | true 294 | false 295 | true 296 | valgrind 297 | 298 | 0 299 | 1 300 | 2 301 | 3 302 | 4 303 | 5 304 | 6 305 | 7 306 | 8 307 | 9 308 | 10 309 | 11 310 | 12 311 | 13 312 | 14 313 | 314 | 2 315 | 316 | yolo 317 | CMakeProjectManager.CMakeRunConfiguration.yolo 318 | yolo 319 | 320 | false 321 | 322 | false 323 | true 324 | true 325 | false 326 | false 327 | true 328 | 329 | /tmp/QtCreator-KeZdAz/qtc-cmake-GjVZhhZn 330 | 331 | 1 332 | 333 | 334 | 335 | ProjectExplorer.Project.TargetCount 336 | 1 337 | 338 | 339 | ProjectExplorer.Project.Updater.FileVersion 340 | 22 341 | 342 | 343 | Version 344 | 22 345 | 346 | 347 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a 5 | # copy of this software and associated documentation files (the "Software"), 6 | # to deal in the Software without restriction, including without limitation 7 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | # and/or sell copies of the Software, and to permit persons to whom the 9 | # Software is furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | # DEALINGS IN THE SOFTWARE. 
21 | ################################################################################ 22 | 23 | CUDA_VER?= 24 | ifeq ($(CUDA_VER),) 25 | $(error "CUDA_VER is not set") 26 | endif 27 | CC:= g++ 28 | NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc 29 | 30 | CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations 31 | CFLAGS+= -I../../includes -I/usr/local/cuda-$(CUDA_VER)/include 32 | 33 | LIBS:= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs 34 | LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group 35 | 36 | INCS:= $(wildcard *.h) 37 | SRCFILES:= nvdsinfer_yolo_engine.cpp \ 38 | nvdsparsebbox_Yolo.cpp \ 39 | yoloPlugins.cpp \ 40 | trt_utils.cpp \ 41 | yolo.cpp \ 42 | kernels.cu 43 | TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so 44 | 45 | TARGET_OBJS:= $(SRCFILES:.cpp=.o) 46 | TARGET_OBJS:= $(TARGET_OBJS:.cu=.o) 47 | 48 | all: $(TARGET_LIB) 49 | 50 | %.o: %.cpp $(INCS) Makefile 51 | $(CC) -c -o $@ $(CFLAGS) $< 52 | 53 | %.o: %.cu $(INCS) Makefile 54 | $(NVCC) -c -o $@ --compiler-options '-fPIC' $< 55 | 56 | $(TARGET_LIB) : $(TARGET_OBJS) 57 | $(CC) -o $@ $(TARGET_OBJS) $(LFLAGS) 58 | 59 | clean: 60 | rm -rf $(TARGET_LIB) 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # If this helps you, please star star star 2 | # tensorrt7-yolo 3 | Currently only Ubuntu is supported; Windows support will come soon. 4 | 5 | Supports yolov3, yolov3-tiny, yolov4, yolov4-tiny, and even your custom cfg network 6 | 7 | If you trained with darknet (AlexeyAB), it is usually supported. 8 | 9 | No ONNX needed: directly convert .cfg and .weights to a TensorRT engine 10 | 11 | This project borrows from [Deepstream](https://github.com/NVIDIA-AI-IOT/deepstream_reference_apps/tree/restructure) and [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx) 12 | 13 | ## Execute: 14 | ``` 15 | 1. clone.
16 | 2.set CMakeLists.txt TensorRT path, OpenCV path. 17 | 3.in main.cpp, set the different cfg and weights 18 | 4.set .cfg input_w and input_h; due to the TensorRT upsample, input_w should equal input_h 19 | 5.copy .cfg and .weights file to folder 20 | 6.mkdir build. 21 | 7.cd build && cmake .. && make 22 | 8.run ./yolo -s to build yolo engine 23 | 9.run ./yolo -d to start detect 24 | ``` 25 | ## set FP16 or FP32 26 | - FP16/FP32 can be selected by the macro `USE_FP16` 27 | -------------------------------------------------------------------------------- /Utils.h: -------------------------------------------------------------------------------- 1 | #ifndef __TRT_UTILS_H_ 2 | #define __TRT_UTILS_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifndef CUDA_CHECK 10 | 11 | #define CUDA_CHECK(callstr) \ 12 | { \ 13 | cudaError_t error_code = callstr; \ 14 | if (error_code != cudaSuccess) { \ 15 | std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \ 16 | assert(0); \ 17 | } \ 18 | } 19 | 20 | #endif 21 | 22 | namespace Tn 23 | { 24 | class Profiler : public nvinfer1::IProfiler 25 | { 26 | public: 27 | void printLayerTimes(int itrationsTimes) 28 | { 29 | float totalTime = 0; 30 | for (size_t i = 0; i < mProfile.size(); i++) 31 | { 32 | printf("%-40.40s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / itrationsTimes); 33 | totalTime += mProfile[i].second; 34 | } 35 | printf("Time over all layers: %4.3f\n", totalTime / itrationsTimes); 36 | } 37 | private: 38 | typedef std::pair Record; 39 | std::vector mProfile; 40 | 41 | virtual void reportLayerTime(const char* layerName, float ms) 42 | { 43 | auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; }); 44 | if (record == mProfile.end()) 45 | mProfile.push_back(std::make_pair(layerName, ms)); 46 | else 47 | record->second += ms; 48 | } 49 | }; 50 | 51 | //Logger for TensorRT info/warning/errors 52 | class Logger : public
nvinfer1::ILogger 53 | { 54 | public: 55 | 56 | Logger(): Logger(Severity::kWARNING) {} 57 | 58 | Logger(Severity severity): reportableSeverity(severity) {} 59 | 60 | void log(Severity severity, const char* msg) override 61 | { 62 | // suppress messages with severity enum value greater than the reportable 63 | if (severity > reportableSeverity) return; 64 | 65 | switch (severity) 66 | { 67 | case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break; 68 | case Severity::kERROR: std::cerr << "ERROR: "; break; 69 | case Severity::kWARNING: std::cerr << "WARNING: "; break; 70 | case Severity::kINFO: std::cerr << "INFO: "; break; 71 | default: std::cerr << "UNKNOWN: "; break; 72 | } 73 | std::cerr << msg << std::endl; 74 | } 75 | 76 | Severity reportableSeverity{Severity::kWARNING}; 77 | }; 78 | 79 | template 80 | void write(char*& buffer, const T& val) 81 | { 82 | *reinterpret_cast(buffer) = val; 83 | buffer += sizeof(T); 84 | } 85 | 86 | template 87 | void read(const char*& buffer, T& val) 88 | { 89 | val = *reinterpret_cast(buffer); 90 | buffer += sizeof(T); 91 | } 92 | } 93 | 94 | #endif -------------------------------------------------------------------------------- /data/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | 
[maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 
176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /data/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 
106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | 
pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | 
from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | 
[convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | 
activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 
718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | 790 | -------------------------------------------------------------------------------- /data/yolov4-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=2 30 | pad=1 31 | activation=leaky 32 | 33 | [convolutional] 34 | batch_normalize=1 35 | filters=64 36 | size=3 37 | stride=2 
38 | pad=1 39 | activation=leaky 40 | 41 | [convolutional] 42 | batch_normalize=1 43 | filters=64 44 | size=3 45 | stride=1 46 | pad=1 47 | activation=leaky 48 | 49 | [route] 50 | layers=-1 51 | groups=2 52 | group_id=1 53 | 54 | [convolutional] 55 | batch_normalize=1 56 | filters=32 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=32 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [route] 71 | layers = -1,-2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [route] 82 | layers = -6,-1 83 | 84 | [maxpool] 85 | size=2 86 | stride=2 87 | 88 | [convolutional] 89 | batch_normalize=1 90 | filters=128 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=leaky 95 | 96 | [route] 97 | layers=-1 98 | groups=2 99 | group_id=1 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=64 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [convolutional] 110 | batch_normalize=1 111 | filters=64 112 | size=3 113 | stride=1 114 | pad=1 115 | activation=leaky 116 | 117 | [route] 118 | layers = -1,-2 119 | 120 | [convolutional] 121 | batch_normalize=1 122 | filters=128 123 | size=1 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [route] 129 | layers = -6,-1 130 | 131 | [maxpool] 132 | size=2 133 | stride=2 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=256 138 | size=3 139 | stride=1 140 | pad=1 141 | activation=leaky 142 | 143 | [route] 144 | layers=-1 145 | groups=2 146 | group_id=1 147 | 148 | [convolutional] 149 | batch_normalize=1 150 | filters=128 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=leaky 155 | 156 | [convolutional] 157 | batch_normalize=1 158 | filters=128 159 | size=3 160 | stride=1 161 | pad=1 162 | activation=leaky 163 | 164 | [route] 165 | layers = -1,-2 166 | 167 | [convolutional] 168 | batch_normalize=1 169 | 
filters=256 170 | size=1 171 | stride=1 172 | pad=1 173 | activation=leaky 174 | 175 | [route] 176 | layers = -6,-1 177 | 178 | [maxpool] 179 | size=2 180 | stride=2 181 | 182 | [convolutional] 183 | batch_normalize=1 184 | filters=512 185 | size=3 186 | stride=1 187 | pad=1 188 | activation=leaky 189 | 190 | ################################## 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | filters=256 195 | size=1 196 | stride=1 197 | pad=1 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | filters=512 203 | size=3 204 | stride=1 205 | pad=1 206 | activation=leaky 207 | 208 | [convolutional] 209 | size=1 210 | stride=1 211 | pad=1 212 | filters=255 213 | activation=linear 214 | 215 | 216 | 217 | [yolo] 218 | mask = 3,4,5 219 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 220 | classes=80 221 | num=6 222 | jitter=.3 223 | scale_x_y = 1.05 224 | cls_normalizer=1.0 225 | iou_normalizer=0.07 226 | iou_loss=ciou 227 | ignore_thresh = .7 228 | truth_thresh = 1 229 | random=0 230 | resize=1.5 231 | nms_kind=greedynms 232 | beta_nms=0.6 233 | 234 | [route] 235 | layers = -4 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=128 240 | size=1 241 | stride=1 242 | pad=1 243 | activation=leaky 244 | 245 | [upsample] 246 | stride=2 247 | 248 | [route] 249 | layers = -1, 23 250 | 251 | [convolutional] 252 | batch_normalize=1 253 | filters=256 254 | size=3 255 | stride=1 256 | pad=1 257 | activation=leaky 258 | 259 | [convolutional] 260 | size=1 261 | stride=1 262 | pad=1 263 | filters=255 264 | activation=linear 265 | 266 | [yolo] 267 | mask = 1,2,3 268 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 269 | classes=80 270 | num=6 271 | jitter=.3 272 | scale_x_y = 1.05 273 | cls_normalizer=1.0 274 | iou_normalizer=0.07 275 | iou_loss=ciou 276 | ignore_thresh = .7 277 | truth_thresh = 1 278 | random=0 279 | resize=1.5 280 | nms_kind=greedynms 281 | beta_nms=0.6 282 | 
-------------------------------------------------------------------------------- /data/yolov4.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.949 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | max_batches = 500500 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | #cutmix=1 26 | mosaic=1 27 | 28 | #:104x104 54:52x52 85:26x26 104:13x13 for 416 29 | 30 | [convolutional] 31 | batch_normalize=1 32 | filters=32 33 | size=3 34 | stride=1 35 | pad=1 36 | activation=mish 37 | 38 | # Downsample 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=64 43 | size=3 44 | stride=2 45 | pad=1 46 | activation=mish 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=64 51 | size=1 52 | stride=1 53 | pad=1 54 | activation=mish 55 | 56 | [route] 57 | layers = -2 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=64 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=mish 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=32 70 | size=1 71 | stride=1 72 | pad=1 73 | activation=mish 74 | 75 | [convolutional] 76 | batch_normalize=1 77 | filters=64 78 | size=3 79 | stride=1 80 | pad=1 81 | activation=mish 82 | 83 | [shortcut] 84 | from=-3 85 | activation=linear 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=64 90 | size=1 91 | stride=1 92 | pad=1 93 | activation=mish 94 | 95 | [route] 96 | layers = -1,-7 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=64 101 | size=1 102 | stride=1 103 | pad=1 104 | activation=mish 105 | 106 | # Downsample 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=128 111 | size=3 112 | stride=2 113 | pad=1 114 | activation=mish 115 | 116 | [convolutional] 117 | 
batch_normalize=1 118 | filters=64 119 | size=1 120 | stride=1 121 | pad=1 122 | activation=mish 123 | 124 | [route] 125 | layers = -2 126 | 127 | [convolutional] 128 | batch_normalize=1 129 | filters=64 130 | size=1 131 | stride=1 132 | pad=1 133 | activation=mish 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=64 138 | size=1 139 | stride=1 140 | pad=1 141 | activation=mish 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=64 146 | size=3 147 | stride=1 148 | pad=1 149 | activation=mish 150 | 151 | [shortcut] 152 | from=-3 153 | activation=linear 154 | 155 | [convolutional] 156 | batch_normalize=1 157 | filters=64 158 | size=1 159 | stride=1 160 | pad=1 161 | activation=mish 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=64 166 | size=3 167 | stride=1 168 | pad=1 169 | activation=mish 170 | 171 | [shortcut] 172 | from=-3 173 | activation=linear 174 | 175 | [convolutional] 176 | batch_normalize=1 177 | filters=64 178 | size=1 179 | stride=1 180 | pad=1 181 | activation=mish 182 | 183 | [route] 184 | layers = -1,-10 185 | 186 | [convolutional] 187 | batch_normalize=1 188 | filters=128 189 | size=1 190 | stride=1 191 | pad=1 192 | activation=mish 193 | 194 | # Downsample 195 | 196 | [convolutional] 197 | batch_normalize=1 198 | filters=256 199 | size=3 200 | stride=2 201 | pad=1 202 | activation=mish 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=mish 211 | 212 | [route] 213 | layers = -2 214 | 215 | [convolutional] 216 | batch_normalize=1 217 | filters=128 218 | size=1 219 | stride=1 220 | pad=1 221 | activation=mish 222 | 223 | [convolutional] 224 | batch_normalize=1 225 | filters=128 226 | size=1 227 | stride=1 228 | pad=1 229 | activation=mish 230 | 231 | [convolutional] 232 | batch_normalize=1 233 | filters=128 234 | size=3 235 | stride=1 236 | pad=1 237 | activation=mish 238 | 239 | [shortcut] 240 | from=-3 241 | 
activation=linear 242 | 243 | [convolutional] 244 | batch_normalize=1 245 | filters=128 246 | size=1 247 | stride=1 248 | pad=1 249 | activation=mish 250 | 251 | [convolutional] 252 | batch_normalize=1 253 | filters=128 254 | size=3 255 | stride=1 256 | pad=1 257 | activation=mish 258 | 259 | [shortcut] 260 | from=-3 261 | activation=linear 262 | 263 | [convolutional] 264 | batch_normalize=1 265 | filters=128 266 | size=1 267 | stride=1 268 | pad=1 269 | activation=mish 270 | 271 | [convolutional] 272 | batch_normalize=1 273 | filters=128 274 | size=3 275 | stride=1 276 | pad=1 277 | activation=mish 278 | 279 | [shortcut] 280 | from=-3 281 | activation=linear 282 | 283 | [convolutional] 284 | batch_normalize=1 285 | filters=128 286 | size=1 287 | stride=1 288 | pad=1 289 | activation=mish 290 | 291 | [convolutional] 292 | batch_normalize=1 293 | filters=128 294 | size=3 295 | stride=1 296 | pad=1 297 | activation=mish 298 | 299 | [shortcut] 300 | from=-3 301 | activation=linear 302 | 303 | 304 | [convolutional] 305 | batch_normalize=1 306 | filters=128 307 | size=1 308 | stride=1 309 | pad=1 310 | activation=mish 311 | 312 | [convolutional] 313 | batch_normalize=1 314 | filters=128 315 | size=3 316 | stride=1 317 | pad=1 318 | activation=mish 319 | 320 | [shortcut] 321 | from=-3 322 | activation=linear 323 | 324 | [convolutional] 325 | batch_normalize=1 326 | filters=128 327 | size=1 328 | stride=1 329 | pad=1 330 | activation=mish 331 | 332 | [convolutional] 333 | batch_normalize=1 334 | filters=128 335 | size=3 336 | stride=1 337 | pad=1 338 | activation=mish 339 | 340 | [shortcut] 341 | from=-3 342 | activation=linear 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=128 347 | size=1 348 | stride=1 349 | pad=1 350 | activation=mish 351 | 352 | [convolutional] 353 | batch_normalize=1 354 | filters=128 355 | size=3 356 | stride=1 357 | pad=1 358 | activation=mish 359 | 360 | [shortcut] 361 | from=-3 362 | activation=linear 363 | 364 | 
[convolutional] 365 | batch_normalize=1 366 | filters=128 367 | size=1 368 | stride=1 369 | pad=1 370 | activation=mish 371 | 372 | [convolutional] 373 | batch_normalize=1 374 | filters=128 375 | size=3 376 | stride=1 377 | pad=1 378 | activation=mish 379 | 380 | [shortcut] 381 | from=-3 382 | activation=linear 383 | 384 | [convolutional] 385 | batch_normalize=1 386 | filters=128 387 | size=1 388 | stride=1 389 | pad=1 390 | activation=mish 391 | 392 | [route] 393 | layers = -1,-28 394 | 395 | [convolutional] 396 | batch_normalize=1 397 | filters=256 398 | size=1 399 | stride=1 400 | pad=1 401 | activation=mish 402 | 403 | # Downsample 404 | 405 | [convolutional] 406 | batch_normalize=1 407 | filters=512 408 | size=3 409 | stride=2 410 | pad=1 411 | activation=mish 412 | 413 | [convolutional] 414 | batch_normalize=1 415 | filters=256 416 | size=1 417 | stride=1 418 | pad=1 419 | activation=mish 420 | 421 | [route] 422 | layers = -2 423 | 424 | [convolutional] 425 | batch_normalize=1 426 | filters=256 427 | size=1 428 | stride=1 429 | pad=1 430 | activation=mish 431 | 432 | [convolutional] 433 | batch_normalize=1 434 | filters=256 435 | size=1 436 | stride=1 437 | pad=1 438 | activation=mish 439 | 440 | [convolutional] 441 | batch_normalize=1 442 | filters=256 443 | size=3 444 | stride=1 445 | pad=1 446 | activation=mish 447 | 448 | [shortcut] 449 | from=-3 450 | activation=linear 451 | 452 | 453 | [convolutional] 454 | batch_normalize=1 455 | filters=256 456 | size=1 457 | stride=1 458 | pad=1 459 | activation=mish 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=256 464 | size=3 465 | stride=1 466 | pad=1 467 | activation=mish 468 | 469 | [shortcut] 470 | from=-3 471 | activation=linear 472 | 473 | 474 | [convolutional] 475 | batch_normalize=1 476 | filters=256 477 | size=1 478 | stride=1 479 | pad=1 480 | activation=mish 481 | 482 | [convolutional] 483 | batch_normalize=1 484 | filters=256 485 | size=3 486 | stride=1 487 | pad=1 488 | 
activation=mish 489 | 490 | [shortcut] 491 | from=-3 492 | activation=linear 493 | 494 | 495 | [convolutional] 496 | batch_normalize=1 497 | filters=256 498 | size=1 499 | stride=1 500 | pad=1 501 | activation=mish 502 | 503 | [convolutional] 504 | batch_normalize=1 505 | filters=256 506 | size=3 507 | stride=1 508 | pad=1 509 | activation=mish 510 | 511 | [shortcut] 512 | from=-3 513 | activation=linear 514 | 515 | 516 | [convolutional] 517 | batch_normalize=1 518 | filters=256 519 | size=1 520 | stride=1 521 | pad=1 522 | activation=mish 523 | 524 | [convolutional] 525 | batch_normalize=1 526 | filters=256 527 | size=3 528 | stride=1 529 | pad=1 530 | activation=mish 531 | 532 | [shortcut] 533 | from=-3 534 | activation=linear 535 | 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=256 540 | size=1 541 | stride=1 542 | pad=1 543 | activation=mish 544 | 545 | [convolutional] 546 | batch_normalize=1 547 | filters=256 548 | size=3 549 | stride=1 550 | pad=1 551 | activation=mish 552 | 553 | [shortcut] 554 | from=-3 555 | activation=linear 556 | 557 | 558 | [convolutional] 559 | batch_normalize=1 560 | filters=256 561 | size=1 562 | stride=1 563 | pad=1 564 | activation=mish 565 | 566 | [convolutional] 567 | batch_normalize=1 568 | filters=256 569 | size=3 570 | stride=1 571 | pad=1 572 | activation=mish 573 | 574 | [shortcut] 575 | from=-3 576 | activation=linear 577 | 578 | [convolutional] 579 | batch_normalize=1 580 | filters=256 581 | size=1 582 | stride=1 583 | pad=1 584 | activation=mish 585 | 586 | [convolutional] 587 | batch_normalize=1 588 | filters=256 589 | size=3 590 | stride=1 591 | pad=1 592 | activation=mish 593 | 594 | [shortcut] 595 | from=-3 596 | activation=linear 597 | 598 | [convolutional] 599 | batch_normalize=1 600 | filters=256 601 | size=1 602 | stride=1 603 | pad=1 604 | activation=mish 605 | 606 | [route] 607 | layers = -1,-28 608 | 609 | [convolutional] 610 | batch_normalize=1 611 | filters=512 612 | size=1 613 | stride=1 
614 | pad=1 615 | activation=mish 616 | 617 | # Downsample 618 | 619 | [convolutional] 620 | batch_normalize=1 621 | filters=1024 622 | size=3 623 | stride=2 624 | pad=1 625 | activation=mish 626 | 627 | [convolutional] 628 | batch_normalize=1 629 | filters=512 630 | size=1 631 | stride=1 632 | pad=1 633 | activation=mish 634 | 635 | [route] 636 | layers = -2 637 | 638 | [convolutional] 639 | batch_normalize=1 640 | filters=512 641 | size=1 642 | stride=1 643 | pad=1 644 | activation=mish 645 | 646 | [convolutional] 647 | batch_normalize=1 648 | filters=512 649 | size=1 650 | stride=1 651 | pad=1 652 | activation=mish 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=512 657 | size=3 658 | stride=1 659 | pad=1 660 | activation=mish 661 | 662 | [shortcut] 663 | from=-3 664 | activation=linear 665 | 666 | [convolutional] 667 | batch_normalize=1 668 | filters=512 669 | size=1 670 | stride=1 671 | pad=1 672 | activation=mish 673 | 674 | [convolutional] 675 | batch_normalize=1 676 | filters=512 677 | size=3 678 | stride=1 679 | pad=1 680 | activation=mish 681 | 682 | [shortcut] 683 | from=-3 684 | activation=linear 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=512 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=mish 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | filters=512 697 | size=3 698 | stride=1 699 | pad=1 700 | activation=mish 701 | 702 | [shortcut] 703 | from=-3 704 | activation=linear 705 | 706 | [convolutional] 707 | batch_normalize=1 708 | filters=512 709 | size=1 710 | stride=1 711 | pad=1 712 | activation=mish 713 | 714 | [convolutional] 715 | batch_normalize=1 716 | filters=512 717 | size=3 718 | stride=1 719 | pad=1 720 | activation=mish 721 | 722 | [shortcut] 723 | from=-3 724 | activation=linear 725 | 726 | [convolutional] 727 | batch_normalize=1 728 | filters=512 729 | size=1 730 | stride=1 731 | pad=1 732 | activation=mish 733 | 734 | [route] 735 | layers = -1,-16 736 | 737 | [convolutional] 738 
| batch_normalize=1 739 | filters=1024 740 | size=1 741 | stride=1 742 | pad=1 743 | activation=mish 744 | 745 | ########################## 746 | 747 | [convolutional] 748 | batch_normalize=1 749 | filters=512 750 | size=1 751 | stride=1 752 | pad=1 753 | activation=leaky 754 | 755 | [convolutional] 756 | batch_normalize=1 757 | size=3 758 | stride=1 759 | pad=1 760 | filters=1024 761 | activation=leaky 762 | 763 | [convolutional] 764 | batch_normalize=1 765 | filters=512 766 | size=1 767 | stride=1 768 | pad=1 769 | activation=leaky 770 | 771 | ### SPP ### 772 | [maxpool] 773 | stride=1 774 | size=5 775 | 776 | [route] 777 | layers=-2 778 | 779 | [maxpool] 780 | stride=1 781 | size=9 782 | 783 | [route] 784 | layers=-4 785 | 786 | [maxpool] 787 | stride=1 788 | size=13 789 | 790 | [route] 791 | layers=-1,-3,-5,-6 792 | ### End SPP ### 793 | 794 | [convolutional] 795 | batch_normalize=1 796 | filters=512 797 | size=1 798 | stride=1 799 | pad=1 800 | activation=leaky 801 | 802 | [convolutional] 803 | batch_normalize=1 804 | size=3 805 | stride=1 806 | pad=1 807 | filters=1024 808 | activation=leaky 809 | 810 | [convolutional] 811 | batch_normalize=1 812 | filters=512 813 | size=1 814 | stride=1 815 | pad=1 816 | activation=leaky 817 | 818 | [convolutional] 819 | batch_normalize=1 820 | filters=256 821 | size=1 822 | stride=1 823 | pad=1 824 | activation=leaky 825 | 826 | [upsample] 827 | stride=2 828 | 829 | [route] 830 | layers = 85 831 | 832 | [convolutional] 833 | batch_normalize=1 834 | filters=256 835 | size=1 836 | stride=1 837 | pad=1 838 | activation=leaky 839 | 840 | [route] 841 | layers = -1, -3 842 | 843 | [convolutional] 844 | batch_normalize=1 845 | filters=256 846 | size=1 847 | stride=1 848 | pad=1 849 | activation=leaky 850 | 851 | [convolutional] 852 | batch_normalize=1 853 | size=3 854 | stride=1 855 | pad=1 856 | filters=512 857 | activation=leaky 858 | 859 | [convolutional] 860 | batch_normalize=1 861 | filters=256 862 | size=1 863 | stride=1 864 
| pad=1 865 | activation=leaky 866 | 867 | [convolutional] 868 | batch_normalize=1 869 | size=3 870 | stride=1 871 | pad=1 872 | filters=512 873 | activation=leaky 874 | 875 | [convolutional] 876 | batch_normalize=1 877 | filters=256 878 | size=1 879 | stride=1 880 | pad=1 881 | activation=leaky 882 | 883 | [convolutional] 884 | batch_normalize=1 885 | filters=128 886 | size=1 887 | stride=1 888 | pad=1 889 | activation=leaky 890 | 891 | [upsample] 892 | stride=2 893 | 894 | [route] 895 | layers = 54 896 | 897 | [convolutional] 898 | batch_normalize=1 899 | filters=128 900 | size=1 901 | stride=1 902 | pad=1 903 | activation=leaky 904 | 905 | [route] 906 | layers = -1, -3 907 | 908 | [convolutional] 909 | batch_normalize=1 910 | filters=128 911 | size=1 912 | stride=1 913 | pad=1 914 | activation=leaky 915 | 916 | [convolutional] 917 | batch_normalize=1 918 | size=3 919 | stride=1 920 | pad=1 921 | filters=256 922 | activation=leaky 923 | 924 | [convolutional] 925 | batch_normalize=1 926 | filters=128 927 | size=1 928 | stride=1 929 | pad=1 930 | activation=leaky 931 | 932 | [convolutional] 933 | batch_normalize=1 934 | size=3 935 | stride=1 936 | pad=1 937 | filters=256 938 | activation=leaky 939 | 940 | [convolutional] 941 | batch_normalize=1 942 | filters=128 943 | size=1 944 | stride=1 945 | pad=1 946 | activation=leaky 947 | 948 | ########################## 949 | 950 | [convolutional] 951 | batch_normalize=1 952 | size=3 953 | stride=1 954 | pad=1 955 | filters=256 956 | activation=leaky 957 | 958 | [convolutional] 959 | size=1 960 | stride=1 961 | pad=1 962 | filters=255 963 | activation=linear 964 | 965 | 966 | [yolo] 967 | mask = 0,1,2 968 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 969 | classes=80 970 | num=9 971 | jitter=.3 972 | ignore_thresh = .7 973 | truth_thresh = 1 974 | scale_x_y = 1.2 975 | iou_thresh=0.213 976 | cls_normalizer=1.0 977 | iou_normalizer=0.07 978 | iou_loss=ciou 979 | nms_kind=greedynms 
980 | beta_nms=0.6 981 | 982 | 983 | [route] 984 | layers = -4 985 | 986 | [convolutional] 987 | batch_normalize=1 988 | size=3 989 | stride=2 990 | pad=1 991 | filters=256 992 | activation=leaky 993 | 994 | [route] 995 | layers = -1, -16 996 | 997 | [convolutional] 998 | batch_normalize=1 999 | filters=256 1000 | size=1 1001 | stride=1 1002 | pad=1 1003 | activation=leaky 1004 | 1005 | [convolutional] 1006 | batch_normalize=1 1007 | size=3 1008 | stride=1 1009 | pad=1 1010 | filters=512 1011 | activation=leaky 1012 | 1013 | [convolutional] 1014 | batch_normalize=1 1015 | filters=256 1016 | size=1 1017 | stride=1 1018 | pad=1 1019 | activation=leaky 1020 | 1021 | [convolutional] 1022 | batch_normalize=1 1023 | size=3 1024 | stride=1 1025 | pad=1 1026 | filters=512 1027 | activation=leaky 1028 | 1029 | [convolutional] 1030 | batch_normalize=1 1031 | filters=256 1032 | size=1 1033 | stride=1 1034 | pad=1 1035 | activation=leaky 1036 | 1037 | [convolutional] 1038 | batch_normalize=1 1039 | size=3 1040 | stride=1 1041 | pad=1 1042 | filters=512 1043 | activation=leaky 1044 | 1045 | [convolutional] 1046 | size=1 1047 | stride=1 1048 | pad=1 1049 | filters=255 1050 | activation=linear 1051 | 1052 | 1053 | [yolo] 1054 | mask = 3,4,5 1055 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 1056 | classes=80 1057 | num=9 1058 | jitter=.3 1059 | ignore_thresh = .7 1060 | truth_thresh = 1 1061 | scale_x_y = 1.1 1062 | iou_thresh=0.213 1063 | cls_normalizer=1.0 1064 | iou_normalizer=0.07 1065 | iou_loss=ciou 1066 | nms_kind=greedynms 1067 | beta_nms=0.6 1068 | 1069 | 1070 | [route] 1071 | layers = -4 1072 | 1073 | [convolutional] 1074 | batch_normalize=1 1075 | size=3 1076 | stride=2 1077 | pad=1 1078 | filters=512 1079 | activation=leaky 1080 | 1081 | [route] 1082 | layers = -1, -37 1083 | 1084 | [convolutional] 1085 | batch_normalize=1 1086 | filters=512 1087 | size=1 1088 | stride=1 1089 | pad=1 1090 | activation=leaky 1091 | 1092 | 
[convolutional] 1093 | batch_normalize=1 1094 | size=3 1095 | stride=1 1096 | pad=1 1097 | filters=1024 1098 | activation=leaky 1099 | 1100 | [convolutional] 1101 | batch_normalize=1 1102 | filters=512 1103 | size=1 1104 | stride=1 1105 | pad=1 1106 | activation=leaky 1107 | 1108 | [convolutional] 1109 | batch_normalize=1 1110 | size=3 1111 | stride=1 1112 | pad=1 1113 | filters=1024 1114 | activation=leaky 1115 | 1116 | [convolutional] 1117 | batch_normalize=1 1118 | filters=512 1119 | size=1 1120 | stride=1 1121 | pad=1 1122 | activation=leaky 1123 | 1124 | [convolutional] 1125 | batch_normalize=1 1126 | size=3 1127 | stride=1 1128 | pad=1 1129 | filters=1024 1130 | activation=leaky 1131 | 1132 | [convolutional] 1133 | size=1 1134 | stride=1 1135 | pad=1 1136 | filters=255 1137 | activation=linear 1138 | 1139 | 1140 | [yolo] 1141 | mask = 6,7,8 1142 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 1143 | classes=80 1144 | num=9 1145 | jitter=.3 1146 | ignore_thresh = .7 1147 | truth_thresh = 1 1148 | random=1 1149 | scale_x_y = 1.05 1150 | iou_thresh=0.213 1151 | cls_normalizer=1.0 1152 | iou_normalizer=0.07 1153 | iou_loss=ciou 1154 | nms_kind=greedynms 1155 | beta_nms=0.6 1156 | 1157 | -------------------------------------------------------------------------------- /logging.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TENSORRT_LOGGING_H 18 | #define TENSORRT_LOGGING_H 19 | 20 | #include "NvInferRuntimeCommon.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | using Severity = nvinfer1::ILogger::Severity; 30 | 31 | class LogStreamConsumerBuffer : public std::stringbuf 32 | { 33 | public: 34 | LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog) 35 | : mOutput(stream) 36 | , mPrefix(prefix) 37 | , mShouldLog(shouldLog) 38 | { 39 | } 40 | 41 | LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) 42 | : mOutput(other.mOutput) 43 | { 44 | } 45 | 46 | ~LogStreamConsumerBuffer() 47 | { 48 | // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence 49 | // std::streambuf::pptr() gives a pointer to the current position of the output sequence 50 | // if the pointer to the beginning is not equal to the pointer to the current position, 51 | // call putOutput() to log the output to the stream 52 | if (pbase() != pptr()) 53 | { 54 | putOutput(); 55 | } 56 | } 57 | 58 | // synchronizes the stream buffer and returns 0 on success 59 | // synchronizing the stream buffer consists of inserting the buffer contents into the stream, 60 | // resetting the buffer and flushing the stream 61 | virtual int sync() 62 | { 63 | putOutput(); 64 | return 0; 65 | } 66 | 67 | void putOutput() 68 | { 69 | if (mShouldLog) 70 | { 71 | // prepend timestamp 72 | std::time_t timestamp = std::time(nullptr); 73 | tm* tm_local = std::localtime(×tamp); 74 | std::cout << "["; 75 | std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/"; 76 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/"; 77 | std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-"; 78 | std::cout << 
std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":"; 79 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":"; 80 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] "; 81 | // std::stringbuf::str() gets the string contents of the buffer 82 | // insert the buffer contents pre-appended by the appropriate prefix into the stream 83 | mOutput << mPrefix << str(); 84 | // set the buffer to empty 85 | str(""); 86 | // flush the stream 87 | mOutput.flush(); 88 | } 89 | } 90 | 91 | void setShouldLog(bool shouldLog) 92 | { 93 | mShouldLog = shouldLog; 94 | } 95 | 96 | private: 97 | std::ostream& mOutput; 98 | std::string mPrefix; 99 | bool mShouldLog; 100 | }; 101 | 102 | //! 103 | //! \class LogStreamConsumerBase 104 | //! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer 105 | //! 106 | class LogStreamConsumerBase 107 | { 108 | public: 109 | LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog) 110 | : mBuffer(stream, prefix, shouldLog) 111 | { 112 | } 113 | 114 | protected: 115 | LogStreamConsumerBuffer mBuffer; 116 | }; 117 | 118 | //! 119 | //! \class LogStreamConsumer 120 | //! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages. 121 | //! Order of base classes is LogStreamConsumerBase and then std::ostream. 122 | //! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field 123 | //! in LogStreamConsumer and then the address of the buffer is passed to std::ostream. 124 | //! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream. 125 | //! Please do not change the order of the parent classes. 126 | //! 127 | class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream 128 | { 129 | public: 130 | //! 
\brief Creates a LogStreamConsumer which logs messages with level severity. 131 | //! Reportable severity determines if the messages are severe enough to be logged. 132 | LogStreamConsumer(Severity reportableSeverity, Severity severity) 133 | : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity) 134 | , std::ostream(&mBuffer) // links the stream buffer with the stream 135 | , mShouldLog(severity <= reportableSeverity) 136 | , mSeverity(severity) 137 | { 138 | } 139 | 140 | LogStreamConsumer(LogStreamConsumer&& other) 141 | : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog) 142 | , std::ostream(&mBuffer) // links the stream buffer with the stream 143 | , mShouldLog(other.mShouldLog) 144 | , mSeverity(other.mSeverity) 145 | { 146 | } 147 | 148 | void setReportableSeverity(Severity reportableSeverity) 149 | { 150 | mShouldLog = mSeverity <= reportableSeverity; 151 | mBuffer.setShouldLog(mShouldLog); 152 | } 153 | 154 | private: 155 | static std::ostream& severityOstream(Severity severity) 156 | { 157 | return severity >= Severity::kINFO ? std::cout : std::cerr; 158 | } 159 | 160 | static std::string severityPrefix(Severity severity) 161 | { 162 | switch (severity) 163 | { 164 | case Severity::kINTERNAL_ERROR: return "[F] "; 165 | case Severity::kERROR: return "[E] "; 166 | case Severity::kWARNING: return "[W] "; 167 | case Severity::kINFO: return "[I] "; 168 | case Severity::kVERBOSE: return "[V] "; 169 | default: assert(0); return ""; 170 | } 171 | } 172 | 173 | bool mShouldLog; 174 | Severity mSeverity; 175 | }; 176 | 177 | //! \class Logger 178 | //! 179 | //! \brief Class which manages logging of TensorRT tools and samples 180 | //! 181 | //! \details This class provides a common interface for TensorRT tools and samples to log information to the console, 182 | //! and supports logging two types of messages: 183 | //! 184 | //! 
- Debugging messages with an associated severity (info, warning, error, or internal error/fatal) 185 | //! - Test pass/fail messages 186 | //! 187 | //! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is 188 | //! that the logic for controlling the verbosity and formatting of sample output is centralized in one location. 189 | //! 190 | //! In the future, this class could be extended to support dumping test results to a file in some standard format 191 | //! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run). 192 | //! 193 | //! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger 194 | //! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT 195 | //! library and messages coming from the sample. 196 | //! 197 | //! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the 198 | //! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger 199 | //! object. 200 | 201 | class Logger : public nvinfer1::ILogger 202 | { 203 | public: 204 | Logger(Severity severity = Severity::kWARNING) 205 | : mReportableSeverity(severity) 206 | { 207 | } 208 | 209 | //! 210 | //! \enum TestResult 211 | //! \brief Represents the state of a given test 212 | //! 213 | enum class TestResult 214 | { 215 | kRUNNING, //!< The test is running 216 | kPASSED, //!< The test passed 217 | kFAILED, //!< The test failed 218 | kWAIVED //!< The test was waived 219 | }; 220 | 221 | //! 222 | //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger 223 | //! \return The nvinfer1::ILogger associated with this Logger 224 | //! 225 | //! 
TODO Once all samples are updated to use this method to register the logger with TensorRT, 226 | //! we can eliminate the inheritance of Logger from ILogger 227 | //! 228 | nvinfer1::ILogger& getTRTLogger() 229 | { 230 | return *this; 231 | } 232 | 233 | //! 234 | //! \brief Implementation of the nvinfer1::ILogger::log() virtual method 235 | //! 236 | //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the 237 | //! inheritance from nvinfer1::ILogger 238 | //! 239 | void log(Severity severity, const char* msg) override 240 | { 241 | LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl; 242 | } 243 | 244 | //! 245 | //! \brief Method for controlling the verbosity of logging output 246 | //! 247 | //! \param severity The logger will only emit messages that have severity of this level or higher. 248 | //! 249 | void setReportableSeverity(Severity severity) 250 | { 251 | mReportableSeverity = severity; 252 | } 253 | 254 | //! 255 | //! \brief Opaque handle that holds logging information for a particular test 256 | //! 257 | //! This object is an opaque handle to information used by the Logger to print test results. 258 | //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used 259 | //! with Logger::reportTest{Start,End}(). 260 | //! 261 | class TestAtom 262 | { 263 | public: 264 | TestAtom(TestAtom&&) = default; 265 | 266 | private: 267 | friend class Logger; 268 | 269 | TestAtom(bool started, const std::string& name, const std::string& cmdline) 270 | : mStarted(started) 271 | , mName(name) 272 | , mCmdline(cmdline) 273 | { 274 | } 275 | 276 | bool mStarted; 277 | std::string mName; 278 | std::string mCmdline; 279 | }; 280 | 281 | //! 282 | //! \brief Define a test for logging 283 | //! 284 | //! \param[in] name The name of the test. This should be a string starting with 285 | //! 
"TensorRT" and containing dot-separated strings containing 286 | //! the characters [A-Za-z0-9_]. 287 | //! For example, "TensorRT.sample_googlenet" 288 | //! \param[in] cmdline The command line used to reproduce the test 289 | // 290 | //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). 291 | //! 292 | static TestAtom defineTest(const std::string& name, const std::string& cmdline) 293 | { 294 | return TestAtom(false, name, cmdline); 295 | } 296 | 297 | //! 298 | //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments 299 | //! as input 300 | //! 301 | //! \param[in] name The name of the test 302 | //! \param[in] argc The number of command-line arguments 303 | //! \param[in] argv The array of command-line arguments (given as C strings) 304 | //! 305 | //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). 306 | static TestAtom defineTest(const std::string& name, int argc, char const* const* argv) 307 | { 308 | auto cmdline = genCmdlineString(argc, argv); 309 | return defineTest(name, cmdline); 310 | } 311 | 312 | //! 313 | //! \brief Report that a test has started. 314 | //! 315 | //! \pre reportTestStart() has not been called yet for the given testAtom 316 | //! 317 | //! \param[in] testAtom The handle to the test that has started 318 | //! 319 | static void reportTestStart(TestAtom& testAtom) 320 | { 321 | reportTestResult(testAtom, TestResult::kRUNNING); 322 | assert(!testAtom.mStarted); 323 | testAtom.mStarted = true; 324 | } 325 | 326 | //! 327 | //! \brief Report that a test has ended. 328 | //! 329 | //! \pre reportTestStart() has been called for the given testAtom 330 | //! 331 | //! \param[in] testAtom The handle to the test that has ended 332 | //! \param[in] result The result of the test. Should be one of TestResult::kPASSED, 333 | //! TestResult::kFAILED, TestResult::kWAIVED 334 | //! 
335 | static void reportTestEnd(const TestAtom& testAtom, TestResult result) 336 | { 337 | assert(result != TestResult::kRUNNING); 338 | assert(testAtom.mStarted); 339 | reportTestResult(testAtom, result); 340 | } 341 | 342 | static int reportPass(const TestAtom& testAtom) 343 | { 344 | reportTestEnd(testAtom, TestResult::kPASSED); 345 | return EXIT_SUCCESS; 346 | } 347 | 348 | static int reportFail(const TestAtom& testAtom) 349 | { 350 | reportTestEnd(testAtom, TestResult::kFAILED); 351 | return EXIT_FAILURE; 352 | } 353 | 354 | static int reportWaive(const TestAtom& testAtom) 355 | { 356 | reportTestEnd(testAtom, TestResult::kWAIVED); 357 | return EXIT_SUCCESS; 358 | } 359 | 360 | static int reportTest(const TestAtom& testAtom, bool pass) 361 | { 362 | return pass ? reportPass(testAtom) : reportFail(testAtom); 363 | } 364 | 365 | Severity getReportableSeverity() const 366 | { 367 | return mReportableSeverity; 368 | } 369 | 370 | private: 371 | //! 372 | //! \brief returns an appropriate string for prefixing a log message with the given severity 373 | //! 374 | static const char* severityPrefix(Severity severity) 375 | { 376 | switch (severity) 377 | { 378 | case Severity::kINTERNAL_ERROR: return "[F] "; 379 | case Severity::kERROR: return "[E] "; 380 | case Severity::kWARNING: return "[W] "; 381 | case Severity::kINFO: return "[I] "; 382 | case Severity::kVERBOSE: return "[V] "; 383 | default: assert(0); return ""; 384 | } 385 | } 386 | 387 | //! 388 | //! \brief returns an appropriate string for prefixing a test result message with the given result 389 | //! 390 | static const char* testResultString(TestResult result) 391 | { 392 | switch (result) 393 | { 394 | case TestResult::kRUNNING: return "RUNNING"; 395 | case TestResult::kPASSED: return "PASSED"; 396 | case TestResult::kFAILED: return "FAILED"; 397 | case TestResult::kWAIVED: return "WAIVED"; 398 | default: assert(0); return ""; 399 | } 400 | } 401 | 402 | //! 403 | //! 
\brief returns an appropriate output stream (cout or cerr) to use with the given severity 404 | //! 405 | static std::ostream& severityOstream(Severity severity) 406 | { 407 | return severity >= Severity::kINFO ? std::cout : std::cerr; 408 | } 409 | 410 | //! 411 | //! \brief method that implements logging test results 412 | //! 413 | static void reportTestResult(const TestAtom& testAtom, TestResult result) 414 | { 415 | severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # " 416 | << testAtom.mCmdline << std::endl; 417 | } 418 | 419 | //! 420 | //! \brief generate a command line string from the given (argc, argv) values 421 | //! 422 | static std::string genCmdlineString(int argc, char const* const* argv) 423 | { 424 | std::stringstream ss; 425 | for (int i = 0; i < argc; i++) 426 | { 427 | if (i > 0) 428 | ss << " "; 429 | ss << argv[i]; 430 | } 431 | return ss.str(); 432 | } 433 | 434 | Severity mReportableSeverity; 435 | }; 436 | 437 | namespace 438 | { 439 | 440 | //! 441 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE 442 | //! 443 | //! Example usage: 444 | //! 445 | //! LOG_VERBOSE(logger) << "hello world" << std::endl; 446 | //! 447 | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) 448 | { 449 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE); 450 | } 451 | 452 | //! 453 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO 454 | //! 455 | //! Example usage: 456 | //! 457 | //! LOG_INFO(logger) << "hello world" << std::endl; 458 | //! 459 | inline LogStreamConsumer LOG_INFO(const Logger& logger) 460 | { 461 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO); 462 | } 463 | 464 | //! 465 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING 466 | //! 467 | //! Example usage: 468 | //! 469 | //! 
LOG_WARN(logger) << "hello world" << std::endl; 470 | //! 471 | inline LogStreamConsumer LOG_WARN(const Logger& logger) 472 | { 473 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING); 474 | } 475 | 476 | //! 477 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR 478 | //! 479 | //! Example usage: 480 | //! 481 | //! LOG_ERROR(logger) << "hello world" << std::endl; 482 | //! 483 | inline LogStreamConsumer LOG_ERROR(const Logger& logger) 484 | { 485 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR); 486 | } 487 | 488 | //! 489 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR 490 | // ("fatal" severity) 491 | //! 492 | //! Example usage: 493 | //! 494 | //! LOG_FATAL(logger) << "hello world" << std::endl; 495 | //! 496 | inline LogStreamConsumer LOG_FATAL(const Logger& logger) 497 | { 498 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR); 499 | } 500 | 501 | } // anonymous namespace 502 | 503 | #endif // TENSORRT_LOGGING_H 504 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "NvInfer.h" 9 | #include "NvInferRuntime.h" 10 | #include "cuda_runtime_api.h" 11 | 12 | #include 13 | 14 | #include "logging.h" 15 | #include "yolo.h" 16 | #include "trt_utils.h" 17 | #include "yololayer.h" 18 | #include "mish.h" 19 | 20 | #include "opencv2/opencv.hpp" 21 | 22 | using namespace nvinfer1; 23 | 24 | Logger gLogger; 25 | REGISTER_TENSORRT_PLUGIN(MishPluginCreator); 26 | REGISTER_TENSORRT_PLUGIN(YoloPluginCreator); 27 | 28 | cv::Mat preprocess_img(cv::Mat& img,int input_w,int input_h) { 29 | int w, h, x, y; 30 | float r_w = input_w / (img.cols*1.0); 31 | float 
r_h = input_h / (img.rows*1.0); 32 | if (r_h > r_w) { 33 | w = input_w; 34 | h = r_w * img.rows; 35 | x = 0; 36 | y = (input_h - h) / 2; 37 | } else { 38 | w = r_h* img.cols; 39 | h = input_h; 40 | x = (input_w - w) / 2; 41 | y = 0; 42 | } 43 | cv::Mat re(h, w, CV_8UC3); 44 | cv::resize(img, re, re.size(), 0, 0, cv::INTER_CUBIC); 45 | cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128)); 46 | re.copyTo(out(cv::Rect(x, y, re.cols, re.rows))); 47 | return out; 48 | } 49 | 50 | cv::Rect get_rect(cv::Mat& img, float bbox[4],int input_w,int input_h) { 51 | int l, r, t, b; 52 | float r_w = input_w / (img.cols * 1.0); 53 | float r_h = input_h / (img.rows * 1.0); 54 | if (r_h > r_w) { 55 | l = bbox[0] - bbox[2]/2.f; 56 | r = bbox[0] + bbox[2]/2.f; 57 | t = bbox[1] - bbox[3]/2.f - (input_h - r_w * img.rows) / 2; 58 | b = bbox[1] + bbox[3]/2.f - (input_h - r_w * img.rows) / 2; 59 | l = l / r_w; 60 | r = r / r_w; 61 | t = t / r_w; 62 | b = b / r_w; 63 | } else { 64 | l = bbox[0] - bbox[2]/2.f - (input_w - r_h * img.cols) / 2; 65 | r = bbox[0] + bbox[2]/2.f - (input_w - r_h * img.cols) / 2; 66 | t = bbox[1] - bbox[3]/2.f; 67 | b = bbox[1] + bbox[3]/2.f; 68 | l = l / r_h; 69 | r = r / r_h; 70 | t = t / r_h; 71 | b = b / r_h; 72 | } 73 | return cv::Rect(l, t, r-l, b-t); 74 | } 75 | 76 | float iou(float lbox[4], float rbox[4]) { 77 | float interBox[] = { 78 | std::max(lbox[0] - lbox[2]/2.f , rbox[0] - rbox[2]/2.f), //left 79 | std::min(lbox[0] + lbox[2]/2.f , rbox[0] + rbox[2]/2.f), //right 80 | std::max(lbox[1] - lbox[3]/2.f , rbox[1] - rbox[3]/2.f), //top 81 | std::min(lbox[1] + lbox[3]/2.f , rbox[1] + rbox[3]/2.f), //bottom 82 | }; 83 | 84 | if(interBox[2] > interBox[3] || interBox[0] > interBox[1]) 85 | return 0.0f; 86 | 87 | float interBoxS =(interBox[1]-interBox[0])*(interBox[3]-interBox[2]); 88 | return interBoxS/(lbox[2]*lbox[3] + rbox[2]*rbox[3] -interBoxS); 89 | } 90 | 91 | bool cmp(Detection& a, Detection& b) { 92 | return a.det_confidence > 
b.det_confidence; 93 | } 94 | 95 | void nms(std::vector& res, float *output, float ignore_thresh=0.4,float nms_thresh = 0.4) { 96 | std::map> m; 97 | // std::cout << "output[0] "<< output[0]<()); 103 | m[det.class_id].push_back(det); 104 | } 105 | for (auto it = m.begin(); it != m.end(); it++) { 106 | auto& dets = it->second; 107 | std::sort(dets.begin(), dets.end(), cmp); 108 | for (size_t m = 0; m < dets.size(); ++m) { 109 | auto& item = dets[m]; 110 | res.push_back(item); 111 | for (size_t n = m + 1; n < dets.size(); ++n) { 112 | if (iou(item.bbox, dets[n].bbox) > nms_thresh) { 113 | dets.erase(dets.begin()+n); 114 | --n; 115 | } 116 | } 117 | } 118 | } 119 | } 120 | 121 | int main(int argc,char* argv[]) 122 | { 123 | cudaSetDevice(0); 124 | char *trtModelStream{nullptr}; 125 | size_t size{0}; 126 | 127 | NetworkInfo networkInfo; 128 | 129 | networkInfo.networkType = "yolov4-tiny"; 130 | networkInfo.configFilePath = "../data/yolov4-tiny.cfg"; 131 | networkInfo.wtsFilePath = "../data/yolov4-tiny.weights"; 132 | networkInfo.deviceType = "kGPU"; 133 | networkInfo.inputBlobName = "data"; 134 | 135 | std::string modelname = networkInfo.networkType + ".engine"; 136 | 137 | IBuilder* builder = createInferBuilder(gLogger); 138 | if (argc == 2 && std::string(argv[1]) == "-s") { 139 | IHostMemory* modelStream{nullptr}; 140 | Yolo yolo(networkInfo); 141 | ICudaEngine *cudaEngine = yolo.createEngine (builder); 142 | modelStream = cudaEngine->serialize(); 143 | assert(modelStream != nullptr); 144 | std::ofstream p(modelname, std::ios::binary); 145 | if (!p) { 146 | std::cerr << "could not open plan output file" << std::endl; 147 | return -1; 148 | } 149 | p.write(reinterpret_cast(modelStream->data()), modelStream->size()); 150 | modelStream->destroy(); 151 | return 0; 152 | } else if (argc == 2 && std::string(argv[1]) == "-d") { 153 | std::ifstream file(modelname, std::ios::binary); 154 | if (file.good()) { 155 | file.seekg(0, file.end); 156 | size = file.tellg(); 157 | 
file.seekg(0, file.beg); 158 | trtModelStream = new char[size]; 159 | assert(trtModelStream); 160 | file.read(trtModelStream, size); 161 | file.close(); 162 | } 163 | }else { 164 | std::cerr << "arguments not right!" << std::endl; 165 | std::cerr << "./yolov3 -s // serialize model to plan file" << std::endl; 166 | std::cerr << "./yolov3 -d // deserialize plan file and run inference" << std::endl; 167 | return -1; 168 | } 169 | 170 | IRuntime* runtime = createInferRuntime(gLogger); 171 | assert(runtime != nullptr); 172 | ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size); 173 | assert(engine != nullptr); 174 | IExecutionContext* context = engine->createExecutionContext(); 175 | assert(context != nullptr); 176 | delete[] trtModelStream; 177 | 178 | int numbindings=engine->getNbBindings(); 179 | std::cout<< "getNbBindings: " << numbindings<getBindingName(1); 182 | std::cout<< "getBindingName:1 " << layername<getBindingDimensions(1); 184 | std::cout<< "out dims: " << out.d[0]<<" "<getBindingDimensions(0); 187 | std::cout<< "out dims: " << in.d[0]<<" "<>img; 218 | cv::Mat pr_img = preprocess_img(img,input_w,input_h); 219 | for (int i = 0; i < input_h * input_w; i++) { 220 | data[i] = pr_img.at(i)[2] / 255.0; 221 | data[i + input_h * input_w] = pr_img.at(i)[1] / 255.0; 222 | data[i + 2 * input_h * input_w] = pr_img.at(i)[0] / 255.0; 223 | } 224 | 225 | // // Run inference 226 | auto start = std::chrono::system_clock::now(); 227 | 228 | cudaMemcpyAsync(buffers[0], data, batchSize * 3 * input_w * input_h * sizeof(float), cudaMemcpyHostToDevice, stream); 229 | context->enqueue(batchSize, buffers, stream, nullptr); 230 | cudaMemcpyAsync(prob, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream); 231 | cudaStreamSynchronize(stream); 232 | 233 | auto end = std::chrono::system_clock::now(); 234 | std::cout << std::chrono::duration_cast(end - start).count() << "ms" << std::endl; 235 | 236 | std::vector res; 237 | nms(res, 
prob); 238 | 239 | for (size_t j = 0; j < res.size(); j++) { 240 | float *p = (float*)&res[j]; 241 | cv::Rect r = get_rect(img, res[j].bbox,input_w,input_h); 242 | cv::rectangle(img, r, cv::Scalar(0x27, 0xC1, 0x36), 2); 243 | std::string text = std::to_string((int)res[j].class_id) + " "+ 244 | std::to_string((float)res[j].det_confidence)+" "+ 245 | std::to_string((float)res[j].class_confidence); 246 | cv::putText(img, text, cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2); 247 | } 248 | cv::imshow("_", img); 249 | if(cv::waitKey(1)==27){break;} 250 | } 251 | 252 | // Release stream and buffers 253 | cudaStreamDestroy(stream); 254 | cudaFree(buffers[0]); 255 | cudaFree(buffers[1]); 256 | 257 | // Destroy the engine 258 | context->destroy(); 259 | engine->destroy(); 260 | runtime->destroy(); 261 | } 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | -------------------------------------------------------------------------------- /mish.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "mish.h" 6 | 7 | namespace nvinfer1 8 | { 9 | MishPlugin::MishPlugin() 10 | { 11 | } 12 | 13 | MishPlugin::~MishPlugin() 14 | { 15 | } 16 | 17 | // create the plugin at runtime from a byte stream 18 | MishPlugin::MishPlugin(const void* data, size_t length) 19 | { 20 | assert(length == sizeof(input_size_)); 21 | input_size_ = *reinterpret_cast(data); 22 | } 23 | 24 | void MishPlugin::serialize(void* buffer) const 25 | { 26 | *reinterpret_cast(buffer) = input_size_; 27 | } 28 | 29 | size_t MishPlugin::getSerializationSize() const 30 | { 31 | return sizeof(input_size_); 32 | } 33 | 34 | int MishPlugin::initialize() 35 | { 36 | return 0; 37 | } 38 | 39 | Dims MishPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) 40 | { 41 | assert(nbInputDims == 1); 42 | assert(index == 0); 43 | input_size_ = inputs[0].d[0] * inputs[0].d[1] * 
inputs[0].d[2]; 44 | // Output dimensions 45 | return Dims3(inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]); 46 | } 47 | 48 | // Set plugin namespace 49 | void MishPlugin::setPluginNamespace(const char* pluginNamespace) 50 | { 51 | mPluginNamespace = pluginNamespace; 52 | } 53 | 54 | const char* MishPlugin::getPluginNamespace() const 55 | { 56 | return mPluginNamespace; 57 | } 58 | 59 | // Return the DataType of the plugin output at the requested index 60 | DataType MishPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const 61 | { 62 | return DataType::kFLOAT; 63 | } 64 | 65 | // Return true if output tensor is broadcast across a batch. 66 | bool MishPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const 67 | { 68 | return false; 69 | } 70 | 71 | // Return true if plugin can use input that is broadcast across batch without replication. 72 | bool MishPlugin::canBroadcastInputAcrossBatch(int inputIndex) const 73 | { 74 | return false; 75 | } 76 | 77 | void MishPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) 78 | { 79 | } 80 | 81 | // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 82 | void MishPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) 83 | { 84 | } 85 | 86 | // Detach the plugin object from its execution context. 
87 | void MishPlugin::detachFromContext() {} 88 | 89 | const char* MishPlugin::getPluginType() const 90 | { 91 | return "Mish_TRT"; 92 | } 93 | 94 | const char* MishPlugin::getPluginVersion() const 95 | { 96 | return "1"; 97 | } 98 | 99 | void MishPlugin::destroy() 100 | { 101 | delete this; 102 | } 103 | 104 | // Clone the plugin 105 | IPluginV2IOExt* MishPlugin::clone() const 106 | { 107 | MishPlugin *p = new MishPlugin(); 108 | p->input_size_ = input_size_; 109 | p->setPluginNamespace(mPluginNamespace); 110 | return p; 111 | } 112 | 113 | __device__ float tanh_activate_kernel(float x){return (2/(1 + expf(-2*x)) - 1);} 114 | 115 | __device__ float softplus_kernel(float x, float threshold = 20) { 116 | if (x > threshold) return x; // too large 117 | else if (x < -threshold) return expf(x); // too small 118 | return logf(expf(x) + 1); 119 | } 120 | 121 | __global__ void mish_kernel(const float *input, float *output, int num_elem) { 122 | 123 | int idx = threadIdx.x + blockDim.x * blockIdx.x; 124 | if (idx >= num_elem) return; 125 | 126 | //float t = exp(input[idx]); 127 | //if (input[idx] > 20.0) { 128 | // t *= t; 129 | // output[idx] = (t - 1.0) / (t + 1.0); 130 | //} else { 131 | // float tt = t * t; 132 | // output[idx] = (tt + 2.0 * t) / (tt + 2.0 * t + 2.0); 133 | //} 134 | //output[idx] *= input[idx]; 135 | output[idx] = input[idx] * tanh_activate_kernel(softplus_kernel(input[idx])); 136 | } 137 | 138 | void MishPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize) { 139 | int block_size = thread_count_; 140 | int grid_size = (input_size_ * batchSize + block_size - 1) / block_size; 141 | mish_kernel<<>>(inputs[0], output, input_size_ * batchSize); 142 | } 143 | 144 | int MishPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) 145 | { 146 | //assert(batchSize == 1); 147 | //GPU 148 | //CUDA_CHECK(cudaStreamSynchronize(stream)); 149 | forwardGpu((const 
float *const *)inputs, (float*)outputs[0], stream, batchSize); 150 | return 0; 151 | } 152 | 153 | PluginFieldCollection MishPluginCreator::mFC{}; 154 | std::vector MishPluginCreator::mPluginAttributes; 155 | 156 | MishPluginCreator::MishPluginCreator() 157 | { 158 | mPluginAttributes.clear(); 159 | 160 | mFC.nbFields = mPluginAttributes.size(); 161 | mFC.fields = mPluginAttributes.data(); 162 | } 163 | 164 | const char* MishPluginCreator::getPluginName() const 165 | { 166 | return "Mish_TRT"; 167 | } 168 | 169 | const char* MishPluginCreator::getPluginVersion() const 170 | { 171 | return "1"; 172 | } 173 | 174 | const PluginFieldCollection* MishPluginCreator::getFieldNames() 175 | { 176 | return &mFC; 177 | } 178 | 179 | IPluginV2IOExt* MishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) 180 | { 181 | MishPlugin* obj = new MishPlugin(); 182 | obj->setPluginNamespace(mNamespace.c_str()); 183 | return obj; 184 | } 185 | 186 | IPluginV2IOExt* MishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) 187 | { 188 | // This object will be deleted when the network is destroyed, which will 189 | // call MishPlugin::destroy() 190 | MishPlugin* obj = new MishPlugin(serialData, serialLength); 191 | obj->setPluginNamespace(mNamespace.c_str()); 192 | return obj; 193 | } 194 | 195 | } 196 | 197 | -------------------------------------------------------------------------------- /mish.h: -------------------------------------------------------------------------------- 1 | #ifndef _MISH_PLUGIN_H 2 | #define _MISH_PLUGIN_H 3 | 4 | #include 5 | #include 6 | #include "NvInfer.h" 7 | 8 | namespace nvinfer1 9 | { 10 | class MishPlugin: public IPluginV2IOExt 11 | { 12 | public: 13 | explicit MishPlugin(); 14 | MishPlugin(const void* data, size_t length); 15 | 16 | ~MishPlugin(); 17 | 18 | int getNbOutputs() const override 19 | { 20 | return 1; 21 | } 22 | 23 | Dims getOutputDimensions(int index, const Dims* inputs, 
int nbInputDims) override; 24 | 25 | int initialize() override; 26 | 27 | virtual void terminate() override {}; 28 | 29 | virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;} 30 | 31 | virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override; 32 | 33 | virtual size_t getSerializationSize() const override; 34 | 35 | virtual void serialize(void* buffer) const override; 36 | 37 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override { 38 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 39 | } 40 | 41 | const char* getPluginType() const override; 42 | 43 | const char* getPluginVersion() const override; 44 | 45 | void destroy() override; 46 | 47 | IPluginV2IOExt* clone() const override; 48 | 49 | void setPluginNamespace(const char* pluginNamespace) override; 50 | 51 | const char* getPluginNamespace() const override; 52 | 53 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override; 54 | 55 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override; 56 | 57 | bool canBroadcastInputAcrossBatch(int inputIndex) const override; 58 | 59 | void attachToContext( 60 | cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override; 61 | 62 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override; 63 | 64 | void detachFromContext() override; 65 | 66 | int input_size_; 67 | private: 68 | void forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize = 1); 69 | int thread_count_ = 256; 70 | const char* mPluginNamespace; 71 | }; 72 | 73 | class MishPluginCreator : public IPluginCreator 74 | { 75 | public: 76 | MishPluginCreator(); 77 | 78 | ~MishPluginCreator() 
override = default; 79 | 80 | const char* getPluginName() const override; 81 | 82 | const char* getPluginVersion() const override; 83 | 84 | const PluginFieldCollection* getFieldNames() override; 85 | 86 | IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; 87 | 88 | IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; 89 | 90 | void setPluginNamespace(const char* libNamespace) override 91 | { 92 | mNamespace = libNamespace; 93 | } 94 | 95 | const char* getPluginNamespace() const override 96 | { 97 | return mNamespace.c_str(); 98 | } 99 | 100 | private: 101 | std::string mNamespace; 102 | static PluginFieldCollection mFC; 103 | static std::vector mPluginAttributes; 104 | }; 105 | }; 106 | #endif 107 | -------------------------------------------------------------------------------- /trt_utils.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | */ 22 | 23 | #include "trt_utils.h" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "NvInferPlugin.h" 31 | 32 | static void leftTrim(std::string& s) 33 | { 34 | s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); 35 | } 36 | 37 | static void rightTrim(std::string& s) 38 | { 39 | s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); 40 | } 41 | 42 | std::string trim(std::string s) 43 | { 44 | leftTrim(s); 45 | rightTrim(s); 46 | return s; 47 | } 48 | 49 | float clamp(const float val, const float minVal, const float maxVal) 50 | { 51 | assert(minVal <= maxVal); 52 | return std::min(maxVal, std::max(minVal, val)); 53 | } 54 | 55 | bool fileExists(const std::string fileName, bool verbose) 56 | { 57 | if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName))) 58 | { 59 | if (verbose) std::cout << "File does not exist : " << fileName << std::endl; 60 | return false; 61 | } 62 | return true; 63 | } 64 | 65 | std::vector loadWeights(const std::string weightsFilePath, const std::string& networkType) 66 | { 67 | assert(fileExists(weightsFilePath)); 68 | std::cout << "Loading pre-trained weights..." 
<< std::endl; 69 | std::ifstream file(weightsFilePath, std::ios_base::binary); 70 | assert(file.good()); 71 | std::string line; 72 | 73 | if (networkType == "yolov2") 74 | { 75 | // Remove 4 int32 bytes of data from the stream belonging to the header 76 | file.ignore(4 * 4); 77 | } 78 | else if ((networkType == "yolov3") || (networkType == "yolov3-tiny") 79 | || (networkType == "yolov4") || (networkType == "yolov4-tiny")) 80 | { 81 | // Remove 5 int32 bytes of data from the stream belonging to the header 82 | file.ignore(4 * 5); 83 | } 84 | else 85 | { 86 | std::cout << "Invalid network type" << std::endl; 87 | assert(0); 88 | } 89 | 90 | std::vector weights; 91 | char floatWeight[4]; 92 | while (!file.eof()) 93 | { 94 | file.read(floatWeight, 4); 95 | assert(file.gcount() == 4); 96 | weights.push_back(*reinterpret_cast(floatWeight)); 97 | if (file.peek() == std::istream::traits_type::eof()) break; 98 | } 99 | std::cout << "Loading weights of " << networkType << " complete!" 100 | << std::endl; 101 | std::cout << "Total Number of weights read : " << weights.size() << std::endl; 102 | return weights; 103 | } 104 | 105 | std::string dimsToString(const nvinfer1::Dims d) 106 | { 107 | std::stringstream s; 108 | assert(d.nbDims >= 1); 109 | for (int i = 0; i < d.nbDims - 1; ++i) 110 | { 111 | s << std::setw(4) << d.d[i] << " x"; 112 | } 113 | s << std::setw(4) << d.d[d.nbDims - 1]; 114 | 115 | return s.str(); 116 | } 117 | 118 | void displayDimType(const nvinfer1::Dims d) 119 | { 120 | std::cout << "(" << d.nbDims << ") "; 121 | for (int i = 0; i < d.nbDims; ++i) 122 | { 123 | switch (d.type[i]) 124 | { 125 | case nvinfer1::DimensionType::kSPATIAL: std::cout << "kSPATIAL "; break; 126 | case nvinfer1::DimensionType::kCHANNEL: std::cout << "kCHANNEL "; break; 127 | case nvinfer1::DimensionType::kINDEX: std::cout << "kINDEX "; break; 128 | case nvinfer1::DimensionType::kSEQUENCE: std::cout << "kSEQUENCE "; break; 129 | } 130 | } 131 | std::cout << std::endl; 132 | } 133 | 
134 | int getNumChannels(nvinfer1::ITensor* t) 135 | { 136 | nvinfer1::Dims d = t->getDimensions(); 137 | assert(d.nbDims == 3); 138 | 139 | return d.d[0]; 140 | } 141 | 142 | uint64_t get3DTensorVolume(nvinfer1::Dims inputDims) 143 | { 144 | assert(inputDims.nbDims == 3); 145 | return inputDims.d[0] * inputDims.d[1] * inputDims.d[2]; 146 | } 147 | 148 | nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map& block, 149 | nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) 150 | { 151 | assert(block.at("type") == "maxpool"); 152 | assert(block.find("size") != block.end()); 153 | assert(block.find("stride") != block.end()); 154 | 155 | int size = std::stoi(block.at("size")); 156 | int stride = std::stoi(block.at("stride")); 157 | 158 | nvinfer1::IPoolingLayer* pool 159 | = network->addPooling(*input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{size, size}); 160 | assert(pool); 161 | std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx); 162 | pool->setStride(nvinfer1::DimsHW{stride, stride}); 163 | pool->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER); 164 | pool->setName(maxpoolLayerName.c_str()); 165 | 166 | return pool; 167 | } 168 | 169 | nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map& block, 170 | std::vector& weights, 171 | std::vector& trtWeights, int& weightPtr, 172 | int& inputChannels, nvinfer1::ITensor* input, 173 | nvinfer1::INetworkDefinition* network) 174 | { 175 | assert(block.at("type") == "convolutional"); 176 | assert(block.find("batch_normalize") == block.end()); 177 | assert(block.at("activation") == "linear"); 178 | assert(block.find("filters") != block.end()); 179 | assert(block.find("pad") != block.end()); 180 | assert(block.find("size") != block.end()); 181 | assert(block.find("stride") != block.end()); 182 | 183 | int filters = std::stoi(block.at("filters")); 184 | int padding = std::stoi(block.at("pad")); 185 | int kernelSize = std::stoi(block.at("size")); 186 | int stride = 
std::stoi(block.at("stride")); 187 | int pad; 188 | if (padding) 189 | pad = (kernelSize - 1) / 2; 190 | else 191 | pad = 0; 192 | // load the convolution layer bias 193 | nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, filters}; 194 | float* val = new float[filters]; 195 | for (int i = 0; i < filters; ++i) 196 | { 197 | val[i] = weights[weightPtr]; 198 | weightPtr++; 199 | } 200 | convBias.values = val; 201 | trtWeights.push_back(convBias); 202 | // load the convolutional layer weights 203 | int size = filters * inputChannels * kernelSize * kernelSize; 204 | nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; 205 | val = new float[size]; 206 | for (int i = 0; i < size; ++i) 207 | { 208 | val[i] = weights[weightPtr]; 209 | weightPtr++; 210 | } 211 | convWt.values = val; 212 | trtWeights.push_back(convWt); 213 | nvinfer1::IConvolutionLayer* conv = network->addConvolution( 214 | *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias); 215 | assert(conv != nullptr); 216 | std::string convLayerName = "conv_" + std::to_string(layerIdx); 217 | conv->setName(convLayerName.c_str()); 218 | conv->setStride(nvinfer1::DimsHW{stride, stride}); 219 | conv->setPadding(nvinfer1::DimsHW{pad, pad}); 220 | 221 | return conv; 222 | } 223 | 224 | nvinfer1::ILayer* netAddConvBNActive(int layerIdx, std::map& block, 225 | std::vector& weights, 226 | std::vector& trtWeights, int& weightPtr, 227 | int& inputChannels, nvinfer1::ITensor* input, 228 | nvinfer1::INetworkDefinition* network) 229 | { 230 | assert(block.at("type") == "convolutional"); 231 | assert(block.find("batch_normalize") != block.end()); 232 | assert(block.at("batch_normalize") == "1"); 233 | // assert(block.at("activation") == "leaky"); 234 | assert(block.find("filters") != block.end()); 235 | assert(block.find("pad") != block.end()); 236 | assert(block.find("size") != block.end()); 237 | assert(block.find("stride") != block.end()); 238 | 239 | bool batchNormalize, bias; 
240 | if (block.find("batch_normalize") != block.end()) 241 | { 242 | batchNormalize = (block.at("batch_normalize") == "1"); 243 | bias = false; 244 | } 245 | else 246 | { 247 | batchNormalize = false; 248 | bias = true; 249 | } 250 | // all conv_bn_leaky layers assume bias is false 251 | assert(batchNormalize == true && bias == false); 252 | UNUSED(batchNormalize); 253 | UNUSED(bias); 254 | 255 | int filters = std::stoi(block.at("filters")); 256 | int padding = std::stoi(block.at("pad")); 257 | int kernelSize = std::stoi(block.at("size")); 258 | int stride = std::stoi(block.at("stride")); 259 | int pad; 260 | if (padding) 261 | pad = (kernelSize - 1) / 2; 262 | else 263 | pad = 0; 264 | 265 | /***** CONVOLUTION LAYER *****/ 266 | /*****************************/ 267 | // batch norm weights are before the conv layer 268 | // load BN biases (bn_biases) 269 | std::vector bnBiases; 270 | for (int i = 0; i < filters; ++i) 271 | { 272 | bnBiases.push_back(weights[weightPtr]); 273 | weightPtr++; 274 | } 275 | // load BN weights 276 | std::vector bnWeights; 277 | for (int i = 0; i < filters; ++i) 278 | { 279 | bnWeights.push_back(weights[weightPtr]); 280 | weightPtr++; 281 | } 282 | // load BN running_mean 283 | std::vector bnRunningMean; 284 | for (int i = 0; i < filters; ++i) 285 | { 286 | bnRunningMean.push_back(weights[weightPtr]); 287 | weightPtr++; 288 | } 289 | // load BN running_var 290 | std::vector bnRunningVar; 291 | for (int i = 0; i < filters; ++i) 292 | { 293 | // 1e-05 for numerical stability 294 | bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5)); 295 | weightPtr++; 296 | } 297 | // load Conv layer weights (GKCRS) 298 | int size = filters * inputChannels * kernelSize * kernelSize; 299 | nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; 300 | float* val = new float[size]; 301 | for (int i = 0; i < size; ++i) 302 | { 303 | val[i] = weights[weightPtr]; 304 | weightPtr++; 305 | } 306 | convWt.values = val; 307 | 
trtWeights.push_back(convWt); 308 | nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, 0}; 309 | trtWeights.push_back(convBias); 310 | nvinfer1::IConvolutionLayer* conv = network->addConvolution( 311 | *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias); 312 | assert(conv != nullptr); 313 | std::string convLayerName = "conv_" + std::to_string(layerIdx); 314 | conv->setName(convLayerName.c_str()); 315 | conv->setStride(nvinfer1::DimsHW{stride, stride}); 316 | conv->setPadding(nvinfer1::DimsHW{pad, pad}); 317 | 318 | /***** BATCHNORM LAYER *****/ 319 | /***************************/ 320 | size = filters; 321 | // create the weights 322 | nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size}; 323 | nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size}; 324 | nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size}; 325 | float* shiftWt = new float[size]; 326 | for (int i = 0; i < size; ++i) 327 | { 328 | shiftWt[i] 329 | = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); 330 | } 331 | shift.values = shiftWt; 332 | float* scaleWt = new float[size]; 333 | for (int i = 0; i < size; ++i) 334 | { 335 | scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; 336 | } 337 | scale.values = scaleWt; 338 | float* powerWt = new float[size]; 339 | for (int i = 0; i < size; ++i) 340 | { 341 | powerWt[i] = 1.0; 342 | } 343 | power.values = powerWt; 344 | trtWeights.push_back(shift); 345 | trtWeights.push_back(scale); 346 | trtWeights.push_back(power); 347 | // Add the batch norm layers 348 | nvinfer1::IScaleLayer* bn = network->addScale( 349 | *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); 350 | assert(bn != nullptr); 351 | std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx); 352 | bn->setName(bnLayerName.c_str()); 353 | /***** ACTIVATION LAYER *****/ 354 | /****************************/ 355 | if(block.at("activation") == "leaky"){ 356 | 
nvinfer1::ITensor* bnOutput = bn->getOutput(0); 357 | nvinfer1::IActivationLayer* leaky = network->addActivation( 358 | *bnOutput, nvinfer1::ActivationType::kLEAKY_RELU); 359 | leaky->setAlpha(0.1); 360 | assert(leaky != nullptr); 361 | std::string leakyLayerName = "leaky_" + std::to_string(layerIdx); 362 | leaky->setName(leakyLayerName.c_str()); 363 | return leaky; 364 | }else if(block.at("activation") == "mish") 365 | { 366 | auto creator = getPluginRegistry()->getPluginCreator("Mish_TRT", "1"); 367 | const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames(); 368 | nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("mish" + std::to_string(layerIdx)).c_str(), pluginData); 369 | nvinfer1::ITensor* inputTensors[] = {bn->getOutput(0)}; 370 | auto mish = network->addPluginV2(&inputTensors[0], 1, *pluginObj); 371 | return mish; 372 | } 373 | 374 | } 375 | 376 | nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map& block, 377 | std::vector& weights, 378 | std::vector& trtWeights, int& inputChannels, 379 | nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) 380 | { 381 | assert(block.at("type") == "upsample"); 382 | nvinfer1::Dims inpDims = input->getDimensions(); 383 | assert(inpDims.nbDims == 3); 384 | assert(inpDims.d[1] == inpDims.d[2]); 385 | int h = inpDims.d[1]; 386 | int w = inpDims.d[2]; 387 | int stride = std::stoi(block.at("stride")); 388 | // add pre multiply matrix as a constant 389 | nvinfer1::Dims preDims{3, 390 | {1, stride * h, w}, 391 | {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, 392 | nvinfer1::DimensionType::kSPATIAL}}; 393 | int size = stride * h * w; 394 | nvinfer1::Weights preMul{nvinfer1::DataType::kFLOAT, nullptr, size}; 395 | float* preWt = new float[size]; 396 | /* (2*h * w) 397 | [ [1, 0, ..., 0], 398 | [1, 0, ..., 0], 399 | [0, 1, ..., 0], 400 | [0, 1, ..., 0], 401 | ..., 402 | ..., 403 | [0, 0, ..., 1], 404 | [0, 0, ..., 1] ] 405 | */ 406 | for (int i = 0, idx = 0; i < h; 
++i) 407 | { 408 | for (int s = 0; s < stride; ++s) 409 | { 410 | for (int j = 0; j < w; ++j, ++idx) 411 | { 412 | preWt[idx] = (i == j) ? 1.0 : 0.0; 413 | } 414 | } 415 | } 416 | preMul.values = preWt; 417 | trtWeights.push_back(preMul); 418 | nvinfer1::IConstantLayer* preM = network->addConstant(preDims, preMul); 419 | assert(preM != nullptr); 420 | std::string preLayerName = "preMul_" + std::to_string(layerIdx); 421 | preM->setName(preLayerName.c_str()); 422 | // add post multiply matrix as a constant 423 | nvinfer1::Dims postDims{3, 424 | {1, h, stride * w}, 425 | {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, 426 | nvinfer1::DimensionType::kSPATIAL}}; 427 | size = stride * h * w; 428 | nvinfer1::Weights postMul{nvinfer1::DataType::kFLOAT, nullptr, size}; 429 | float* postWt = new float[size]; 430 | /* (h * 2*w) 431 | [ [1, 1, 0, 0, ..., 0, 0], 432 | [0, 0, 1, 1, ..., 0, 0], 433 | ..., 434 | ..., 435 | [0, 0, 0, 0, ..., 1, 1] ] 436 | */ 437 | for (int i = 0, idx = 0; i < h; ++i) 438 | { 439 | for (int j = 0; j < stride * w; ++j, ++idx) 440 | { 441 | postWt[idx] = (j / stride == i) ? 
1.0 : 0.0; 442 | } 443 | } 444 | postMul.values = postWt; 445 | trtWeights.push_back(postMul); 446 | nvinfer1::IConstantLayer* post_m = network->addConstant(postDims, postMul); 447 | assert(post_m != nullptr); 448 | std::string postLayerName = "postMul_" + std::to_string(layerIdx); 449 | post_m->setName(postLayerName.c_str()); 450 | // add matrix multiply layers for upsampling 451 | nvinfer1::IMatrixMultiplyLayer* mm1 452 | = network->addMatrixMultiply(*preM->getOutput(0), nvinfer1::MatrixOperation::kNONE, *input, 453 | nvinfer1::MatrixOperation::kNONE); 454 | assert(mm1 != nullptr); 455 | std::string mm1LayerName = "mm1_" + std::to_string(layerIdx); 456 | mm1->setName(mm1LayerName.c_str()); 457 | nvinfer1::IMatrixMultiplyLayer* mm2 458 | = network->addMatrixMultiply(*mm1->getOutput(0), nvinfer1::MatrixOperation::kNONE, 459 | *post_m->getOutput(0), nvinfer1::MatrixOperation::kNONE); 460 | assert(mm2 != nullptr); 461 | std::string mm2LayerName = "mm2_" + std::to_string(layerIdx); 462 | mm2->setName(mm2LayerName.c_str()); 463 | return mm2; 464 | } 465 | 466 | void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, 467 | std::string layerOutput, std::string weightPtr) 468 | { 469 | std::cout << std::setw(6) << std::left << layerIndex << std::setw(15) << std::left << layerName; 470 | std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left 471 | << layerOutput; 472 | std::cout << std::setw(6) << std::left << weightPtr << std::endl; 473 | } 474 | -------------------------------------------------------------------------------- /trt_utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | 24 | #ifndef __TRT_UTILS_H__ 25 | #define __TRT_UTILS_H__ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "NvInfer.h" 36 | #include "NvInferPlugin.h" 37 | 38 | #define UNUSED(expr) (void)(expr) 39 | #define DIVUP(n, d) ((n) + (d)-1) / (d) 40 | 41 | std::string trim(std::string s); 42 | float clamp(const float val, const float minVal, const float maxVal); 43 | bool fileExists(const std::string fileName, bool verbose = true); 44 | std::vector loadWeights(const std::string weightsFilePath, const std::string& networkType); 45 | std::string dimsToString(const nvinfer1::Dims d); 46 | void displayDimType(const nvinfer1::Dims d); 47 | int getNumChannels(nvinfer1::ITensor* t); 48 | uint64_t get3DTensorVolume(nvinfer1::Dims inputDims); 49 | 50 | // Helper functions to create yolo engine 51 | nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map& block, 52 | nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); 53 | nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map& block, 54 | std::vector& weights, 55 | std::vector& trtWeights, int& weightPtr, 56 | int& inputChannels, nvinfer1::ITensor* input, 57 | nvinfer1::INetworkDefinition* network); 58 | nvinfer1::ILayer* netAddConvBNActive(int layerIdx, std::map& block, 59 | std::vector& weights, 60 | std::vector& trtWeights, int& weightPtr, 61 | int& inputChannels, nvinfer1::ITensor* input, 62 | nvinfer1::INetworkDefinition* network); 63 | nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map& block, 64 | std::vector& weights, 65 | std::vector& trtWeights, int& inputChannels, 66 | nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); 67 | void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, 68 | std::string layerOutput, std::string weightPtr); 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /yolo.cpp: 
-------------------------------------------------------------------------------- 1 | 2 | #include "yolo.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace nvinfer1; 9 | 10 | REGISTER_TENSORRT_PLUGIN(MishPluginCreator); 11 | REGISTER_TENSORRT_PLUGIN(YoloPluginCreator); 12 | 13 | Yolo::Yolo(const NetworkInfo& networkInfo) 14 | : m_NetworkType(networkInfo.networkType), // yolov3 15 | m_ConfigFilePath(networkInfo.configFilePath), // yolov3.cfg 16 | m_WtsFilePath(networkInfo.wtsFilePath), // yolov3.weights 17 | m_DeviceType(networkInfo.deviceType), // kDLA, kGPU 18 | m_InputBlobName(networkInfo.inputBlobName), // data 19 | m_InputH(0), 20 | m_InputW(0), 21 | m_InputC(0), 22 | m_InputSize(0) 23 | {} 24 | 25 | Yolo::~Yolo() 26 | { 27 | destroyNetworkUtils(); 28 | } 29 | 30 | nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder) 31 | { 32 | assert (builder); 33 | 34 | // std::vector weights = loadWeights(m_WtsFilePath, m_NetworkType); 35 | // std::vector trtWeights; 36 | 37 | nvinfer1::INetworkDefinition *network = builder->createNetwork(); 38 | if (parseModel(*network) != NVDSINFER_SUCCESS) { 39 | network->destroy(); 40 | return nullptr; 41 | } 42 | 43 | // Build the engine 44 | std::cout << "Building the TensorRT Engine..." << std::endl; 45 | nvinfer1::ICudaEngine * engine = builder->buildCudaEngine(*network); 46 | if (engine) { 47 | std::cout << "Building complete!" << std::endl; 48 | } else { 49 | std::cerr << "Building engine failed!" << std::endl; 50 | } 51 | 52 | // destroy 53 | network->destroy(); 54 | return engine; 55 | } 56 | 57 | NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) { 58 | destroyNetworkUtils(); 59 | 60 | m_ConfigBlocks = parseConfigFile(m_ConfigFilePath); 61 | parseConfigBlocks(); 62 | 63 | std::vector weights = loadWeights(m_WtsFilePath, m_NetworkType); 64 | // build yolo network 65 | std::cout << "Building Yolo network..." 
<< std::endl; 66 | NvDsInferStatus status = buildYoloNetwork(weights, network); 67 | 68 | if (status == NVDSINFER_SUCCESS) { 69 | std::cout << "Building yolo network complete!" << std::endl; 70 | } else { 71 | std::cerr << "Building yolo network failed!" << std::endl; 72 | } 73 | 74 | return status; 75 | } 76 | 77 | NvDsInferStatus Yolo::buildYoloNetwork( 78 | std::vector& weights, nvinfer1::INetworkDefinition& network) { 79 | 80 | // 清理yolo层 81 | m_YoloKernel.clear(); 82 | 83 | int weightPtr = 0; 84 | int channels = m_InputC; 85 | 86 | nvinfer1::ITensor* data = 87 | network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, 88 | nvinfer1::DimsCHW{static_cast(m_InputC), 89 | static_cast(m_InputH), static_cast(m_InputW)}); 90 | assert(data != nullptr && data->getDimensions().nbDims > 0); 91 | 92 | nvinfer1::ITensor* previous = data; 93 | std::vector tensorOutputs; 94 | uint outputTensorCount = 0; 95 | 96 | // build the network using the network API 97 | for (uint i = 0; i < m_ConfigBlocks.size(); ++i) { 98 | // check if num. 
of channels is correct 99 | assert(getNumChannels(previous) == channels); 100 | std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")"; 101 | 102 | if (m_ConfigBlocks.at(i).at("type") == "net") { 103 | printLayerInfo("", "layer", " inp_size", " out_size", "weightPtr"); 104 | } else if (m_ConfigBlocks.at(i).at("type") == "convolutional") { 105 | std::string inputVol = dimsToString(previous->getDimensions()); 106 | nvinfer1::ILayer* out; 107 | std::string layerType; 108 | // check if batch_norm enabled 109 | if (m_ConfigBlocks.at(i).find("batch_normalize") != m_ConfigBlocks.at(i).end()) { 110 | 111 | out = netAddConvBNActive(i, m_ConfigBlocks.at(i), weights, 112 | m_TrtWeights, weightPtr, channels, previous, &network); 113 | layerType = "conv-bn-Active"; 114 | }else{ 115 | out = netAddConvLinear(i, m_ConfigBlocks.at(i), weights, 116 | m_TrtWeights, weightPtr, channels, previous, &network); 117 | layerType = "conv-linear"; 118 | } 119 | previous = out->getOutput(0); 120 | assert(previous != nullptr); 121 | channels = getNumChannels(previous); 122 | std::string outputVol = dimsToString(previous->getDimensions()); 123 | tensorOutputs.push_back(out->getOutput(0)); 124 | printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr)); 125 | } else if (m_ConfigBlocks.at(i).at("type") == "shortcut") { 126 | assert(m_ConfigBlocks.at(i).at("activation") == "linear"); 127 | assert(m_ConfigBlocks.at(i).find("from") != 128 | m_ConfigBlocks.at(i).end()); 129 | int from = stoi(m_ConfigBlocks.at(i).at("from")); 130 | 131 | std::string inputVol = dimsToString(previous->getDimensions()); 132 | // check if indexes are correct 133 | assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); 134 | assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); 135 | assert(i + from - 1 < i - 2); 136 | nvinfer1::IElementWiseLayer* ew = network.addElementWise( 137 | *tensorOutputs[i - 2], *tensorOutputs[i + from - 1], 138 | 
nvinfer1::ElementWiseOperation::kSUM); 139 | assert(ew != nullptr); 140 | std::string ewLayerName = "shortcut_" + std::to_string(i); 141 | ew->setName(ewLayerName.c_str()); 142 | previous = ew->getOutput(0); 143 | assert(previous != nullptr); 144 | std::string outputVol = dimsToString(previous->getDimensions()); 145 | tensorOutputs.push_back(ew->getOutput(0)); 146 | printLayerInfo(layerIndex, "skip", inputVol, outputVol, " -"); 147 | } else if (m_ConfigBlocks.at(i).at("type") == "yolo") { 148 | nvinfer1::Dims prevTensorDims = previous->getDimensions(); 149 | assert(prevTensorDims.d[1] == prevTensorDims.d[2]); 150 | TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount); 151 | curYoloTensor.gridSize = prevTensorDims.d[1]; 152 | curYoloTensor.stride = m_InputW / curYoloTensor.gridSize; 153 | m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.gridSize 154 | * curYoloTensor.gridSize 155 | * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses)); 156 | std::string layerName = "yolo_" + std::to_string(i); 157 | curYoloTensor.blobName = layerName; 158 | 159 | // 添加yolo层 160 | m_YoloTensor.push_back(previous); 161 | tensorOutputs.push_back(previous); 162 | 163 | // 调整 yolo层的信息 164 | Dims inputdims = previous->getDimensions(); 165 | YoloKernel tmpYolokernel; 166 | tmpYolokernel.height= inputdims.d[1]; 167 | tmpYolokernel.width= inputdims.d[2]; 168 | // 添加yolo anchors 169 | int masksize = m_OutputTensors.at(outputTensorCount).masks.size(); 170 | tmpYolokernel.everyYoloAnchors = masksize; 171 | 172 | for(int i=0;igetDimensions(); 188 | assert(prevTensorDims.d[1] == prevTensorDims.d[2]); 189 | TensorInfo& curRegionTensor = m_OutputTensors.at(outputTensorCount); 190 | curRegionTensor.gridSize = prevTensorDims.d[1]; 191 | curRegionTensor.stride = m_InputW / curRegionTensor.gridSize; 192 | m_OutputTensors.at(outputTensorCount).volume = curRegionTensor.gridSize 193 | * curRegionTensor.gridSize 194 | * (curRegionTensor.numBBoxes * (5 + 
curRegionTensor.numClasses)); 195 | std::string layerName = "region_" + std::to_string(i); 196 | curRegionTensor.blobName = layerName; 197 | nvinfer1::plugin::RegionParameters RegionParameters{ 198 | static_cast(curRegionTensor.numBBoxes), 4, 199 | static_cast(curRegionTensor.numClasses), nullptr}; 200 | std::string inputVol = dimsToString(previous->getDimensions()); 201 | nvinfer1::IPluginV2* regionPlugin 202 | = createRegionPlugin(RegionParameters); 203 | assert(regionPlugin != nullptr); 204 | nvinfer1::IPluginV2Layer* region = 205 | network.addPluginV2(&previous, 1, *regionPlugin); 206 | assert(region != nullptr); 207 | region->setName(layerName.c_str()); 208 | previous = region->getOutput(0); 209 | assert(previous != nullptr); 210 | previous->setName(layerName.c_str()); 211 | std::string outputVol = dimsToString(previous->getDimensions()); 212 | network.markOutput(*previous); 213 | channels = getNumChannels(previous); 214 | tensorOutputs.push_back(region->getOutput(0)); 215 | printLayerInfo(layerIndex, "region", inputVol, outputVol, std::to_string(weightPtr)); 216 | std::cout << "Anchors are being converted to network input resolution i.e. 
Anchors x " 217 | << curRegionTensor.stride << " (stride)" << std::endl; 218 | for (auto& anchor : curRegionTensor.anchors) anchor *= curRegionTensor.stride; 219 | ++outputTensorCount; 220 | } else if (m_ConfigBlocks.at(i).at("type") == "reorg") { 221 | std::string inputVol = dimsToString(previous->getDimensions()); 222 | nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2); 223 | assert(reorgPlugin != nullptr); 224 | nvinfer1::IPluginV2Layer* reorg = 225 | network.addPluginV2(&previous, 1, *reorgPlugin); 226 | assert(reorg != nullptr); 227 | 228 | std::string layerName = "reorg_" + std::to_string(i); 229 | reorg->setName(layerName.c_str()); 230 | previous = reorg->getOutput(0); 231 | assert(previous != nullptr); 232 | std::string outputVol = dimsToString(previous->getDimensions()); 233 | channels = getNumChannels(previous); 234 | tensorOutputs.push_back(reorg->getOutput(0)); 235 | printLayerInfo(layerIndex, "reorg", inputVol, outputVol, std::to_string(weightPtr)); 236 | } 237 | // route layers (single or concat) 238 | else if (m_ConfigBlocks.at(i).at("type") == "route") { 239 | std::string strLayers = m_ConfigBlocks.at(i).at("layers"); 240 | std::vector idxLayers; 241 | size_t lastPos = 0, pos = 0; 242 | while ((pos = strLayers.find(',', lastPos)) != std::string::npos) { 243 | int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos))); 244 | idxLayers.push_back (vL); 245 | lastPos = pos + 1; 246 | } 247 | if (lastPos < strLayers.length()) { 248 | std::string lastV = trim(strLayers.substr(lastPos)); 249 | if (!lastV.empty()) { 250 | idxLayers.push_back (std::stoi(lastV)); 251 | } 252 | } 253 | assert (!idxLayers.empty()); 254 | std::vector concatInputs; 255 | for (int idxLayer : idxLayers) { 256 | if (idxLayer < 0) { 257 | idxLayer = tensorOutputs.size() + idxLayer; 258 | } 259 | assert (idxLayer >= 0 && idxLayer < (int)tensorOutputs.size()); 260 | concatInputs.push_back (tensorOutputs[idxLayer]); 261 | } 262 | nvinfer1::IConcatenationLayer* concat; 263 
| if(m_ConfigBlocks.at(i).find("groups") != m_ConfigBlocks.at(i).end()) 264 | { 265 | assert(m_ConfigBlocks.at(i).find("group_id") != m_ConfigBlocks.at(i).end()); 266 | int gorups = std::stoi(m_ConfigBlocks.at(i).at("groups")); 267 | int group_id = std::stoi(m_ConfigBlocks.at(i).at("group_id")); 268 | std::vector group_concatInputs; 269 | for(auto concatInput : concatInputs) 270 | { 271 | Dims out_shape = concatInput->getDimensions(); 272 | ISliceLayer* tmp= network.addSlice(*concatInput,Dims3{out_shape.d[0]/2,0,0},Dims3{out_shape.d[0]/2,out_shape.d[1],out_shape.d[2]},Dims3{1,1,1}); 273 | group_concatInputs.push_back(tmp->getOutput(0)); 274 | } 275 | concat=network.addConcatenation(group_concatInputs.data(), group_concatInputs.size()); 276 | }else { 277 | concat=network.addConcatenation(concatInputs.data(), concatInputs.size()); 278 | } 279 | 280 | assert(concat != nullptr); 281 | std::string concatLayerName = "route_" + std::to_string(i - 1); 282 | concat->setName(concatLayerName.c_str()); 283 | // concatenate along the channel dimension 284 | concat->setAxis(0); 285 | previous = concat->getOutput(0); 286 | assert(previous != nullptr); 287 | std::string outputVol = dimsToString(previous->getDimensions()); 288 | // set the output volume depth 289 | channels 290 | = getNumChannels(previous); 291 | tensorOutputs.push_back(concat->getOutput(0)); 292 | printLayerInfo(layerIndex, "route", " -", outputVol, 293 | std::to_string(weightPtr)); 294 | } else if (m_ConfigBlocks.at(i).at("type") == "upsample") { 295 | std::string inputVol = dimsToString(previous->getDimensions()); 296 | nvinfer1::ILayer* out = netAddUpsample(i - 1, m_ConfigBlocks[i], 297 | weights, m_TrtWeights, channels, previous, &network); 298 | previous = out->getOutput(0); 299 | std::string outputVol = dimsToString(previous->getDimensions()); 300 | tensorOutputs.push_back(out->getOutput(0)); 301 | printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -"); 302 | } else if 
(m_ConfigBlocks.at(i).at("type") == "maxpool") { 303 | std::string inputVol = dimsToString(previous->getDimensions()); 304 | nvinfer1::ILayer* out = 305 | netAddMaxpool(i, m_ConfigBlocks.at(i), previous, &network); 306 | previous = out->getOutput(0); 307 | assert(previous != nullptr); 308 | std::string outputVol = dimsToString(previous->getDimensions()); 309 | tensorOutputs.push_back(out->getOutput(0)); 310 | printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr)); 311 | } 312 | else 313 | { 314 | std::cout << "Unsupported layer type --> \"" 315 | << m_ConfigBlocks.at(i).at("type") << "\"" << std::endl; 316 | assert(0); 317 | } 318 | } 319 | 320 | auto creator = getPluginRegistry()->getPluginCreator("YoloLayer_TRT", "1"); 321 | assert(m_YoloKernel.size() == outputTensorCount); 322 | 323 | // plugin filed 数量 324 | int numyololayers = m_YoloKernel.size(); 325 | 326 | // 假定每个yolo输出层class相等 327 | int numclass = m_OutputTensors[0].numClasses; 328 | int input_w = m_InputW; 329 | int input_h = m_InputH; 330 | 331 | std::vector mPluginAttributes1 = { 332 | PluginField("numclass", &numclass, PluginFieldType::kINT32, 1), 333 | PluginField("input_w", &input_w, PluginFieldType::kINT32, 1), 334 | PluginField("input_h", &input_h, PluginFieldType::kINT32, 1), 335 | PluginField("numyololayers", &numyololayers, PluginFieldType::kINT32, 1), 336 | PluginField("m_YoloKernel", &m_YoloKernel, PluginFieldType::kUNKNOWN, numyololayers), 337 | }; 338 | PluginFieldCollection mFC1; 339 | mFC1.nbFields = mPluginAttributes1.size(); 340 | mFC1.fields = mPluginAttributes1.data(); 341 | IPluginV2 * yoloplugin = creator->createPlugin(creator->getPluginName(), &mFC1); 342 | 343 | ITensor** inputTensors_yolo = new ITensor*; 344 | for (int i = 0; igetOutput(0); 352 | assert(previous != nullptr); 353 | previous->setName("prob"); 354 | std::string outputVol = dimsToString(previous->getDimensions()); 355 | network.markOutput(*previous); 356 | 357 | if ((int)weights.size() 
!= weightPtr) 358 | { 359 | std::cout << "Number of unused weights left : " << (int)weights.size() - weightPtr << std::endl; 360 | assert(0); 361 | } 362 | 363 | std::cout << "Output yolo blob names :" << std::endl; 364 | for (auto& tensor : m_OutputTensors) { 365 | std::cout << tensor.blobName << std::endl; 366 | } 367 | 368 | int nbLayers = network.getNbLayers(); 369 | std::cout << "Total number of yolo layers: " << nbLayers << std::endl; 370 | 371 | return NVDSINFER_SUCCESS; 372 | } 373 | 374 | std::vector> 375 | Yolo::parseConfigFile (const std::string cfgFilePath) 376 | { 377 | assert(fileExists(cfgFilePath)); 378 | std::ifstream file(cfgFilePath); 379 | assert(file.good()); 380 | std::string line; 381 | std::vector> blocks; 382 | std::map block; 383 | 384 | while (getline(file, line)) 385 | { 386 | if (line.size() == 0) continue; 387 | if (line.front() == '#') continue; 388 | line = trim(line); 389 | if (line.front() == '[') 390 | { 391 | if (block.size() > 0) 392 | { 393 | blocks.push_back(block); 394 | block.clear(); 395 | } 396 | std::string key = "type"; 397 | std::string value = trim(line.substr(1, line.size() - 2)); 398 | block.insert(std::pair(key, value)); 399 | } 400 | else 401 | { 402 | int cpos = line.find('='); 403 | std::string key = trim(line.substr(0, cpos)); 404 | std::string value = trim(line.substr(cpos + 1)); 405 | block.insert(std::pair(key, value)); 406 | } 407 | } 408 | blocks.push_back(block); 409 | return blocks; 410 | } 411 | 412 | void Yolo::parseConfigBlocks() 413 | { 414 | for (auto block : m_ConfigBlocks) { 415 | if (block.at("type") == "net") 416 | { 417 | assert((block.find("height") != block.end()) 418 | && "Missing 'height' param in network cfg"); 419 | assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg"); 420 | assert((block.find("channels") != block.end()) 421 | && "Missing 'channels' param in network cfg"); 422 | 423 | m_InputH = std::stoul(block.at("height")); 424 | m_InputW = 
std::stoul(block.at("width")); 425 | m_InputC = std::stoul(block.at("channels")); 426 | // assert(m_InputW == m_InputH); 427 | m_InputSize = m_InputC * m_InputH * m_InputW; 428 | } 429 | else if ((block.at("type") == "region") || (block.at("type") == "yolo")) 430 | { 431 | assert((block.find("num") != block.end()) 432 | && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str()); 433 | assert((block.find("classes") != block.end()) 434 | && std::string("Missing 'classes' param in " + block.at("type") + " layer") 435 | .c_str()); 436 | assert((block.find("anchors") != block.end()) 437 | && std::string("Missing 'anchors' param in " + block.at("type") + " layer") 438 | .c_str()); 439 | 440 | TensorInfo outputTensor; 441 | std::string anchorString = block.at("anchors"); 442 | while (!anchorString.empty()) 443 | { 444 | int npos = anchorString.find_first_of(','); 445 | if (npos != -1) 446 | { 447 | float anchor = std::stof(trim(anchorString.substr(0, npos))); 448 | outputTensor.anchors.push_back(anchor); 449 | anchorString.erase(0, npos + 1); 450 | } 451 | else 452 | { 453 | float anchor = std::stof(trim(anchorString)); 454 | outputTensor.anchors.push_back(anchor); 455 | break; 456 | } 457 | } 458 | 459 | if ((m_NetworkType == "yolov3") || (m_NetworkType == "yolov3-tiny") || (m_NetworkType == "yolov4-tiny") || (m_NetworkType == "yolov4")) 460 | { 461 | assert((block.find("mask") != block.end()) 462 | && std::string("Missing 'mask' param in " + block.at("type") + " layer") 463 | .c_str()); 464 | 465 | std::string maskString = block.at("mask"); 466 | while (!maskString.empty()) 467 | { 468 | int npos = maskString.find_first_of(','); 469 | if (npos != -1) 470 | { 471 | uint mask = std::stoul(trim(maskString.substr(0, npos))); 472 | outputTensor.masks.push_back(mask); 473 | maskString.erase(0, npos + 1); 474 | } 475 | else 476 | { 477 | uint mask = std::stoul(trim(maskString)); 478 | outputTensor.masks.push_back(mask); 479 | break; 480 | } 481 | } 482 | 
} 483 | 484 | outputTensor.numBBoxes = outputTensor.masks.size() > 0 485 | ? outputTensor.masks.size() 486 | : std::stoul(trim(block.at("num"))); 487 | outputTensor.numClasses = std::stoul(block.at("classes")); 488 | m_OutputTensors.push_back(outputTensor); 489 | } 490 | } 491 | } 492 | 493 | void Yolo::destroyNetworkUtils() { 494 | // deallocate the weights 495 | for (uint i = 0; i < m_TrtWeights.size(); ++i) { 496 | if (m_TrtWeights[i].count > 0) 497 | free(const_cast(m_TrtWeights[i].values)); 498 | } 499 | m_TrtWeights.clear(); 500 | } 501 | 502 | -------------------------------------------------------------------------------- /yolo.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | */ 22 | 23 | #ifndef _YOLO_H_ 24 | #define _YOLO_H_ 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "NvInfer.h" 32 | #include "trt_utils.h" 33 | #include "yololayer.h" 34 | #include "mish.h" 35 | 36 | typedef enum { 37 | /** NvDsInferContext operation succeeded. */ 38 | NVDSINFER_SUCCESS = 0, 39 | /** Failed to configure the NvDsInferContext instance possibly due to an 40 | * erroneous initialization property. */ 41 | NVDSINFER_CONFIG_FAILED, 42 | /** Custom Library interface implementation failed. */ 43 | NVDSINFER_CUSTOM_LIB_FAILED, 44 | /** Invalid parameters were supplied. */ 45 | NVDSINFER_INVALID_PARAMS, 46 | /** Output parsing failed. */ 47 | NVDSINFER_OUTPUT_PARSING_FAILED, 48 | /** CUDA error was encountered. */ 49 | NVDSINFER_CUDA_ERROR, 50 | /** TensorRT interface failed. */ 51 | NVDSINFER_TENSORRT_ERROR, 52 | /** Resource error was encountered. */ 53 | NVDSINFER_RESOURCE_ERROR, 54 | /** TRT-IS error was encountered. */ 55 | NVDSINFER_TRTIS_ERROR, 56 | /** Unknown error was encountered. */ 57 | NVDSINFER_UNKNOWN_ERROR 58 | } NvDsInferStatus; 59 | 60 | class IModelParser 61 | { 62 | public: 63 | IModelParser() = default; 64 | /** 65 | * Destructor, make sure all external resource would be released here. */ 66 | virtual ~IModelParser() = default; 67 | 68 | /** 69 | * Function interface for parsing custom model and building tensorrt 70 | * network. 71 | * 72 | * @param[in, out] network NvDsInfer will create the @a network and 73 | * implementation can setup this network layer by layer. 74 | * @return NvDsInferStatus indicating if model parsing was sucessful. 75 | */ 76 | virtual NvDsInferStatus parseModel( 77 | nvinfer1::INetworkDefinition& network) = 0; 78 | 79 | /** 80 | * Function interface to check if parser can support full-dimensions. 
81 | */ 82 | virtual bool hasFullDimsSupported() const = 0; 83 | 84 | /** 85 | * Function interface to get the new model name which is to be used for 86 | * constructing the serialized engine file path. 87 | */ 88 | virtual const char* getModelName() const = 0; 89 | }; 90 | 91 | 92 | /** 93 | * Holds all the file paths required to build a network. 94 | */ 95 | struct NetworkInfo 96 | { 97 | std::string networkType; 98 | std::string configFilePath; 99 | std::string wtsFilePath; 100 | std::string deviceType; 101 | std::string inputBlobName; 102 | }; 103 | 104 | /** 105 | * Holds information about an output tensor of the yolo network. 106 | */ 107 | struct TensorInfo 108 | { 109 | std::string blobName; 110 | uint stride{0}; 111 | uint gridSize{0}; 112 | uint numClasses{0}; 113 | uint numBBoxes{0}; 114 | uint64_t volume{0}; 115 | std::vector masks; 116 | std::vector anchors; 117 | int bindingIndex{-1}; 118 | float* hostBuffer{nullptr}; 119 | }; 120 | 121 | class Yolo : public IModelParser { 122 | public: 123 | Yolo(const NetworkInfo& networkInfo); 124 | ~Yolo() override; 125 | bool hasFullDimsSupported() const override { return false; } 126 | const char* getModelName() const override { 127 | return m_ConfigFilePath.empty() ? 
m_NetworkType.c_str() 128 | : m_ConfigFilePath.c_str(); 129 | } 130 | NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override; 131 | 132 | nvinfer1::ICudaEngine *createEngine (nvinfer1::IBuilder* builder); 133 | 134 | protected: 135 | const std::string m_NetworkType; 136 | const std::string m_ConfigFilePath; 137 | const std::string m_WtsFilePath; 138 | const std::string m_DeviceType; 139 | const std::string m_InputBlobName; 140 | const std::string m_OutputBlobName; 141 | std::vector m_OutputTensors; 142 | std::vector> m_ConfigBlocks; 143 | uint m_InputH; 144 | uint m_InputW; 145 | uint m_InputC; 146 | uint64_t m_InputSize; 147 | 148 | // TRT specific members 149 | std::vector m_TrtWeights; 150 | std::vector m_YoloTensor; 151 | 152 | std::vector m_YoloKernel; 153 | 154 | 155 | private: 156 | NvDsInferStatus buildYoloNetwork( 157 | std::vector& weights, nvinfer1::INetworkDefinition& network); 158 | std::vector> parseConfigFile( 159 | const std::string cfgFilePath); 160 | void parseConfigBlocks(); 161 | void destroyNetworkUtils(); 162 | }; 163 | 164 | #endif // _YOLO_H_ 165 | -------------------------------------------------------------------------------- /yololayer.cpp: -------------------------------------------------------------------------------- 1 | #include "yololayer.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace YoloLayer; 9 | using namespace nvinfer1; 10 | 11 | YoloLayerPlugin::YoloLayerPlugin() 12 | { 13 | mClassCount = CLASS_NUM; 14 | mYoloKernel.clear(); 15 | mYoloKernel.push_back(yolo1); 16 | mYoloKernel.push_back(yolo2); 17 | mYoloKernel.push_back(yolo3); 18 | 19 | mKernelCount = mYoloKernel.size(); 20 | } 21 | 22 | YoloLayerPlugin::~YoloLayerPlugin() 23 | { 24 | } 25 | 26 | // create the plugin at runtime from a byte stream 27 | YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) 28 | { 29 | 30 | const char *d = reinterpret_cast(data), *a = d; 31 | read(d, mClassCount); 32 | 
read(d, mThreadCount); 33 | read(d, mKernelCount); 34 | mYoloKernel.resize(mKernelCount); 35 | auto kernelSize = mKernelCount*sizeof(YoloKernel); 36 | memcpy(mYoloKernel.data(),d,kernelSize); 37 | d += kernelSize; 38 | 39 | assert(d == a + length); 40 | } 41 | 42 | void YoloLayerPlugin::serialize(void* buffer) const 43 | { 44 | std::cout<<"start getSerializationSize"<(buffer), *a = d; 46 | write(d, mClassCount); 47 | write(d, mThreadCount); 48 | write(d, mKernelCount); 49 | auto kernelSize = mKernelCount*sizeof(YoloKernel); 50 | memcpy(d,mYoloKernel.data(),kernelSize); 51 | d += kernelSize; 52 | 53 | assert(d == a + getSerializationSize()); 54 | } 55 | 56 | size_t YoloLayerPlugin::getSerializationSize() const 57 | { 58 | std::cout<<"start getSerializationSize"<(fields[i].data)); 15 | } 16 | else if (!strcmp(attrName, "input_w")){ 17 | mInput_w= *(static_cast(fields[i].data)); 18 | }else if(!strcmp(attrName, "input_h")){ 19 | mInput_h = *(static_cast(fields[i].data)); 20 | }else if(!strcmp(attrName, "numyololayers")){ 21 | mNumYoloLayers = *(static_cast(fields[i].data)); 22 | }else if(!strcmp(attrName, "m_YoloKernel")){ 23 | assert(fields[i].type == PluginFieldType::kUNKNOWN); 24 | tmpvoid = const_cast(fields[i].data); 25 | } 26 | } 27 | // 解析 yolo层 28 | mYoloKernel = *(std::vector *)tmpvoid; 29 | std::cout<<"mYoloKernel.size()"<(data), *a = d; 39 | read(d, mClassCount); 40 | read(d, mThreadCount); 41 | read(d, mNumYoloLayers); 42 | read(d, mInput_h); 43 | read(d, mInput_w); 44 | mYoloKernel.resize(mNumYoloLayers); 45 | auto kernelSize = mNumYoloLayers*sizeof(YoloKernel); 46 | memcpy(mYoloKernel.data(),d,kernelSize); 47 | d += kernelSize; 48 | assert(d == a + length); 49 | } 50 | // 序列化模型,即保存模型,将插件内用到的参数保存到模型中 51 | void YoloLayerPlugin::serialize(void* buffer) const 52 | { 53 | using namespace Tn; 54 | char* d = static_cast(buffer), *a = d; 55 | write(d, mClassCount); 56 | write(d, mThreadCount); 57 | write(d, mNumYoloLayers); 58 | write(d, mInput_h); 59 | write(d, 
mInput_w); 60 | auto kernelSize = mNumYoloLayers*sizeof(YoloKernel); 61 | memcpy(d,mYoloKernel.data(),kernelSize); 62 | d += kernelSize; 63 | assert(d == a + getSerializationSize()); 64 | } 65 | // 保存模型,序列化阶段,计算插件需要保存的数据长度 66 | size_t YoloLayerPlugin::getSerializationSize() const 67 | { 68 | int size = sizeof(mInput_w) +sizeof(mInput_h)+ 69 | sizeof(mClassCount) + sizeof(mThreadCount) + 70 | sizeof(mNumYoloLayers) + sizeof(YoloKernel) * mYoloKernel.size(); 71 | return size; 72 | } 73 | 74 | int YoloLayerPlugin::initialize() 75 | { 76 | return 0; 77 | } 78 | 79 | Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) 80 | { 81 | //output the result to channel 82 | int totalsize = max_output_box * sizeof(Detection) / sizeof(float); 83 | return Dims3(totalsize + 1, 1, 1); 84 | } 85 | 86 | // Set plugin namespace 87 | void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace) 88 | { 89 | mPluginNamespace = pluginNamespace; 90 | } 91 | 92 | const char* YoloLayerPlugin::getPluginNamespace() const 93 | { 94 | return mPluginNamespace; 95 | } 96 | 97 | // Return the DataType of the plugin output at the requested index 98 | DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const 99 | { 100 | return DataType::kFLOAT; 101 | } 102 | 103 | // Return true if output tensor is broadcast across a batch. 104 | bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const 105 | { 106 | return false; 107 | } 108 | 109 | // Return true if plugin can use input that is broadcast across batch without replication. 
110 | bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const 111 | { 112 | return false; 113 | } 114 | 115 | void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) 116 | { 117 | } 118 | 119 | // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 120 | void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) 121 | { 122 | } 123 | 124 | // Detach the plugin object from its execution context. 125 | void YoloLayerPlugin::detachFromContext() {} 126 | 127 | const char* YoloLayerPlugin::getPluginType() const 128 | { 129 | return "YoloLayer_TRT"; 130 | } 131 | 132 | const char* YoloLayerPlugin::getPluginVersion() const 133 | { 134 | return "1"; 135 | } 136 | 137 | void YoloLayerPlugin::destroy() 138 | { 139 | delete this; 140 | } 141 | 142 | // Clone the plugin 143 | IPluginV2IOExt* YoloLayerPlugin::clone() const 144 | { 145 | 146 | YoloLayerPlugin *p = new YoloLayerPlugin(*this); 147 | p->setPluginNamespace(mPluginNamespace); 148 | return p; 149 | } 150 | // 核函数 sigmoid 151 | __device__ float Logist(float data){ return 1.0f / (1.0f + __expf(-data)); }; 152 | // cuda 调用接口 153 | __global__ void CalDetection(const float *input, float *output,int noElements, 154 | int yoloWidth,int yoloHeight,const float* anchors,int classes, 155 | int outputElem,int input_w,int input_h, 156 | float ignore_thresh,int every_yolo_anchors,int max_out_put_bbox_count) { 157 | 158 | int idx = threadIdx.x + blockDim.x * blockIdx.x; 159 | if (idx >= noElements) return; 160 | 161 | int total_grid = yoloWidth * yoloHeight; 162 | int bnIdx = idx / total_grid; 163 | idx = idx - total_grid*bnIdx; 164 | int info_len_i = 5 + classes; 165 | const float* curInput = input + bnIdx * (info_len_i * total_grid * every_yolo_anchors); 166 | 167 | for (int k = 0; k < 3; ++k) { 168 | int class_id = 0; 169 | float 
max_cls_prob = 0.0; 170 | for (int i = 5; i < info_len_i; ++i) { 171 | float p = Logist(curInput[idx + k * info_len_i * total_grid + i * total_grid]); 172 | if (p > max_cls_prob) { 173 | max_cls_prob = p; 174 | class_id = i - 5; 175 | } 176 | } 177 | float box_prob = Logist(curInput[idx + k * info_len_i * total_grid + 4 * total_grid]); 178 | if (max_cls_prob*box_prob < ignore_thresh) continue; 179 | 180 | float *res_count = output + bnIdx*outputElem; 181 | int count = (int)atomicAdd(res_count, 1); 182 | if (count >= max_out_put_bbox_count) return; 183 | char* data = (char * )res_count + sizeof(float) + count*sizeof(Detection); 184 | Detection* det = (Detection*)(data); 185 | 186 | int row = idx / yoloWidth; 187 | int col = idx % yoloWidth; 188 | 189 | //Location 190 | det->bbox[0] = (col + Logist(curInput[idx + k * info_len_i * total_grid + 0 * total_grid]))* input_w/ yoloWidth; 191 | det->bbox[1] = (row + Logist(curInput[idx + k * info_len_i * total_grid + 1 * total_grid]))* input_h/ yoloHeight; 192 | det->bbox[2] = __expf(curInput[idx + k * info_len_i * total_grid + 2 * total_grid]) * anchors[2*k]; 193 | det->bbox[3] = __expf(curInput[idx + k * info_len_i * total_grid + 3 * total_grid]) * anchors[2*k + 1]; 194 | det->det_confidence = box_prob; 195 | det->class_id = class_id; 196 | det->class_confidence = max_cls_prob; 197 | } 198 | } 199 | 200 | void YoloLayerPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize) 201 | { 202 | // 每一层的输出大小长度, 203 | int outputElem = 1 + max_output_box * sizeof(Detection) / sizeof(float); 204 | // 根据batchsize调整输出的output 内存大小,初始化为0, 以最小内存单位字节为长度 205 | for(int idx = 0 ; idx < batchSize; ++idx) { 206 | CUDA_CHECK(cudaMemset(output + idx*outputElem, 0, sizeof(float))); 207 | } 208 | int numElem = 0; 209 | void* devAnchor; 210 | for (unsigned int i = 0;i< mYoloKernel.size();++i) 211 | { 212 | // yolo 每一层的参数 213 | const auto& yolo = mYoloKernel[i]; 214 | numElem = 
yolo.width*yolo.height*batchSize; 215 | if (numElem < mThreadCount) 216 | mThreadCount = numElem; 217 | int every_yolo_anchor_num = yolo.everyYoloAnchors; 218 | size_t AnchorLen = sizeof(float)* yolo.everyYoloAnchors*2; 219 | CUDA_CHECK(cudaMalloc(&devAnchor,AnchorLen)); 220 | CUDA_CHECK(cudaMemcpy(devAnchor, yolo.anchors, AnchorLen, cudaMemcpyHostToDevice)); 221 | CUDA_CHECK(cudaFree(devAnchor)); 222 | // 调用cuda接口,<调用的block数量,每一个block中的thread数量> 223 | CalDetection<<< (yolo.width*yolo.height*batchSize + mThreadCount - 1) / mThreadCount, mThreadCount>>> 224 | (inputs[i],output, numElem, yolo.width, yolo.height, 225 | (float *)devAnchor, mClassCount ,outputElem,mInput_w, mInput_w, 226 | mIgnore_thresh,every_yolo_anchor_num,max_output_box); 227 | } 228 | } 229 | 230 | // 插件标准调用接口,enqueue 231 | int YoloLayerPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) 232 | { 233 | forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, batchSize); 234 | return 0; 235 | } 236 | 237 | YoloPluginCreator::YoloPluginCreator() 238 | { 239 | } 240 | 241 | const char* YoloPluginCreator::getPluginName() const 242 | { 243 | return "YoloLayer_TRT"; 244 | } 245 | 246 | const char* YoloPluginCreator::getPluginVersion() const 247 | { 248 | return "1"; 249 | } 250 | 251 | const PluginFieldCollection* YoloPluginCreator::getFieldNames() 252 | { 253 | return 0; 254 | } 255 | 256 | IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) 257 | { 258 | YoloLayerPlugin* obj = new YoloLayerPlugin(*fc); 259 | obj->setPluginNamespace(mNamespace.c_str()); 260 | return obj; 261 | } 262 | 263 | IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) 264 | { 265 | // This object will be deleted when the network is destroyed 266 | YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength); 267 | 
obj->setPluginNamespace(mNamespace.c_str()); 268 | return obj; 269 | } 270 | 271 | } 272 | -------------------------------------------------------------------------------- /yololayer.h: -------------------------------------------------------------------------------- 1 | #ifndef _YOLO_LAYER_H 2 | #define _YOLO_LAYER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "NvInfer.h" 9 | #include "Utils.h" 10 | #include 11 | #include "NvInferPlugin.h" 12 | 13 | struct YoloKernel 14 | { 15 | int width; 16 | int height; 17 | int everyYoloAnchors; 18 | float anchors[10]; // 一组yolo输出层中 anchors的数据个数 等于 3*2, 可以设置的更大一点,这个无所谓 19 | }; 20 | 21 | struct alignas(float) Detection{ 22 | //x y w h 23 | float bbox[4]; 24 | float det_confidence; 25 | float class_id; 26 | float class_confidence; 27 | }; 28 | 29 | namespace nvinfer1 30 | { 31 | class YoloLayerPlugin: public IPluginV2IOExt 32 | { 33 | public: 34 | YoloLayerPlugin(const PluginFieldCollection& fc); 35 | YoloLayerPlugin(const void* data, size_t length); 36 | 37 | ~YoloLayerPlugin(); 38 | 39 | int getNbOutputs() const override 40 | { 41 | return 1; 42 | } 43 | 44 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; 45 | 46 | int initialize() override; 47 | 48 | virtual void terminate() override {}; 49 | 50 | virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;} 51 | 52 | virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override; 53 | 54 | virtual size_t getSerializationSize() const override; 55 | 56 | virtual void serialize(void* buffer) const override; 57 | 58 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override { 59 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; 60 | } 61 | 62 | const char* getPluginType() const override; 63 | 64 | const char* getPluginVersion() const override; 
65 | 66 | void destroy() override; 67 | 68 | IPluginV2IOExt* clone() const override; 69 | 70 | void setPluginNamespace(const char* pluginNamespace) override; 71 | 72 | const char* getPluginNamespace() const override; 73 | 74 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override; 75 | 76 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override; 77 | 78 | bool canBroadcastInputAcrossBatch(int inputIndex) const override; 79 | 80 | void attachToContext( 81 | cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override; 82 | 83 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override; 84 | 85 | void detachFromContext() override; 86 | 87 | private: 88 | void forwardGpu(const float *const * inputs,float * output, cudaStream_t stream,int batchSize = 1); 89 | int mClassCount; // 检测的目标的类别,从cfg文件获取,在cfg 设置 90 | int mInput_w; // 图像输入的尺寸,从cfg获取 91 | int mInput_h; // 由于umsample层的原因,宽度和高度要想等,TODO 调整 92 | int mNumYoloLayers; // yolo输出层的数量,从cfg获取,无需设置 93 | std::vector mYoloKernel; 94 | 95 | float mIgnore_thresh = 0.4; // 置信度阈值,可以调整 96 | int max_output_box = 1000; // 最大输出数量 97 | int mThreadCount = 256; // cuda 内核函数,每一block中线程数量 98 | const char* mPluginNamespace; // 该插件名称 99 | 100 | }; 101 | // 继承与IPluginCreator,重写虚函数 102 | class YoloPluginCreator : public IPluginCreator 103 | { 104 | public: 105 | YoloPluginCreator(); 106 | 107 | ~YoloPluginCreator() override = default; 108 | const char* getPluginName() const override; 109 | const char* getPluginVersion() const override; 110 | const PluginFieldCollection* getFieldNames() override; 111 | // 生成插件,这个是在 build network时调用 112 | IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; 113 | // 反序列化,在读取保存的trt模型engine时调用,负责解析插件 114 | IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, 
size_t serialLength) override; 115 | void setPluginNamespace(const char* libNamespace) override{ 116 | mNamespace = libNamespace; 117 | } 118 | const char* getPluginNamespace() const override{ 119 | return mNamespace.c_str(); 120 | } 121 | private: 122 | std::string mNamespace; 123 | }; 124 | }; 125 | 126 | #endif 127 | --------------------------------------------------------------------------------