├── README.md
├── yolov5l
├── CMakeLists.txt
├── README.md
├── common.hpp
├── gen_wts.py
├── images
│ ├── bus.jpg
│ └── zidane.jpg
├── logging.h
├── utils.h
├── yololayer.cu
├── yololayer.h
└── yolov5l.cpp
├── yolov5m
├── CMakeLists.txt
├── README.md
├── common.hpp
├── gen_wts.py
├── images
│ ├── bus.jpg
│ └── zidane.jpg
├── logging.h
├── utils.h
├── yololayer.cu
├── yololayer.h
└── yolov5m.cpp
├── yolov5s
├── CMakeLists.txt
├── README.md
├── common.hpp
├── gen_wts.py
├── images
│ ├── bus.jpg
│ └── zidane.jpg
├── logging.h
├── utils.h
├── yololayer.cu
├── yololayer.h
└── yolov5s.cpp
└── yolov5x
├── CMakeLists.txt
├── README.md
├── common.hpp
├── gen_wts.py
├── images
├── bus.jpg
└── zidane.jpg
├── logging.h
├── utils.h
├── yololayer.cu
├── yololayer.h
└── yolov5x.cpp
/README.md:
--------------------------------------------------------------------------------
1 | # yolov5_2.0-TensorRt
2 | U版yolov5 2.0的tensorrt加速
3 |
4 |
5 |
6 | 并且对resize和图像处理阶段的操作做了优化:在Windows环境的Debug模式下速度有很大提升,但Release模式下没有变化,因为Release编译时OpenCV内部已经做了相应的优化。
7 |
8 | ```
9 | 实际上,at操作符与ptr操作符在Debug版本下都是有内存检查、防止操作越界的操作,而data十分简单粗暴,没有任何检查,由于它的简单粗暴所以使得data操作速度很快。所以在Debug版本下,at操作符与ptr操作符相较于data,速度还是慢了不少。
10 |
11 | 另外在Debug版本下,at操作要比指针操作慢得多,所以对于不连续数据或者单个点处理,可以考虑at操作,对于连续的大量数据,尽量不要使用它。
12 | ```
13 |
14 | 感谢下面两个开源实现:
15 |
16 | https://github.com/wang-xinyu/tensorrtx
17 |
18 | https://github.com/AIpakchoi/yolov5_tensorrt
19 |
--------------------------------------------------------------------------------
/yolov5l/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Build script for the yolov5l sample: compiles yololayer.cu into the shared
2 | # library `yololayer` and links it, together with nvinfer (TensorRT), cudart
3 | # and OpenCV, into the `yolov5l` executable.
4 | cmake_minimum_required(VERSION 2.6)
5 |
6 | project(yolov5)
7 |
8 | add_definitions(-std=c++11)
9 |
10 | # Declared before find_package(CUDA) so FindCUDA picks up the OFF default.
11 | # option() expects (<variable> "<help text>" [default]).
12 | option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)
13 | set(CMAKE_CXX_STANDARD 11)
14 | set(CMAKE_BUILD_TYPE Debug)
15 |
16 | find_package(CUDA REQUIRED)
17 |
18 | # NOTE(review): FindCUDA reads CUDA_NVCC_FLAGS; "CUDA_NVCC_PLAGS" looks like a
19 | # typo, which would mean these flags never reach nvcc. Left unchanged because
20 | # correcting the name would start passing -G and the compute_30 target to nvcc;
21 | # confirm the intended flags for the installed toolkit before renaming.
22 | set(CUDA_NVCC_PLAGS ${CUDA_NVCC_PLAGS};-std=c++11;-g;-G;-gencode;arch=compute_30;code=sm_30)
23 |
24 | include_directories(${PROJECT_SOURCE_DIR}/include)
25 | # Select the CUDA header and library directories for the target processor.
26 | if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
27 |     message("embed_platform on")
28 |     include_directories(/usr/local/cuda/targets/aarch64-linux/include)
29 |     link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
30 | else()
31 |     message("embed_platform off")
32 |     include_directories(/usr/local/cuda/include)
33 |     link_directories(/usr/local/cuda/lib64)
34 | endif()
35 |
36 |
37 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
38 |
39 | cuda_add_library(yololayer SHARED ${PROJECT_SOURCE_DIR}/yololayer.cu)
40 |
41 | ########## opencv configuration ############
42 | find_package(OpenCV 3.4 REQUIRED)
43 | # Expand the variables set by find_package(OpenCV): without ${...} CMake would
44 | # print the literal name and add a literal relative include path.
45 | message("OpenCV_LIBS: ${OpenCV_LIBS}")
46 | include_directories(${OpenCV_INCLUDE_DIRS})
47 |
48 | add_executable(yolov5l ${PROJECT_SOURCE_DIR}/yolov5l.cpp)
49 | target_link_libraries(yolov5l nvinfer)
50 | target_link_libraries(yolov5l cudart)
51 | target_link_libraries(yolov5l yololayer)
52 | target_link_libraries(yolov5l ${OpenCV_LIBS})
53 |
54 | add_definitions(-O2 -pthread)
55 |
56 |
--------------------------------------------------------------------------------
/yolov5l/README.md:
--------------------------------------------------------------------------------
1 | # yolov5
2 |
3 | The Pytorch implementation is [ultralytics/yolov5](https://github.com/ultralytics/yolov5).
4 |
5 | This implementation was developed against the latest version of [ultralytics/yolov5](https://github.com/ultralytics/yolov5) at the time of writing; this is noted in case the yolov5 model has since been updated.
6 |
7 | ## How to Run
8 |
9 | ```
10 | 1. generate yolov5l.wts from the PyTorch implementation with yolov5l.pt
11 |
12 | git clone https://github.com/AIpakchoi/yolov5_tensorrt.git
13 | git clone https://github.com/ultralytics/yolov5.git
14 | // download its weights 'yolov5l.pt'
15 | cd yolov5
16 | cp ../yolov5_tensorrt/yolov5l/gen_wts.py .
17 | python gen_wts.py
18 | // a file 'yolov5l.wts' will be generated.
19 |
20 | 2. put yolov5l.wts into yolov5l, build and run
21 |
22 | mv yolov5l.wts ../yolov5_tensorrt/yolov5l/
23 | cd ../yolov5_tensorrt/yolov5l
24 | mkdir build
25 | cd build
26 | cmake ..
27 | make
28 | sudo ./yolov5l -s // serialize model to plan file i.e. 'yolov5l.engine'
29 | sudo ./yolov5l -d ../images // deserialize the plan file and run inference; the images in the images directory will be processed.
30 |
31 | 3. check the images generated, as follows. _zidane.jpg and _bus.jpg
32 | ```
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 | ## Config
43 |
44 | - Input shape defined in yololayer.h
45 | - Number of classes defined in yololayer.h
46 | - FP16/FP32 can be selected by the macro in yolov5l.cpp
47 | - GPU id can be selected by the macro in yolov5l.cpp
48 | - NMS thresh in yolov5l.cpp
49 | - BBox confidence thresh in yolov5l.cpp
50 | - Batch size in yolov5l.cpp
51 |
--------------------------------------------------------------------------------
/yolov5l/common.hpp:
--------------------------------------------------------------------------------
1 | #ifndef YOLOV5_COMMON_H_
2 | #define YOLOV5_COMMON_H_
3 |
4 | #include
5 | #include