├── CMakeLists.txt
├── README.md
├── calibrator.cpp
├── calibrator.h
├── common.hpp
├── cuda_utils.h
├── gen_engine.sh
├── gen_wts.py
├── logging.h
├── macros.h
├── samples
│   ├── bus.jpg
│   └── zidane.jpg
├── utils.h
├── yololayer.cu
├── yololayer.h
├── yolov5.cpp
├── yolov5_trt.py
└── yolov5s.wts
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.6)
2 |
3 | project(yolov5)
4 |
5 | add_definitions(-std=c++11)
6 | add_definitions(-DAPI_EXPORTS)
7 | option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
8 | set(CMAKE_CXX_STANDARD 11)
9 | set(CMAKE_BUILD_TYPE Debug)
10 |
11 | find_package(CUDA REQUIRED)
12 |
13 | if(WIN32)
14 | enable_language(CUDA)
15 | endif(WIN32)
16 |
17 | include_directories(${PROJECT_SOURCE_DIR}/include)
18 | # include and link dirs of cuda and tensorrt, you need adapt them if yours are different
19 | # cuda
20 | include_directories(/usr/local/cuda/include)
21 | link_directories(/usr/local/cuda/lib64)
22 | # tensorrt
23 | include_directories(/usr/include/x86_64-linux-gnu/)
24 | link_directories(/usr/lib/x86_64-linux-gnu/)
25 |
26 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
27 |
28 | cuda_add_library(yoloplugin SHARED ${PROJECT_SOURCE_DIR}/yololayer.cu)
29 | target_link_libraries(yoloplugin nvinfer cudart)
30 |
31 | find_package(OpenCV)
32 | include_directories(${OpenCV_INCLUDE_DIRS})
33 |
34 | add_executable(yolov5 ${PROJECT_SOURCE_DIR}/calibrator.cpp ${PROJECT_SOURCE_DIR}/yolov5.cpp)
35 | target_link_libraries(yolov5 nvinfer)
36 | target_link_libraries(yolov5 nvinfer_plugin)
37 | target_link_libraries(yolov5 cudart)
38 | target_link_libraries(yolov5 yoloplugin)
39 | target_link_libraries(yolov5 ${OpenCV_LIBS})
40 |
41 | if(UNIX)
42 | add_definitions(-O2 -pthread)
43 | endif(UNIX)
44 |
45 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # yolov5-5.0
2 | Original codes from [tensorrtx](https://github.com/wang-xinyu/tensorrtx). I modified the yololayer and integrated batchedNMSPlugin. A `yolov5s.wts` is provided for fast demo. How to generate `.wts` can refer to https://github.com/wang-xinyu/tensorrtx/tree/master/yolov5.
3 |
4 | ## Run with docker
5 |
 6 | 1. Start the docker container, assuming the repo path is `/data/YoLov5-TensorRT-NMS`
7 | ```
8 | sudo docker run --gpus all -v /data/YoLov5-TensorRT-NMS:/work -it registry.cn-guangzhou.aliyuncs.com/nvidia-images/tensorrt20.03-py3:v3 /bin/bash
 9 | cd /work
10 | mkdir build
11 | cd build
12 | cmake ..
13 | make
14 | sudo ./yolov5 -s // serialize model to plan file i.e. 'yolov5s.engine'
15 | sudo ./yolov5 -d ../samples // deserialize plan file and run inference, the images in samples will be processed.
16 | ```
17 |
18 |
19 |
20 | ## How to Run, yolov5s as example
21 |
22 | 1. build and run
23 | ```
24 | mkdir build
25 | cd build
26 | cmake ..
27 | make
28 | sudo ./yolov5 -s // serialize model to plan file i.e. 'yolov5s.engine'
29 | sudo ./yolov5 -d ../samples // deserialize plan file and run inference, the images in samples will be processed.
30 | ```
31 | 2. check the images generated, as follows. _zidane.jpg and _bus.jpg
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 | 3. run Python example, please install Python tensorrt and Pycuda and then
43 | ```
44 | python yolov5_trt.py
45 | ```
46 | ## More Information
47 |
48 | See the readme in [tensorrtx home page.](https://github.com/wang-xinyu/tensorrtx)
49 |
50 | ## Known issues
51 |
52 | None!
--------------------------------------------------------------------------------
/calibrator.cpp:
--------------------------------------------------------------------------------
 1 | #include <fstream>
 2 | #include <iostream>
 3 | #include <iterator>
 4 | #include <opencv2/opencv.hpp>
 5 | #include "calibrator.h"
 6 | #include "cuda_utils.h"
 7 | #include "utils.h"
8 |
 9 | Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache) // sets up INT8 calibration: records batch geometry, allocates the device staging buffer, lists the calibration images
10 |     : batchsize_(batchsize)
11 |     , input_w_(input_w)
12 |     , input_h_(input_h)
13 |     , img_idx_(0) // next unread image index; advanced by getBatch()
14 |     , img_dir_(img_dir)
15 |     , calib_table_name_(calib_table_name)
16 |     , input_blob_name_(input_blob_name)
17 |     , read_cache_(read_cache)
18 | {
19 |     input_count_ = 3 * input_w * input_h * batchsize; // floats per batch: 3-channel CHW times batch size
20 |     CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float))); // one device buffer reused for every calibration batch; freed in the destructor
21 |     read_files_in_dir(img_dir, img_files_); // collect calibration image filenames from img_dir
22 | }
23 |
24 | Int8EntropyCalibrator2::~Int8EntropyCalibrator2() // releases the device buffer allocated in the constructor
25 | {
26 |     CUDA_CHECK(cudaFree(device_input_));
27 | }
28 |
29 | int Int8EntropyCalibrator2::getBatchSize() const TRT_NOEXCEPT
30 | {
31 |     return batchsize_; // TensorRT queries this once to size the bindings it passes to getBatch()
32 | }
33 |
34 | bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings) TRT_NOEXCEPT // fills bindings[0] with the next preprocessed batch; returning false ends calibration
35 | {
36 |     if (img_idx_ + batchsize_ > (int)img_files_.size()) { // not enough images left for a full batch — stop calibrating
37 |         return false;
38 |     }
39 |
40 |     std::vector<cv::Mat> input_imgs_; // fixed: element type <cv::Mat> was stripped during extraction
41 |     for (int i = img_idx_; i < img_idx_ + batchsize_; i++) {
42 |         std::cout << img_files_[i] << " " << i << std::endl;
43 |         cv::Mat temp = cv::imread(img_dir_ + img_files_[i]); // NOTE(review): assumes img_dir_ ends with '/' — confirm what the caller passes
44 |         if (temp.empty()){
45 |             std::cerr << "Fatal error: image cannot open!" << std::endl;
46 |             return false;
47 |         }
48 |         cv::Mat pr_img = preprocess_img(temp, input_w_, input_h_); // resize/letterbox to the network input size (helper from utils)
49 |         input_imgs_.push_back(pr_img);
50 |     }
51 |     img_idx_ += batchsize_;
52 |     cv::Mat blob = cv::dnn::blobFromImages(input_imgs_, 1.0 / 255.0, cv::Size(input_w_, input_h_), cv::Scalar(0, 0, 0), true, false); // NCHW float blob, scaled to [0,1], BGR->RGB swap, no crop
53 |
54 |     CUDA_CHECK(cudaMemcpy(device_input_, blob.ptr(0), input_count_ * sizeof(float), cudaMemcpyHostToDevice));
55 |     assert(!strcmp(names[0], input_blob_name_)); // binding 0 must be the network input tensor
56 |     bindings[0] = device_input_;
57 |     return true;
58 | }
59 |
60 | const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length) TRT_NOEXCEPT // returns cached calibration-table bytes, or nullptr to force recalibration
61 | {
62 |     std::cout << "reading calib cache: " << calib_table_name_ << std::endl;
63 |     calib_cache_.clear();
64 |     std::ifstream input(calib_table_name_, std::ios::binary);
65 |     input >> std::noskipws; // keep whitespace bytes — the cache is binary data
66 |     if (read_cache_ && input.good())
67 |     {
68 |         std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(calib_cache_)); // fixed: iterator element type <char> was stripped during extraction
69 |     }
70 |     length = calib_cache_.size();
71 |     return length ? calib_cache_.data() : nullptr; // nullptr tells TensorRT there is no usable cache
72 | }
73 |
74 | void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length) TRT_NOEXCEPT // persists the calibration table TensorRT produced so later builds can skip calibration
75 | {
76 |     std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl;
77 |     std::ofstream output(calib_table_name_, std::ios::binary);
78 |     output.write(reinterpret_cast<const char*>(cache), length); // fixed: cast target type <const char*> was stripped during extraction
79 | }
80 |
81 |
--------------------------------------------------------------------------------
/calibrator.h:
--------------------------------------------------------------------------------
1 | #ifndef ENTROPY_CALIBRATOR_H
2 | #define ENTROPY_CALIBRATOR_H
3 |
4 | #include
5 | #include
6 | #include
7 | #include "macros.h"
8 |
9 | //! \class Int8EntropyCalibrator2
10 | //!
11 | //! \brief Implements Entropy calibrator 2.
12 | //! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
13 | //!
14 | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2
15 | {
16 | public:
17 | Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache = true);
18 |
19 | virtual ~Int8EntropyCalibrator2();
20 | int getBatchSize() const TRT_NOEXCEPT override;
21 | bool getBatch(void* bindings[], const char* names[], int nbBindings) TRT_NOEXCEPT override;
22 | const void* readCalibrationCache(size_t& length) TRT_NOEXCEPT override;
23 | void writeCalibrationCache(const void* cache, size_t length) TRT_NOEXCEPT override;
24 |
25 | private:
26 | int batchsize_;
27 | int input_w_;
28 | int input_h_;
29 | int img_idx_;
30 | std::string img_dir_;
31 | std::vector img_files_;
32 | size_t input_count_;
33 | std::string calib_table_name_;
34 | const char* input_blob_name_;
35 | bool read_cache_;
36 | void* device_input_;
37 | std::vector calib_cache_;
38 | };
39 |
40 | #endif // ENTROPY_CALIBRATOR_H
41 |
--------------------------------------------------------------------------------
/common.hpp:
--------------------------------------------------------------------------------
1 | #ifndef YOLOV5_COMMON_H_
2 | #define YOLOV5_COMMON_H_
3 |
4 | #include
5 | #include