├── .github
│   ├── examples
│   │   └── python_with_dll
│   │       ├── image-20230302203606848.png
│   │       ├── image-20230302203807549.png
│   │       ├── image-20230302205149660.png
│   │       ├── image-20230302211219640.png
│   │       ├── image-20230302211258968.png
│   │       ├── image-20230302211446110.png
│   │       ├── image-20230302212805461.png
│   │       ├── image-20230302213219151.png
│   │       ├── image-20230302213246167.png
│   │       ├── image-20230302213433177.png
│   │       ├── image-20230302214103308.png
│   │       ├── image-20230302214127422.png
│   │       ├── image-20230302220950777.png
│   │       ├── image-20230302221408389.png
│   │       ├── image-20230302221617892.png
│   │       └── images-20230304121452.png
│   ├── facemesh.jpg
│   ├── libfacedet-Offical(left)vsOurs(right-topk-2000).jpg
│   ├── libfacedet.gif
│   ├── people.gif
│   ├── u2net.gif
│   ├── yolov5s-v5.7-Offical(left)vsOurs(right)-img1.jpg
│   ├── yolov5s-v5.7-Offical(left)vsOurs(right)-img2.jpg
│   ├── yolov6s-v6.3-Offical(left)vsOurs(right).jpg
│   ├── yolov7-tiny-Offical(left)vsOurs(right).jpg
│   ├── yolov8-snow.gif
│   ├── yolov8-stree.gif
│   ├── yolov8n-Offical(left)vsOurs(right).jpg
│   └── yolov8n-b8-1080p-to-640.jpg
├── .gitignore
├── Install_For_Ubuntu18.04
│   └── Install_For_Ubuntu18.04.md
├── LICENSE
├── README.md
├── README_en.md
├── cmake
│   └── common.cmake
├── data
│   ├── 12801.jpg
│   ├── 12802.jpg
│   ├── 12803.jpg
│   ├── 12804.jpg
│   ├── 2.png
│   ├── 51204.jpg
│   ├── 6.jpg
│   ├── 6086083.jpg
│   ├── 6406401.jpg
│   ├── 6406402.jpg
│   ├── 6406403.jpg
│   ├── 6406404.jpg
│   ├── 6406406.jpg
│   ├── 6406407.jpg
│   ├── 7.jpg
│   ├── bus.jpg
│   ├── dog.jpg
│   ├── efficientdet
│   │   └── .gitkeep
│   ├── im_01.png
│   ├── image1.jpg
│   ├── image2.jpg
│   ├── image3.jpg
│   ├── libfacedetction
│   │   └── .gitkeep
│   ├── long.jpg
│   ├── mobilenetv3
│   │   └── .gitkeep
│   ├── people.mp4
│   ├── pphumanseg
│   │   └── .gitkeep
│   ├── resnet18
│   │   └── .gitkeep
│   ├── retinanet
│   │   └── .gitkeep
│   ├── rifle2.jpeg
│   ├── road0.png
│   ├── road1.jpg
│   ├── sailboat3.jpg
│   ├── ssd
│   │   └── .gitkeep
│   ├── swin
│   │   └── .gitkeep
│   ├── u2net
│   │   └── .gitkeep
│   ├── yolor
│   │   ├── .gitkeep
│   │   └── coco.names
│   ├── yolov3
│   │   └── .gitkeep
│   ├── yolov4
│   │   └── .gitkeep
│   ├── yolov5
│   │   └── .gitkeep
│   ├── yolov6
│   │   └── .gitkeep
│   ├── yolov7
│   │   └── .gitkeep
│   ├── yolov8-pose
│   │   └── .gitkeep
│   ├── yolov8
│   │   └── .gitkeep
│   ├── yolox
│   │   └── .gitkeep
│   └── zidane.jpg
├── docker
│   ├── README.md
│   └── ubuntu18.04-cu113.Dockerfile
├── efficientdet
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── app_efficientdet.cpp
│   ├── efficientdet.cpp
│   └── efficientdet.h
├── examples
│   └── python_with_dll
│       ├── README.md
│       ├── c_files
│       │   ├── pch.cpp
│       │   └── pch.h
│       ├── config
│       │   └── screen_inf.py
│       └── python_trt.py
├── libfacedetection
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── alpha_edit.py
│   ├── app_libfacedetction.cpp
│   ├── libfacedetection.cu
│   └── libfacedetection.h
├── pphumanseg
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── alpha_edit.py
│   ├── app_pphunmanseg.cpp
│   ├── decode_pphunmanseg.cu
│   ├── decode_pphunmanseg.h
│   ├── pphunmanseg.cpp
│   └── pphunmanseg.h
├── requirements.txt
├── tools
│   └── onnx2trt.cpp
├── u2net
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── alpha_export.py
│   ├── app_u2net.cpp
│   ├── u2net.cu
│   └── u2net.h
├── utils
│   ├── common_include.h
│   ├── kernel_function.cu
│   ├── kernel_function.h
│   ├── tracking
│   │   └── .gitkeep
│   ├── utils.cpp
│   ├── utils.h
│   ├── yolo.cpp
│   └── yolo.h
├── vscode
│   └── launch.json
├── yolonas
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── alpha_export_dynamic.py
│   ├── app_yolo_nas.cpp
│   ├── decode_yolo_nas.cu
│   ├── decode_yolo_nas.h
│   ├── yolo_nas.cpp
│   └── yolo_nas.h
├── yolor
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── alpha_export.py
│   └── app_yolor.cpp
├── yolov3
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── alpha_edit.py
│   └── app_yolov3.cpp
├── yolov4
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── alpha_export.py
│   ├── app_yolov4.cpp
│   ├── decode_yolov4.cu
│   ├── decode_yolov4.h
│   ├── yolov4.cpp
│   └── yolov4.h
├── yolov5
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── alpha_edit.py
│   └── app_yolov5.cpp
├── yolov6
│   ├── CMakeLists.txt
│   ├── README.md
│   └── app_yolov6.cpp
├── yolov7
│   ├── CMakeLists.txt
│   ├── README.md
│   └── app_yolov7.cpp
├── yolov8-pose
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── app_yolov8_pose.cpp
│   ├── decode_yolov8_pose.cu
│   ├── decode_yolov8_pose.h
│   ├── yolov8_pose.cpp
│   └── yolov8_pose.h
├── yolov8-seg
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── app_yolov8_seg.cpp
│   ├── decode_yolov8_seg.cu
│   ├── decode_yolov8_seg.h
│   ├── yolov8_seg.cpp
│   └── yolov8_seg.h
├── yolov8
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── app_yolov8.cpp
│   ├── decode_yolov8.cu
│   ├── decode_yolov8.h
│   ├── yolov8.cpp
│   └── yolov8.h
└── yolox
    ├── CMakeLists.txt
    ├── README.md
    ├── app_yolox.cpp
    ├── yolox.cu
    └── yolox.h
/.github/examples/python_with_dll/image-20230302203606848.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302203606848.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302203807549.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302203807549.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302205149660.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302205149660.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302211219640.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302211219640.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302211258968.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302211258968.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302211446110.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302211446110.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302212805461.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302212805461.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302213219151.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302213219151.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302213246167.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302213246167.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302213433177.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302213433177.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302214103308.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302214103308.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302214127422.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302214127422.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302220950777.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302220950777.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302221408389.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302221408389.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/image-20230302221617892.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302221617892.png
--------------------------------------------------------------------------------
/.github/examples/python_with_dll/images-20230304121452.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/images-20230304121452.png
--------------------------------------------------------------------------------
/.github/facemesh.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/facemesh.jpg
--------------------------------------------------------------------------------
/.github/libfacedet-Offical(left)vsOurs(right-topk-2000).jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/libfacedet-Offical(left)vsOurs(right-topk-2000).jpg
--------------------------------------------------------------------------------
/.github/libfacedet.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/libfacedet.gif
--------------------------------------------------------------------------------
/.github/people.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/people.gif
--------------------------------------------------------------------------------
/.github/u2net.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/u2net.gif
--------------------------------------------------------------------------------
/.github/yolov5s-v5.7-Offical(left)vsOurs(right)-img1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov5s-v5.7-Offical(left)vsOurs(right)-img1.jpg
--------------------------------------------------------------------------------
/.github/yolov5s-v5.7-Offical(left)vsOurs(right)-img2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov5s-v5.7-Offical(left)vsOurs(right)-img2.jpg
--------------------------------------------------------------------------------
/.github/yolov6s-v6.3-Offical(left)vsOurs(right).jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov6s-v6.3-Offical(left)vsOurs(right).jpg
--------------------------------------------------------------------------------
/.github/yolov7-tiny-Offical(left)vsOurs(right).jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov7-tiny-Offical(left)vsOurs(right).jpg
--------------------------------------------------------------------------------
/.github/yolov8-snow.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov8-snow.gif
--------------------------------------------------------------------------------
/.github/yolov8-stree.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov8-stree.gif
--------------------------------------------------------------------------------
/.github/yolov8n-Offical(left)vsOurs(right).jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov8n-Offical(left)vsOurs(right).jpg
--------------------------------------------------------------------------------
/.github/yolov8n-b8-1080p-to-640.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov8n-b8-1080p-to-640.jpg
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Prerequisites
2 | *.d
3 |
4 | # Compiled Object files
5 | *.slo
6 | *.lo
7 | *.o
8 | *.obj
9 |
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 |
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 |
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 |
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 |
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 |
34 |
35 | # VS 2019
36 | *x64/
37 | *.idea
38 | *CUDA_Demo.sln
39 | *vs
40 | *.vcxproj
41 | *.vcxproj.user
42 | *.user
43 | *.onnx
44 | *.trt
45 | *.vcxproj.filters
46 | *.sln
47 |
48 | #vscode
49 | *.vscode
50 |
51 | # linux
52 | *build
53 |
54 | *.ppm
55 | *.tgz
56 | *.prototxt
57 | *.caffemodel
58 | *.code-workspace
59 |
60 | *__pycache__
61 |
 62 | # deep learning files
63 |
64 | *onnx
65 | *trt
66 | *pt
67 | *pth
--------------------------------------------------------------------------------
/Install_For_Ubuntu18.04/Install_For_Ubuntu18.04.md:
--------------------------------------------------------------------------------
1 | ## 1. Install Tool Chains
2 | ```bash
3 | sudo apt-get update
4 | sudo apt-get install build-essential
5 | sudo apt-get install git
6 | sudo apt-get install gdb
7 | sudo apt-get install cmake
8 | ```
9 | ```bash
10 | sudo apt-get install pkg-config libgtk-3-dev libavcodec-dev libavformat-dev libswscale-dev libv4l-dev libxvidcore-dev libx264-dev
11 | sudo apt-get install libopencv-dev
12 | # pkg-config --modversion opencv
13 | ```
14 | ## 2. Install Nvidia Libs
15 | ### 2.1 install nvidia driver470
16 | ```bash
17 | ubuntu-drivers devices
18 | sudo add-apt-repository ppa:graphics-drivers/ppa
19 | sudo apt update
20 | sudo apt install nvidia-driver-470-server # for ubuntu18.04
21 | nvidia-smi
22 | ```
23 | ### 2.2 install cuda11.3
24 | - enter: https://developer.nvidia.com/cuda-toolkit-archive
25 | - select: CUDA Toolkit 11.3.0 (April 2021)
26 | - select: [Linux] -> [x86_64] -> [Ubuntu] -> [18.04] -> [runfile(local)]
27 | You will see installation instructions on the web page like this:
28 | ```bash
29 | wget https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda_11.3.0_465.19.01_linux.run
30 | sudo sh cuda_11.3.0_465.19.01_linux.run
31 | ```
32 | The CUDA installer will open a selection window.
33 | - select: [continue] -> [accept] -> press Enter to deselect the first and second options as shown below (**this is important!**) -> [Install]
34 |
35 | ```bash
36 | CUDA Installer
37 | [ ] Driver # cancel the first
38 | [ ] 465.19.01 # cancel the second
39 | [X] CUDA Toolkit 11.3
40 | [X] CUDA Samples 11.3
41 | [X] CUDA Demo Suite 11.3
42 | [X] CUDA Documentation 11.3
43 | ```
44 |
45 | If the terminal prints the following summary, the installation succeeded.
46 | ```bash
47 | #===========
48 | #= Summary =
49 | #===========
50 |
51 | #Driver: Not Selected
52 | #Toolkit: Installed in /usr/local/cuda-11.3/
53 | #......
54 | ```
55 | Add the environment variables:
56 | ```bash
57 | vim ~/.bashrc
58 | ```
59 | Copy and paste the following into .bashrc
60 | ```bash
61 | # cuda v11.3
62 | export PATH=/usr/local/cuda-11.3/bin${PATH:+:${PATH}}
63 | export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
64 | export CUDA_HOME=/usr/local/cuda-11.3
65 | ```
66 | ```bash
67 | source ~/.bashrc
68 | nvcc -V
69 | ```
70 | The bash window prints the following content:
71 |
72 | nvcc: NVIDIA (R) Cuda compiler driver
73 | Copyright (c) 2005-2021 NVIDIA Corporation
74 | Built on Sun_Mar_21_19:15:46_PDT_2021
75 | Cuda compilation tools, release 11.3, V11.3.58
76 | Build cuda_11.3.r11.3/compiler.29745058_0
77 |
78 |
79 | ### 2.3 install cudnn8.2
80 | - enter: https://developer.nvidia.com/rdp/cudnn-archive
81 | - select: Download cuDNN v8.2.0 (April 23rd, 2021), for CUDA 11.x
82 | - select: cuDNN Library for Linux (x86_64)
83 | - you will download the file "cudnn-11.3-linux-x64-v8.2.0.53.tgz"
84 | ```bash
85 | tar -zxvf cudnn-11.3-linux-x64-v8.2.0.53.tgz
86 | ```
87 | Copy cuDNN into CUDA 11.3's install directory:
88 | ```bash
89 | sudo cp cuda/include/cudnn.h /usr/local/cuda/include/
90 | sudo cp cuda/lib64/libcudnn* /usr/local/cuda/lib64/
91 | sudo chmod a+r /usr/local/cuda/include/cudnn.h
92 | sudo chmod a+r /usr/local/cuda/lib64/libcudnn*
93 | ```
94 | ### 2.4 download tensorrt8.4.2.4
95 | - enter: https://developer.nvidia.cn/nvidia-tensorrt-8x-download
96 | - select: I Agree To the Terms of the NVIDIA TensorRT License Agreement
97 | - select: TensorRT 8.4 GA Update 1
98 | - select: TensorRT 8.4 GA Update 1 for Linux x86_64 and CUDA 11.0, 11.1, 11.2, 11.3, 11.4, 11.5, 11.6 and 11.7 TAR Package
99 | - you will download the file "TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz"
100 | ```bash
101 | tar -zxvf TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
102 | # test
103 | cd TensorRT-8.4.2.4/samples/sampleMNIST
104 | make
105 | cd ../../bin/
106 | ```
107 | Change the following path to your own path! (**this is important!**)
108 | ```bash
109 |
110 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/xxx/temp/TensorRT-8.4.2.4/lib
111 | ./sample_mnist
112 | ```
113 | The terminal prints digit-recognition task output, which indicates TensorRT 8.4.2.4 is installed correctly.
114 |
--------------------------------------------------------------------------------
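
A quick way to verify the cuDNN copy from section 2.3 is to compile a tiny program against the headers and libraries just installed. This is a minimal sketch; the file name `check_cudnn.cpp` and the compile line are illustrative, not part of the original guide.

```cpp
// check_cudnn.cpp - print the cuDNN version the toolchain links against.
// Build (illustrative): g++ check_cudnn.cpp -I/usr/local/cuda/include
//                       -L/usr/local/cuda/lib64 -lcudnn -o check_cudnn
#include <cudnn.h>
#include <cstdio>

int main()
{
    // cudnnGetVersion() returns e.g. 8200 for cuDNN 8.2.x
    std::printf("cuDNN version: %zu\n", cudnnGetVersion());
    return 0;
}
```
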
/cmake/common.cmake:
--------------------------------------------------------------------------------
1 | # set
2 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations")
3 | # find thirdparty
4 | find_package(CUDA REQUIRED)
5 | list(APPEND ALL_LIBS
6 | ${CUDA_LIBRARIES}
7 | ${CUDA_cublas_LIBRARY}
8 | ${CUDA_nppc_LIBRARY} ${CUDA_nppig_LIBRARY} ${CUDA_nppidei_LIBRARY} ${CUDA_nppial_LIBRARY})
9 |
10 | # include cuda's header
11 | list(APPEND INCLUDE_DRIS ${CUDA_INCLUDE_DIRS})
12 | # message(FATAL_ERROR "CUDA_npp_LIBRARY: ${CUDA_npp_LIBRARY}")
13 |
14 | # gather TensorRT lib
15 | #set(TensorRT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../TensorRT)
16 | #set(TensorRT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../TensorRT-8.2.1.8)
17 | #set(TensorRT_ROOT /home/feiyull/TensorRT-Plugin)
18 | set(TensorRT_ROOT /home/feiyull/TensorRT-8.4.2.4)
19 | #set(TensorRT_ROOT /home/feiyull/TensorRT-8.6.1.6)
20 |
21 | find_library(TRT_NVINFER NAMES nvinfer HINTS ${TensorRT_ROOT} PATH_SUFFIXES lib lib64 lib/x64)
22 | find_library(TRT_NVINFER_PLUGIN NAMES nvinfer_plugin HINTS ${TensorRT_ROOT} PATH_SUFFIXES lib lib64 lib/x64)
23 | find_library(TRT_NVONNX_PARSER NAMES nvonnxparser HINTS ${TensorRT_ROOT} PATH_SUFFIXES lib lib64 lib/x64)
24 | find_library(TRT_NVCAFFE_PARSER NAMES nvcaffe_parser HINTS ${TensorRT_ROOT} PATH_SUFFIXES lib lib64 lib/x64)
25 | find_path(TENSORRT_INCLUDE_DIR NAMES NvInfer.h HINTS ${TensorRT_ROOT} PATH_SUFFIXES include)
26 | list(APPEND ALL_LIBS ${TRT_NVINFER} ${TRT_NVINFER_PLUGIN} ${TRT_NVONNX_PARSER} ${TRT_NVCAFFE_PARSER})
27 |
28 | # include tensorrt's headers
29 | list(APPEND INCLUDE_DRIS ${TENSORRT_INCLUDE_DIR})
30 |
31 | # include tensorrt's sample/common headers
32 | #set(SAMPLES_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../common)
33 | #set(SAMPLES_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/common)
34 | set(SAMPLES_COMMON_DIR ${TensorRT_ROOT}/samples/common)
35 | list(APPEND INCLUDE_DRIS ${SAMPLES_COMMON_DIR})
36 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
37 | message(STATUS "ALL_LIBS: ${ALL_LIBS}")
38 |
--------------------------------------------------------------------------------
/data/12801.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/12801.jpg
--------------------------------------------------------------------------------
/data/12802.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/12802.jpg
--------------------------------------------------------------------------------
/data/12803.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/12803.jpg
--------------------------------------------------------------------------------
/data/12804.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/12804.jpg
--------------------------------------------------------------------------------
/data/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/2.png
--------------------------------------------------------------------------------
/data/51204.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/51204.jpg
--------------------------------------------------------------------------------
/data/6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6.jpg
--------------------------------------------------------------------------------
/data/6086083.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6086083.jpg
--------------------------------------------------------------------------------
/data/6406401.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406401.jpg
--------------------------------------------------------------------------------
/data/6406402.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406402.jpg
--------------------------------------------------------------------------------
/data/6406403.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406403.jpg
--------------------------------------------------------------------------------
/data/6406404.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406404.jpg
--------------------------------------------------------------------------------
/data/6406406.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406406.jpg
--------------------------------------------------------------------------------
/data/6406407.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406407.jpg
--------------------------------------------------------------------------------
/data/7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/7.jpg
--------------------------------------------------------------------------------
/data/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/bus.jpg
--------------------------------------------------------------------------------
/data/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/dog.jpg
--------------------------------------------------------------------------------
/data/efficientdet/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/efficientdet/.gitkeep
--------------------------------------------------------------------------------
/data/im_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/im_01.png
--------------------------------------------------------------------------------
/data/image1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/image1.jpg
--------------------------------------------------------------------------------
/data/image2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/image2.jpg
--------------------------------------------------------------------------------
/data/image3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/image3.jpg
--------------------------------------------------------------------------------
/data/libfacedetction/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/libfacedetction/.gitkeep
--------------------------------------------------------------------------------
/data/long.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/long.jpg
--------------------------------------------------------------------------------
/data/mobilenetv3/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/mobilenetv3/.gitkeep
--------------------------------------------------------------------------------
/data/people.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/people.mp4
--------------------------------------------------------------------------------
/data/pphumanseg/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/pphumanseg/.gitkeep
--------------------------------------------------------------------------------
/data/resnet18/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/resnet18/.gitkeep
--------------------------------------------------------------------------------
/data/retinanet/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/retinanet/.gitkeep
--------------------------------------------------------------------------------
/data/rifle2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/rifle2.jpeg
--------------------------------------------------------------------------------
/data/road0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/road0.png
--------------------------------------------------------------------------------
/data/road1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/road1.jpg
--------------------------------------------------------------------------------
/data/sailboat3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/sailboat3.jpg
--------------------------------------------------------------------------------
/data/ssd/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/ssd/.gitkeep
--------------------------------------------------------------------------------
/data/swin/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/swin/.gitkeep
--------------------------------------------------------------------------------
/data/u2net/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/u2net/.gitkeep
--------------------------------------------------------------------------------
/data/yolor/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolor/.gitkeep
--------------------------------------------------------------------------------
/data/yolor/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/data/yolov3/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov3/.gitkeep
--------------------------------------------------------------------------------
/data/yolov4/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov4/.gitkeep
--------------------------------------------------------------------------------
/data/yolov5/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov5/.gitkeep
--------------------------------------------------------------------------------
/data/yolov6/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov6/.gitkeep
--------------------------------------------------------------------------------
/data/yolov7/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov7/.gitkeep
--------------------------------------------------------------------------------
/data/yolov8-pose/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov8-pose/.gitkeep
--------------------------------------------------------------------------------
/data/yolov8/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov8/.gitkeep
--------------------------------------------------------------------------------
/data/yolox/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolox/.gitkeep
--------------------------------------------------------------------------------
/data/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/zidane.jpg
--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------
1 | ## 1. download tensorrt8.4.2.4
2 | - enter: https://developer.nvidia.cn/nvidia-tensorrt-8x-download
3 | - select: I Agree To the Terms of the NVIDIA TensorRT License Agreement
4 | - select: TensorRT 8.4 GA Update 1
5 | - select: TensorRT 8.4 GA Update 1 for Linux x86_64 and CUDA 11.0, 11.1, 11.2, 11.3, 11.4, 11.5, 11.6 and 11.7 TAR Package
6 | - download the file "TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz"
7 |
8 | ```bash
9 | cd TensorRT-Alpha/docker
10 | cp TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz .
11 | ```
12 |
13 | ## 2. build docker images
14 | ```bash
15 | docker build -f ubuntu18.04-cu113.Dockerfile --network=host -t trta .
16 | ```
--------------------------------------------------------------------------------
/docker/ubuntu18.04-cu113.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04
2 | RUN sed -i 's#http://archive.ubuntu.com/#http://mirrors.tuna.tsinghua.edu.cn/#' /etc/apt/sources.list && \
3 | apt-get update
4 |
5 | RUN apt-get install -y software-properties-common && \
6 | add-apt-repository ppa:deadsnakes/ppa && \
7 | apt-get update && \
8 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
9 | build-essential \
10 | git \
11 | gdb \
12 | cmake \
13 | python3.8 \
14 | python3.8-dev \
15 | python3-pip \
16 | && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6 1 \
17 | && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 2 \
18 | && update-alternatives --config python3
19 |
20 | #copy and unzip tensorrt8.4.2.4
21 | RUN mkdir -p /home/feiyull/
22 | COPY TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz /home/feiyull/
23 | RUN cd /home/feiyull/ && \
24 | tar -zxvf TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz && \
25 | rm TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz && \
26 | mkdir workspace
27 |
28 | RUN \
29 | DEBIAN_FRONTEND=noninteractive apt-get install libgl1-mesa-glx -y \
30 | pkg-config \
31 | libgtk-3-dev \
32 | libavcodec-dev \
33 | libavformat-dev \
34 | libswscale-dev \
35 | libv4l-dev \
36 | libxvidcore-dev \
37 | libx264-dev \
38 | libopencv-dev \
39 | && apt-get clean
40 |
41 | # RUN pip3 install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple
42 | # RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
43 | # RUN pip install opencv-python-headless==4.8.0.74 && \
44 | # pip install opencv-python==4.8.0.74 \
45 | # pip install onnx==1.9.0 \
46 | # pip install torch==1.9.0 \
47 | # pip install torchvision==0.10.0 \
48 | # pip install onnx-simplifier==0.4.8
49 |
50 | #RUN cd /root/.cache/pip && \
51 | # rm -r *
--------------------------------------------------------------------------------
/efficientdet/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | set(CMAKE_BUILD_TYPE "Debug")
4 | #set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(efficientdet VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 |
11 | message(STATUS ${ALL_LIBS})
12 | file(GLOB CPPS
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
17 | ${TensorRT_ROOT}/samples/common/logger.cpp
18 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
19 | )
20 | list(REMOVE_ITEM CPPS ${CMAKE_CURRENT_SOURCE_DIR}/app_efficientdet.cpp) # GLOB yields absolute paths
21 |
22 | message(STATUS CPPS = ${CPPS})
23 | list (LENGTH CPPS length)
24 | message(STATUS ***length*** = ${length})
25 | find_package(OpenCV REQUIRED)
26 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
27 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
28 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
29 |
30 | add_library(${PROJECT_NAME} SHARED ${CPPS})
31 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
32 |
33 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
34 | target_compile_options(${PROJECT_NAME} PUBLIC
35 |     $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
36 |
37 | add_executable(app_efficientdet app_efficientdet.cpp)
38 | # NVCC
39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
40 | target_link_libraries(app_efficientdet ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
41 |
--------------------------------------------------------------------------------
/efficientdet/README.md:
--------------------------------------------------------------------------------
1 | ## 1. get onnx
2 | download directly from [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv)
3 |
4 | or export onnx:
5 | ```bash
6 | # Please refer to the following site. It is TensorRT's official documentation and guides you through exporting ONNX from EfficientDet's official weights.
7 | # TensorRT-Alpha converts the Python code to CUDA C.
8 | https://github.com/NVIDIA/TensorRT/blob/release/8.4/samples/python/efficientdet/README.md
9 | ```
10 | ## 2. edit and save onnx
11 | ```bash
12 | # note: If you have obtained onnx by downloading, this step can be ignored
13 | ignored
14 | ```
15 | ## 3. compile onnx
16 | ```bash
17 | # put your onnx file in this path:tensorrt-alpha/data/efficientdet
18 | cd tensorrt-alpha/data/efficientdet
19 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
20 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=efficientdet0.onnx --saveEngine=efficientdet0.trt --buildOnly --minShapes=input:1x512x512x3 --optShapes=input:2x512x512x3 --maxShapes=input:4x512x512x3
21 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=efficientdet1.onnx --saveEngine=efficientdet1.trt --buildOnly --minShapes=input:1x640x640x3 --optShapes=input:2x640x640x3 --maxShapes=input:4x640x640x3
22 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=efficientdet2.onnx --saveEngine=efficientdet2.trt --buildOnly --minShapes=input:1x768x768x3 --optShapes=input:2x768x768x3 --maxShapes=input:4x768x768x3
23 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=efficientdet3.onnx --saveEngine=efficientdet3.trt --buildOnly --minShapes=input:1x896x896x3 --optShapes=input:2x896x896x3 --maxShapes=input:4x896x896x3
24 |
25 | ```
26 | ## 4. run
27 | ```bash
28 | git clone https://github.com/FeiYull/tensorrt-alpha
29 | cd tensorrt-alpha/efficientdet
30 | mkdir build
31 | cd build
32 | cmake ..
33 | make -j10
34 | # note: the dstImage will be saved in tensorrt-alpha/efficientdet/build by default
35 |
36 | # infer image
37 | ./app_efficientdet --model=../../data/efficientdet/efficientdet0.trt --img=../../data/road0.png --size=512 --batch_size=1 --show --savePath
38 | ./app_efficientdet --model=../../data/efficientdet/efficientdet1.trt --img=../../data/road0.png --size=640 --batch_size=1 --show --savePath
39 | ./app_efficientdet --model=../../data/efficientdet/efficientdet2.trt --img=../../data/road0.png --size=768 --batch_size=1 --show --savePath
40 | ./app_efficientdet --model=../../data/efficientdet/efficientdet3.trt --img=../../data/road0.png --size=896 --batch_size=1 --show --savePath
41 |
42 |
43 | # infer video
44 | ./app_efficientdet --model=../../data/efficientdet/efficientdet0.trt --size=512 --batch_size=2 --video=../../data/people.mp4 --show
45 |
46 | # infer camera
47 | ./app_efficientdet --model=../../data/efficientdet/efficientdet0.trt --size=512 --batch_size=2 --cam_id=0 --show
48 | ```
49 | ## 5. appendix
50 | ignore
--------------------------------------------------------------------------------
/efficientdet/efficientdet.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/common_include.h"
3 | #include"../utils/utils.h"
4 | #include"../utils/kernel_function.h"
5 |
6 | class EfficientDet
7 | {
8 | public:
9 | EfficientDet(const utils::InitParameter& param);
10 | ~EfficientDet();
11 |
12 | public:
13 | 	bool init(const std::vector<unsigned char>& trtFile);
14 | void check();
15 | 	void copy(const std::vector<cv::Mat>& imgsBatch);
16 | 	void preprocess(const std::vector<cv::Mat>& imgsBatch);
17 | 	bool infer();
18 | 	void postprocess(const std::vector<cv::Mat>& imgsBatch);
19 | void reset();
20 |
21 | public:
22 | 	std::vector<std::vector<utils::Box>> getObjectss() const;
23 |
24 | protected:
25 | 	std::shared_ptr<nvinfer1::ICudaEngine> m_engine;
26 | 	std::unique_ptr<nvinfer1::IExecutionContext> m_context;
27 |
28 | protected:
29 | utils::InitParameter m_param;
30 | 	std::vector<std::vector<utils::Box>> m_objectss;
31 | utils::AffineMat m_dst2src;
32 | // input
33 | float* m_input_src_device;
34 | float* m_input_resize_device;
35 | float* m_input_rgb_device;
36 | // output
37 | int* m_output_num_device;
38 | int* m_output_boxes_device;
39 | int* m_output_scores_device;
40 | int* m_output_classes_device;
41 | int* m_output_num_host;
42 | int* m_output_boxes_host;
43 | int* m_output_scores_host;
44 | int* m_output_classes_host;
45 | };
--------------------------------------------------------------------------------
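
The EfficientDet class above exposes the pipeline used throughout TensorRT-Alpha: init → check → copy → preprocess → infer → postprocess → getObjectss → reset. A minimal driver sketch follows (the real driver lives in app_efficientdet.cpp, which is not reproduced in this section; parameter values and file paths here are placeholders):

```cpp
#include "efficientdet.h"

int main()
{
    utils::InitParameter param;            // fill sizes/thresholds as app_efficientdet.cpp does
    param.batch_size = 1;
    EfficientDet det(param);

    // utils::loadModel reads a serialized TensorRT engine (see pch.cpp below)
    std::vector<unsigned char> engine = utils::loadModel("../../data/efficientdet/efficientdet0.trt");
    if (engine.empty() || !det.init(engine))
        return -1;
    det.check();

    std::vector<cv::Mat> imgs_batch{ cv::imread("../../data/road0.png") };
    det.copy(imgs_batch);                  // host -> device
    det.preprocess(imgs_batch);            // GPU preprocessing (resize, channel conversion)
    det.infer();                           // TensorRT execution
    det.postprocess(imgs_batch);           // decode detections
    auto objectss = det.getObjectss();     // one vector of utils::Box per image
    det.reset();                           // clear state before the next batch
    return 0;
}
```
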
/examples/python_with_dll/c_files/pch.cpp:
--------------------------------------------------------------------------------
1 | // pch.cpp: source file corresponding to the precompiled header
2 | #include"./utils/yolo.h"
3 | #include "pch.h"
4 | #include"./yolov8/yolov8.h"
5 | // When precompiled headers are used, this source file is required for compilation to succeed.
6 |
7 | void getAimsInfo(const std::vector<std::vector<utils::Box>>& objectss, float(*res_array)[6])
8 | {
9 | for (const auto& objects : objectss)
10 | {
11 | for (const auto& box : objects)
12 | {
13 | res_array[0][0] = box.left;
14 | res_array[0][1] = box.top;
15 | res_array[0][2] = box.right;
16 | res_array[0][3] = box.bottom;
17 | res_array[0][4] = box.label;
18 | res_array[0][5] = box.confidence;
19 |
20 | ++res_array;
21 | }
22 | }
23 | }
24 |
25 |
26 | // c++ code
27 |
28 | void* Init(
29 | const char* trt_file_path,
30 | int src_w,
31 | int src_h,
32 | float conf_thresh,
33 | float iou_thresh,
34 | int num_class
35 | )
36 |
37 | {
38 | // parameters
39 | utils::InitParameter param;
40 |
41 | param.input_output_names = { "images", "output0" };
42 | param.batch_size = 1;
43 | param.src_h = src_h;
44 | param.src_w = src_w;
45 | param.dst_h = 640;
46 | param.dst_w = 640;
47 | param.iou_thresh = iou_thresh;
48 | param.conf_thresh = conf_thresh;
49 | param.num_class = num_class;
50 |
51 | YOLOV8* yolov8 = new YOLOV8(param);
52 |
53 | 	std::vector<unsigned char> trt_file = utils::loadModel(trt_file_path);
54 | if (trt_file.empty())
55 | {
56 | sample::gLogError << "trt_file is empty!" << std::endl;
57 | return nullptr;
58 | }
59 |
60 | if (!yolov8->init(trt_file))
61 | {
62 | 		sample::gLogError << "initEngine() encountered errors!" << std::endl;
63 | return nullptr;
64 | }
65 | yolov8->check();
66 | return yolov8;
67 | }
68 |
69 |
70 | // 2. image inference
71 | void Detect(void* yolo, int rows, int cols, unsigned char* src_data, float(*res_array)[6])
72 |
73 | {
74 | YOLOV8* yolov8 = (YOLOV8*)yolo;
75 |
76 | cv::Mat frame = cv::Mat(rows, cols, CV_8UC3, src_data);
77 |
78 | std::vector imgs_batch(1, frame.clone());
79 |
80 | yolov8->reset();
81 |
82 | yolov8->copy(imgs_batch);
83 |
84 | utils::DeviceTimer d_t1; yolov8->preprocess(imgs_batch); float t1 = d_t1.getUsedTime();
85 | utils::DeviceTimer d_t2; yolov8->infer(); float t2 = d_t2.getUsedTime();
86 | utils::DeviceTimer d_t3; yolov8->postprocess(imgs_batch); float t3 = d_t3.getUsedTime();
87 |
88 | sample::gLogInfo <<
89 | "preprocess time = " << t1 << "; "
90 | "infer time = " << t2 << "; "
91 | "postprocess time = " << t3 << std::endl;
92 |
93 | getAimsInfo(yolov8->getObjectss(), res_array);
94 | }
95 |
--------------------------------------------------------------------------------
/examples/python_with_dll/c_files/pch.h:
--------------------------------------------------------------------------------
1 | // pch.h: this is a precompiled header file.
2 | // Files listed below are compiled only once, improving build performance for future builds.
3 | // This also affects IntelliSense performance, including code completion and many code browsing features.
4 | // However, files listed here are ALL re-compiled if any one of them is updated between builds.
5 | // Do not add files here that you will be updating frequently, as that negates the performance advantage.
6 |
7 | #ifndef PCH_H
8 | #define PCH_H
9 |
10 | // add headers that you want to pre-compile here
11 | #include "framework.h"
12 | #endif //PCH_H
13 |
14 | // define the import/export macro
15 | #ifdef IMPORT_DLL
16 | #else
17 | #define IMPORT_DLL extern "C" _declspec(dllimport)
18 | #endif
19 |
20 |
21 | IMPORT_DLL void* Init(
22 | const char* trt_file_path,
23 | int src_w,
24 | int src_h,
25 | float conf_thresh,
26 | float iou_thresh,
27 | int num_class
28 | );
29 | IMPORT_DLL void Detect(void* yolo, int rows, int cols, unsigned char* src_data, float(*res_array)[6]);
--------------------------------------------------------------------------------
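
For reference, the exported C API above can also be driven from C++ on Windows via LoadLibrary/GetProcAddress, not only from Python. A minimal sketch; the DLL and engine file names are placeholders, and the 50-row result buffer mirrors `max_bbox` in python_trt.py below:

```cpp
#include <windows.h>
#include <opencv2/opencv.hpp>
#include <cstdio>

typedef void* (*InitFn)(const char*, int, int, float, float, int);
typedef void  (*DetectFn)(void*, int, int, unsigned char*, float(*)[6]);

int main()
{
    HMODULE dll = LoadLibraryA("python_dll.dll");            // placeholder DLL name
    if (!dll) return -1;
    InitFn   Init   = (InitFn)GetProcAddress(dll, "Init");
    DetectFn Detect = (DetectFn)GetProcAddress(dll, "Detect");

    cv::Mat img = cv::imread("bus.jpg");                     // any BGR image
    void* yolo = Init("yolov8n.trt", img.cols, img.rows, 0.25f, 0.45f, 80);
    if (!yolo) return -1;

    float res[50][6] = {};                                   // left, top, right, bottom, label, confidence
    Detect(yolo, img.rows, img.cols, img.data, res);
    for (auto& b : res)
        if (b[5] > 0.0f)                                     // rows left all-zero were not filled
            std::printf("label=%d conf=%.2f box=(%.0f, %.0f, %.0f, %.0f)\n",
                        (int)b[4], b[5], b[0], b[1], b[2], b[3]);
    FreeLibrary(dll);
    return 0;
}
```
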
/examples/python_with_dll/config/screen_inf.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import mss
4 | import win32api
5 |
6 | cap = mss.mss()
7 | def grab_screen_mss(monitor):
8 | return cv2.cvtColor(np.array(cap.grab(monitor)), cv2.COLOR_BGRA2BGR)
9 |
10 | def get_parameters():
11 | x, y = get_screen_size().values()
12 | return 0, 0, x, y
13 |
14 | def get_screen_size():
15 | wide = win32api.GetSystemMetrics(0)
16 | high = win32api.GetSystemMetrics(1)
17 | return {"wide": wide, "high": high}
--------------------------------------------------------------------------------
/examples/python_with_dll/python_trt.py:
--------------------------------------------------------------------------------
1 | from ctypes import *
2 | from threading import Thread
3 | import cv2
4 | import numpy as np
5 | import numpy.ctypeslib as npct
6 | from pygame.time import Clock
7 | from config.screen_inf import get_parameters, grab_screen_mss
8 |
9 |
10 | class Detector:
11 | def __init__(
12 | self, dll_path, trt_path, window_width=640, window_height=640, conf_thresh=0.25, iou_thresh=0.45,
13 | num_class=80):
14 | self.yolo = CDLL(dll_path)
15 | self.max_bbox = 50
16 |
17 | self.yolo.Detect.argtypes = [c_void_p, c_int, c_int, POINTER(c_ubyte),
18 | npct.ndpointer(dtype=np.float32, ndim=2, shape=(self.max_bbox, 6),
19 | flags="C_CONTIGUOUS")]
20 |
21 | self.yolo.Init.argtypes = [c_char_p, c_int, c_int, c_float, c_float, c_int]
22 | self.yolo.Init.restype = c_void_p
23 |
24 | self.c_point = self.yolo.Init(trt_path.encode('utf-8'), window_width, window_height, conf_thresh, iou_thresh,
25 | num_class)
26 |
27 | def predict(self, img):
28 | rows, cols = img.shape[0], img.shape[1]
29 | res_arr = np.zeros((self.max_bbox, 6), dtype=np.float32)
30 | self.yolo.Detect(self.c_point, c_int(rows), c_int(cols), img.ctypes.data_as(POINTER(c_ubyte)), res_arr)
31 | self.bbox_array = res_arr[~(res_arr == 0).all(1)]
32 | return self.bbox_array
33 |
34 |
35 | class_names = [
36 | "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
37 | "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
38 | "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
39 | "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
40 | "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
41 | "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
42 | "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
43 | "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
44 | "hair drier", "toothbrush"
45 | ]
46 |
47 | # Capture the specified screen region and run inference on it
48 | if __name__ == '__main__':
49 |
50 | def img_grab_thread():
51 |
52 | global frame
53 | global monitor
54 | clock = Clock()
55 |
56 | while True:
57 | frame = grab_screen_mss(monitor)
58 | clock.tick(200)
59 |
60 |
61 | def img_pred_thread():
62 |
63 | global frame
64 | global source_w
65 | global source_h
66 | det = Detector(dll_path="./python_dll.dll", trt_path="./yolov8n.trt", window_width=source_w,
67 | window_height=source_h)
68 | clock = Clock()
69 |
70 | windows_title = "cvwindow"
71 | cv2.namedWindow(windows_title, cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)
72 |
73 | max_w = 576
74 | max_h = 324
75 | if source_h > max_h or source_w > max_w:
76 | cv2.resizeWindow(windows_title, max_w, source_h * max_w // source_w)
77 |
78 | while True:
79 | aims = det.predict(frame)
80 | for aim in aims:
81 | cv2.rectangle(frame, (int(aim[0]), int(aim[1])), (int(aim[2]), int(aim[3])), (0, 255, 0), 2)
82 | det_info = class_names[int(aim[4])] + " " + str(aim[5])
83 | cv2.putText(frame, det_info, (int(aim[0]), int(aim[1])), cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 0, 255), 1,
84 | cv2.LINE_AA)
85 |
86 | cv2.putText(frame, "FPS:{:.1f}".format(clock.get_fps()), (10, 50), cv2.FONT_HERSHEY_SIMPLEX,
87 | 2, (0, 0, 235), 4)
88 | cv2.imshow('cvwindow', frame)
89 | cv2.waitKey(1)
90 |
91 | clock.tick(200)
92 |
93 |
94 |     # detect the 800x600 (4:3) center region of the screen
95 | source_w = int(800)
96 | source_h = int(600)
97 |
98 | _, _, x, y = get_parameters()
99 | top_x = (x // 2) - (source_w // 2)
100 | top_y = (y // 2) - (source_h // 2)
101 |
102 | monitor = {'left': top_x, 'top': top_y, 'width': source_w, 'height': source_h}
103 |
104 | frame = None
105 |
106 | # To demonstrate the inference speed more intuitively,
107 | # two threads are used here:
108 | # img_grab_thread for image fetching
109 | # img_pred_thread for inference
110 | # Lock is not used here, so the display effect may be poor if the image fetching speed is too high
111 | Thread(target=img_grab_thread).start()
112 | Thread(target=img_pred_thread).start()
113 |
114 | # VideoCapture predict demo
115 | if __name__ == '__main__OFF':
116 | cap = cv2.VideoCapture('./people.mp4')
117 |
118 | source_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
119 | source_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
120 |
121 | det = Detector(dll_path="./yoloDemo.dll", trt_path="./yolov8n.trt", window_width=source_w, window_height=source_h)
122 |
123 | clock = Clock()
124 | while True:
125 | ret, frame = cap.read()
126 | if not ret:
127 | break
128 |
129 | aims = det.predict(frame)
130 |
131 | # do something here
132 | for aim in aims:
133 | cv2.rectangle(frame, (int(aim[0]), int(aim[1])), (int(aim[2]), int(aim[3])), (0, 255, 0), 2)
134 | det_info = class_names[int(aim[4])] + " " + str(aim[5])
135 | cv2.putText(frame, det_info, (int(aim[0]), int(aim[1])), cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 0, 255), 1,
136 | cv2.LINE_AA)
137 |
138 | cv2.imshow('cvwindow', frame)
139 | cv2.waitKey(1)
140 |
141 | print('pred fps: ', clock.get_fps())
142 | clock.tick(5)
143 |
144 | cap.release()
145 | cv2.destroyAllWindows()
146 |
--------------------------------------------------------------------------------
/libfacedetection/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | # cuda
7 | PROJECT(facedet VERSION 1.0.0 LANGUAGES C CXX CUDA)
8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
11 |
12 | message(STATUS ${ALL_LIBS})
13 | file(GLOB CPPS
14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
15 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
17 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
18 | ${TensorRT_ROOT}/samples/common/logger.cpp
19 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
20 | )
21 | list(REMOVE_ITEM CPPS ${CMAKE_CURRENT_SOURCE_DIR}/app_libfacedetction.cpp)
22 |
23 | message(STATUS CPPS = ${CPPS})
24 | list (LENGTH CPPS length)
25 | message(STATUS ***length*** = ${length})
26 | find_package(OpenCV REQUIRED)
27 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
28 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
29 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
30 | add_library(${PROJECT_NAME} SHARED ${CPPS})
31 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
32 |
33 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
34 | target_compile_options(${PROJECT_NAME} PUBLIC
35 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
36 |
37 | add_executable(app_libfacedetction app_libfacedetction.cpp)
38 |
39 | # NVCC
40 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
41 | target_link_libraries(app_libfacedetction ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
42 |
--------------------------------------------------------------------------------
/libfacedetection/README.md:
--------------------------------------------------------------------------------
1 | ## 1. get onnx
2 | download directly from [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv)
3 |
4 | or export onnx:
5 | ```bash
6 | git clone https://github.com/ShiqiYu/libfacedetection.train && cd libfacedetection.train
7 | git checkout a3bc97c7e85bb206c9feca97fbd541ce82cfa3a9
8 | 
9 | # note: the official repository provides the following three models:
10 | #   yunet_yunet_final_320_320_simplify.onnx
11 | #   yunet_yunet_final_640_640_simplify.onnx
12 | #   yunet_yunet_final_dynamic_simplify.onnx
13 | # the third (dynamic) model is used here.
14 | ```
15 | ## 2.edit and save onnx
16 | ```bash
17 | # note: if you obtained the onnx by downloading, this step can be skipped
18 | conda activate tensorrt-alpha
19 | # put your onnx file in this path: tensorrt-alpha/data/libfacedetction
20 | cd tensorrt-alpha/data/libfacedetction
21 | python ../../libfacedetection/alpha_edit.py --onnx=yunet_yunet_final_dynamic_simplify.onnx
22 | ```
23 | ## 3.compile onnx
24 | ```bash
25 | # put your onnx file in this path: tensorrt-alpha/data/libfacedetction
26 | cd tensorrt-alpha/data/libfacedetction
27 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
28 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yunet_yunet_final_dynamic_simplify.onnx --saveEngine=alpha_yunet_yunet_final_dynamic_simplify.trt --buildOnly --minShapes=input:1x3x120x120 --optShapes=input:4x3x320x320 --maxShapes=input:8x3x2000x2000
29 | ```
30 | ## 4.run
31 | ```bash
32 | git clone https://github.com/FeiYull/tensorrt-alpha
33 | cd tensorrt-alpha/libfacedetection
34 | mkdir build
35 | cd build
36 | cmake ..
37 | make -j10
38 | # note: the dstImage will be saved in tensorrt-alpha/libfacedetection/build by default
39 |
40 | # dynamic [b w h]
41 | # infer image
42 | ./app_libfacedetction --model=../../data/libfacedetction/alpha_yunet_yunet_final_dynamic_simplify.trt --batch_size=1 --img=../../data/6406401.jpg --show --savePath
43 |
44 | # infer video
45 | ./app_libfacedetction --model=../../data/libfacedetction/alpha_yunet_yunet_final_dynamic_simplify.trt --batch_size=8 --video=../../data/people.mp4 --show
46 |
47 | # infer camera
48 | ./app_libfacedetction --model=../../data/libfacedetction/alpha_yunet_yunet_final_dynamic_simplify.trt --batch_size=2 --cam_id=0 --show
49 | ```
50 | ## 5. appendix
51 | ignore
--------------------------------------------------------------------------------
/libfacedetection/alpha_edit.py:
--------------------------------------------------------------------------------
1 | import argparse
4 | import onnx
5 | import onnx.checker
6 | import onnx.utils
7 | from onnx.tools import update_model_dims
8 | import onnx.helper as helper
10 |
11 | if __name__ == '__main__':
12 | parser = argparse.ArgumentParser()
13 | parser.add_argument('--onnx', type=str, default='yunet_yunet_final_dynamic_simplify.onnx', help='onnx path')
14 | opt = parser.parse_args()
15 |
16 | model = onnx.load(opt.onnx)
17 | in_b = model.graph.input[0].type.tensor_type.shape.dim[0]
18 | in_c = model.graph.input[0].type.tensor_type.shape.dim[1]
19 | in_h = model.graph.input[0].type.tensor_type.shape.dim[2]
20 | in_w = model.graph.input[0].type.tensor_type.shape.dim[3]
21 | # loc
22 | out_loc_b = model.graph.output[0].type.tensor_type.shape.dim[0]
23 | out_loc_num_candidates = model.graph.output[0].type.tensor_type.shape.dim[1]
24 | out_loc_dim2 = model.graph.output[0].type.tensor_type.shape.dim[2]
25 | # conf
26 | out_conf_b = model.graph.output[1].type.tensor_type.shape.dim[0]
27 | out_conf_num_candidates = model.graph.output[1].type.tensor_type.shape.dim[1]
28 | out_conf_dim2 = model.graph.output[1].type.tensor_type.shape.dim[2]
29 | # iou
30 | out_iou_b = model.graph.output[2].type.tensor_type.shape.dim[0]
31 | out_iou_num_candidates = model.graph.output[2].type.tensor_type.shape.dim[1]
32 | out_iou_dim2 = model.graph.output[2].type.tensor_type.shape.dim[2]
33 | in_b.dim_param = "batch_size"
34 | in_h.dim_param = "height"
35 | in_w.dim_param = "width"
36 | out_loc_b.dim_param = "batch_size"
37 | out_conf_b.dim_param = "batch_size"
38 | out_iou_b.dim_param = "batch_size"
39 | out_loc_num_candidates.dim_param = "num_candidates"
40 | out_conf_num_candidates.dim_param = "num_candidates"
41 | out_iou_num_candidates.dim_param = "num_candidates"
42 |
43 | onnx.save(model, 'alpha_yunet_yunet_final_dynamic_simplify.onnx')
44 | print("ok")
45 |
46 |
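To confirm the renames above took effect, the saved model can be loaded back and its symbolic dimensions printed; a quick check using only `onnx` (run from the directory holding the edited file):

```python
import onnx

m = onnx.load('alpha_yunet_yunet_final_dynamic_simplify.onnx')
# the input should now read [batch_size, 3, height, width]
dims = m.graph.input[0].type.tensor_type.shape.dim
print([d.dim_param or d.dim_value for d in dims])
# each output should start with [batch_size, num_candidates, ...]
for out in m.graph.output:
    dims = out.type.tensor_type.shape.dim
    print(out.name, [d.dim_param or d.dim_value for d in dims])
```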
--------------------------------------------------------------------------------
/libfacedetection/app_libfacedetction.cpp:
--------------------------------------------------------------------------------
1 | #include"../utils/common_include.h"
2 | #include"../utils/utils.h"
3 | #include"libfacedetection.h"
4 |
5 | void setParameters(utils::InitParameter& initParameters)
6 | {
7 | initParameters.class_names = utils::dataSets::face2;
8 |
9 | initParameters.num_class = 2;
10 | initParameters.batch_size = 8;
11 | // dynamic: HWC
12 | /*initParameters.dst_h = 640;
13 | initParameters.dst_w = 640;*/
14 |
15 | initParameters.topK = 1000;
16 |
17 | initParameters.input_output_names = { "input", "loc", "conf", "iou"};
18 | initParameters.conf_thresh = 0.3f;
19 | initParameters.iou_thresh = 0.45f;
20 | initParameters.save_path = "";
21 | }
22 |
23 | void task(LibFaceDet& face_det, const utils::InitParameter& param, std::vector<cv::Mat>& imgsBatch, const int& delayTime, const int& batchi,
24 | const bool& isShow, const bool& isSave)
25 | {
26 | face_det.copy(imgsBatch);
27 | utils::DeviceTimer d_t1; face_det.preprocess(imgsBatch); float t1 = d_t1.getUsedTime();
28 | utils::DeviceTimer d_t2; face_det.infer(); float t2 = d_t2.getUsedTime();
29 | utils::DeviceTimer d_t3; face_det.postprocess(imgsBatch); float t3 = d_t3.getUsedTime();
30 | sample::gLogInfo << "preprocess time = " << t1 / param.batch_size << "; "
31 | "infer time = " << t2 / param.batch_size << "; "
32 | "postprocess time = " << t3 / param.batch_size << std::endl;
33 | if(isShow)
34 | utils::show(face_det.getObjectss(), param.class_names, delayTime, imgsBatch);
35 | if(isSave)
36 | utils::save(face_det.getObjectss(), param.class_names, param.save_path, imgsBatch, param.batch_size, batchi);
37 | face_det.reset();
38 | }
39 |
40 | int main(int argc, char** argv)
41 | {
42 | cv::CommandLineParser parser(argc, argv,
43 | {
44 | "{model || tensorrt model file }"
45 | "{batch_size|| batch size }"
46 | "{video || video's path }"
47 | "{img || image's path }"
48 | "{cam_id || camera's device id }"
49 | "{show || if show the result }"
50 | "{savePath || save path, can be ignore}"
51 | });
52 | // parameters
53 | utils::InitParameter param;
54 | setParameters(param);
55 | // path
56 | std::string model_path = "../../data/libfacedetction/alpha_yunet_yunet_final_dynamic_simplify.trt";
57 | std::string video_path = "../../data/people.mp4";
58 | std::string image_path = "../../data/6406403.jpg";
59 | // camera' id
60 | int camera_id = 0;
61 |
62 | // get input
63 | utils::InputStream source;
64 | source = utils::InputStream::IMAGE;
65 | //source = utils::InputStream::VIDEO;
66 | //source = utils::InputStream::CAMERA;
67 |
68 | // update params from command line parser
69 | //int size = -1; // w or h
70 | int batch_size = 8;
71 | bool is_show = false;
72 | bool is_save = false;
73 | if(parser.has("model"))
74 | {
75 | model_path = parser.get<std::string>("model");
76 | sample::gLogInfo << "model_path = " << model_path << std::endl;
77 | }
78 |
79 | if(parser.has("batch_size"))
80 | {
81 | batch_size = parser.get<int>("batch_size");
82 | sample::gLogInfo << "batch_size = " << batch_size << std::endl;
83 | param.batch_size = batch_size;
84 | }
85 | if(parser.has("video"))
86 | {
87 | source = utils::InputStream::VIDEO;
88 | video_path = parser.get<std::string>("video");
89 | sample::gLogInfo << "video_path = " << video_path << std::endl;
90 | }
91 | if(parser.has("img"))
92 | {
93 | source = utils::InputStream::IMAGE;
94 | image_path = parser.get<std::string>("img");
95 | sample::gLogInfo << "image_path = " << image_path << std::endl;
96 | }
97 | if(parser.has("cam_id"))
98 | {
99 | source = utils::InputStream::CAMERA;
100 | camera_id = parser.get<int>("cam_id");
101 | sample::gLogInfo << "camera_id = " << camera_id << std::endl;
102 | }
103 | if(parser.has("show"))
104 | {
105 | is_show = true;
106 | sample::gLogInfo << "is_show = " << is_show << std::endl;
107 | }
108 | if(parser.has("savePath"))
109 | {
110 | is_save = true;
111 | param.save_path = parser.get<std::string>("savePath");
112 | sample::gLogInfo << "save_path = " << param.save_path << std::endl;
113 | }
114 |
115 | int total_batches = 0;
116 | int delay_time = 1;
117 | cv::VideoCapture capture;
118 | if (!setInputStream(source, image_path, video_path, camera_id,
119 | capture, total_batches, delay_time, param))
120 | {
121 | sample::gLogError << "read the input data errors!" << std::endl;
122 | return -1;
123 | }
124 |
125 | LibFaceDet face_det(param);
126 |
127 | // read model
128 | std::vector<unsigned char> trt_file = utils::loadModel(model_path);
129 | if (trt_file.empty())
130 | {
131 | sample::gLogError << "trt_file is empty!" << std::endl;
132 | return -1;
133 | }
134 | // init model
135 | if (!face_det.init(trt_file))
136 | {
137 | sample::gLogError << "initEngine() ocur errors!" << std::endl;
138 | return -1;
139 | }
140 | face_det.check();
141 | cv::Mat frame;
142 | std::vector<cv::Mat> imgs_batch;
143 | imgs_batch.reserve(param.batch_size);
144 | sample::gLogInfo << imgs_batch.capacity() << std::endl;
145 | int batchi = 0;
146 | while (capture.isOpened())
147 | {
148 | if (batchi >= total_batches && source != utils::InputStream::CAMERA)
149 | {
150 | break;
151 | }
152 | if (imgs_batch.size() < param.batch_size)
153 | {
154 | if (source != utils::InputStream::IMAGE)
155 | {
156 | capture.read(frame);
157 | }
158 | else
159 | {
160 | frame = cv::imread(image_path);
161 | }
162 |
163 | if (frame.empty())
164 | {
165 | sample::gLogWarning << "no more video or camera frame" << std::endl;
166 | task(face_det, param, imgs_batch, delay_time, batchi, is_show, is_save);
167 | imgs_batch.clear();
168 | batchi++;
169 | break;
170 | }
171 | else
172 | {
173 | imgs_batch.emplace_back(frame.clone());
174 | }
175 |
176 | }
177 | else
178 | {
179 | task(face_det, param, imgs_batch, delay_time, batchi, is_show, is_save);
180 | imgs_batch.clear();
181 | batchi++;
182 | }
183 | }
184 | return 0;
185 | }
--------------------------------------------------------------------------------
/libfacedetection/libfacedetection.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/common_include.h"
3 | #include"../utils/utils.h"
4 | #include"../utils/kernel_function.h"
5 |
6 | class LibFaceDet
7 | {
8 | public:
9 | LibFaceDet(const utils::InitParameter& param);
10 | ~LibFaceDet();
11 |
12 | public:
13 | bool init(const std::vector<unsigned char>& trtFile);
14 | void check();
15 | void copy(const std::vector<cv::Mat>& imgsBatch);
16 | void preprocess(const std::vector<cv::Mat>& imgsBatch);
17 | bool infer();
18 | void postprocess(const std::vector<cv::Mat>& imgsBatch);
19 | void reset();
20 |
21 | public:
22 | std::vector<std::vector<utils::Box>> getObjectss() const;
23 |
24 | private:
25 | std::shared_ptr<nvinfer1::ICudaEngine> m_engine;
26 | std::unique_ptr<nvinfer1::IExecutionContext> m_context;
27 |
28 | protected:
29 | utils::InitParameter m_param;
30 | nvinfer1::Dims m_output_loc_dims;
31 | nvinfer1::Dims m_output_conf_dims;
32 | nvinfer1::Dims m_output_iou_dims;
33 | int m_total_objects;
34 |
35 | // const params on host
36 | const float m_min_sizes_host[4 * 3] =
37 | { 10, 16, 24, 32, 48, FLT_MAX, 64, 96, FLT_MAX, 128, 192, 256 };
38 | const int m_min_sizes_host_dim[4] =
39 | { 3, 2, 2, 3 };
40 | float* m_feat_hw_host;
41 | float* m_prior_boxes_host;
42 | const float m_variances_host[2] = { 0.1f, 0.2f };
43 | // const params on device
44 | float* m_min_sizes_device;
45 | float* m_feat_hw_host_device;
46 | float* m_prior_boxes_device;
47 | float* m_variances_device;
48 | std::vector<std::vector<utils::Box>> m_objectss;
49 | // input
50 | float* m_input_src_device;
51 | float* m_input_hwc_device;
52 | // output
53 | float* m_output_loc_device;
54 | float* m_output_conf_device;
55 | float* m_output_iou_device;
56 | float* m_output_objects_device;
57 | float* m_output_objects_host;
58 | int m_output_objects_width;
59 |
60 | };
61 |
62 | void decodeLibFaceDetDevice(float* minSizes, float* feat_hw, float* priorBoxes, float* variances,
63 | int srcImgWidth, int srcImgHeight,
64 | float confThreshold, int batchSize, int srcHeight,
65 | float* srcLoc, int srcLocWidth,
66 | float* srcConf, int srcConfWidth,
67 | float* srcIou, int srcIouWidth,
68 | float* dst, int dstWidth, int dstHeight);
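For reference, the constants above (`m_min_sizes_host`, `m_variances_host = {0.1f, 0.2f}`) are the usual SSD/YuNet prior-box parameters, and `decodeLibFaceDetDevice` performs the corresponding box decoding on the GPU. A NumPy sketch of the standard decode formula, as a reading aid (not a line-for-line port of the kernel in libfacedetection.cu):

```python
import numpy as np

def decode_boxes(loc, priors, variances=(0.1, 0.2)):
    """loc, priors: (N, 4) arrays in (cx, cy, w, h) form; returns (x1, y1, x2, y2)."""
    # offsets scale the prior center by variance[0], sizes by exp(variance[1] * loc)
    cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
    return np.concatenate([cxcy - wh / 2.0, cxcy + wh / 2.0], axis=1)
```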
--------------------------------------------------------------------------------
/pphumanseg/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(pphunmanseg VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 |
8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
11 |
12 | message(STATUS ${ALL_LIBS})
13 | file(GLOB CPPS
14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
15 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
17 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
18 | ${TensorRT_ROOT}/samples/common/logger.cpp
19 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
20 | )
21 | list(REMOVE_ITEM CPPS ${CMAKE_CURRENT_SOURCE_DIR}/app_pphunmanseg.cpp)
22 |
23 | message(STATUS CPPS = ${CPPS})
24 | list (LENGTH CPPS length)
25 | message(STATUS ***length*** = ${length})
26 | find_package(OpenCV REQUIRED)
27 |
28 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
29 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
30 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
31 |
32 | add_library(${PROJECT_NAME} SHARED ${CPPS})
33 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
34 |
35 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
36 | target_compile_options(${PROJECT_NAME} PUBLIC
37 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
38 |
39 | add_executable(app_pphunmanseg app_pphunmanseg.cpp)
40 |
41 | # NVCC
42 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
43 | target_link_libraries(app_pphunmanseg ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
44 |
--------------------------------------------------------------------------------
/pphumanseg/README.md:
--------------------------------------------------------------------------------
1 | ## 1. get onnx
2 | download directly from [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv)
3 |
4 | or export onnx:
5 | ```bash
6 | # Install git-lfs from https://git-lfs.github.com/
7 | git clone https://github.com/opencv/opencv_zoo && cd opencv_zoo
8 | git checkout ae1d754a3ea14e4244fbea7d781cca2e18584035
9 | git lfs install
10 | git lfs pull
11 | # note: the official onnx is in this path: opencv_zoo/models/human_segmentation_pphumanseg
12 | ```
13 | ## 2.edit and save onnx
14 | ```bash
15 | # note: if you obtained the onnx by downloading, this step can be skipped
16 | conda activate tensorrt-alpha
17 | # put your onnx file in this path:tensorrt-alpha/data/pphumanseg
18 | cd tensorrt-alpha/data/pphumanseg
19 | python ../../pphumanseg/alpha_edit.py --onnx=human_segmentation_pphumanseg_2021oct.onnx
20 | ```
21 | ## 3.compile onnx
22 | ```bash
23 | # put your onnx file in this path:tensorrt-alpha/data/pphumanseg
24 | cd tensorrt-alpha/data/pphumanseg
25 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
26 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=human_segmentation_pphumanseg_2021oct_dynamic.onnx --saveEngine=human_segmentation_pphumanseg_2021oct_dynamic.trt --buildOnly --minShapes=x:1x3x192x192 --optShapes=x:2x3x192x192 --maxShapes=x:4x3x192x192
27 | ```
28 | ## 4.run
29 | ```bash
30 | git clone https://github.com/FeiYull/tensorrt-alpha
31 | cd tensorrt-alpha/pphumanseg
32 | mkdir build
33 | cd build
34 | cmake ..
35 | make -j10
36 | # note: the dstImage will be saved in tensorrt-alpha/pphumanseg/build by default
37 |
38 | # infer image
39 | ./app_pphunmanseg --model=../../data/pphumanseg/human_segmentation_pphumanseg_2021oct_dynamic.trt --img=../../data/6.jpg --size=192 --batch_size=1 --show --savePath
40 |
41 | # infer video
42 | ./app_pphunmanseg --model=../../data/pphumanseg/human_segmentation_pphumanseg_2021oct_dynamic.trt --batch_size=2 --video=../../data/people.mp4 --show
43 |
44 | # infer camera
45 | ./app_pphunmanseg --model=../../data/pphumanseg/human_segmentation_pphumanseg_2021oct_dynamic.trt --batch_size=2 --cam_id=0 --show
46 | ```
47 | ## 5. appendix
48 | ignore
--------------------------------------------------------------------------------
/pphumanseg/alpha_edit.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import onnx
3 | import onnx.checker
4 | import onnx.utils
5 | from onnx.tools import update_model_dims
6 | import onnx.helper as helper
7 |
8 | if __name__ == '__main__':
9 | parser = argparse.ArgumentParser()
10 | parser.add_argument('--onnx', type=str, default='human_segmentation_pphumanseg_2021oct.onnx', help='onnx path')
11 | opt = parser.parse_args()
12 |
13 | model = onnx.load(opt.onnx)
14 |
15 | in_b = model.graph.input[0].type.tensor_type.shape.dim[0]
16 | in_c = model.graph.input[0].type.tensor_type.shape.dim[1]
17 | in_h = model.graph.input[0].type.tensor_type.shape.dim[2]
18 | in_w = model.graph.input[0].type.tensor_type.shape.dim[3]
19 |
20 | out_loc_b = model.graph.output[0].type.tensor_type.shape.dim[0]
21 | out_loc_num_candidates = model.graph.output[0].type.tensor_type.shape.dim[1]
22 | out_loc_dim2 = model.graph.output[0].type.tensor_type.shape.dim[2] # this dimension is left unchanged
23 |
24 | in_b.dim_param = "batch_size"
25 |
26 | out_loc_b.dim_param = "batch_size"
27 |
28 | onnx.save(model, 'human_segmentation_pphumanseg_2021oct_dynamic.onnx')
29 | print("ok")
--------------------------------------------------------------------------------
/pphumanseg/app_pphunmanseg.cpp:
--------------------------------------------------------------------------------
1 | #include"pphunmanseg.h"
2 |
3 | void setParameters(utils::InitParameter& initParameters)
4 | {
5 | initParameters.batch_size = 8;
6 | initParameters.dst_h = 192;
7 | initParameters.dst_w = 192;
8 |
9 | initParameters.means[0] = 0.5f;
10 | initParameters.means[1] = 0.5f;
11 | initParameters.means[2] = 0.5f;
12 | initParameters.stds[0] = 0.5f;
13 | initParameters.stds[1] = 0.5f;
14 | initParameters.stds[2] = 0.5f;
15 |
16 | initParameters.input_output_names = { "x", "save_infer_model/scale_0.tmp_1" };
17 | initParameters.save_path = "";
18 | }
19 |
20 | void task(PPHunmanSeg& hunman_seg, const utils::InitParameter& param, std::vector<cv::Mat>& imgsBatch, const int& delayTime, const int& batchi,
21 | const bool& isShow, const bool& isSave)
22 | {
23 | hunman_seg.copy(imgsBatch);
24 | utils::DeviceTimer d_t1; hunman_seg.preprocess(imgsBatch); float t1 = d_t1.getUsedTime();
25 | utils::DeviceTimer d_t2; hunman_seg.infer(); float t2 = d_t2.getUsedTime();
26 | utils::DeviceTimer d_t3; hunman_seg.postprocess(imgsBatch); float t3 = d_t3.getUsedTime();
27 | sample::gLogInfo << "preprocess time = " << t1 / param.batch_size << "; "
28 | "infer time = " << t2 / param.batch_size << "; "
29 | "postprocess time = " << t3 / param.batch_size << std::endl;
30 | if (isShow)
31 | hunman_seg.showMask(imgsBatch, delayTime);
32 | if (isSave)
33 | hunman_seg.saveMask(imgsBatch, param.save_path, param.batch_size, batchi);
34 | }
35 |
36 | int main(int argc, char** argv)
37 | {
38 | cv::CommandLineParser parser(argc, argv,
39 | {
40 | "{model || tensorrt model file }"
41 | "{size || image (h, w), eg: 640}"
42 | "{batch_size|| batch size }"
43 | "{video || video's path }"
44 | "{img || image's path }"
45 | "{cam_id || camera's device id }"
46 | "{show || if show the result }"
47 | "{savePath || save path, can be ignore}"
48 | });
49 | // parameters
50 | utils::InitParameter param;
51 | setParameters(param);
52 | // path
53 | std::string model_path = "../../data/pphumanseg/human_segmentation_pphumanseg_2021oct_dynamic.trt";
54 | std::string video_path = "../../data/people.mp4";
55 | std::string image_path = "../../data/6406403.jpg";
56 | int camera_id = 0; // camera' id
57 |
58 | // get input
59 | utils::InputStream source;
60 | //source = utils::InputStream::IMAGE;
61 | source = utils::InputStream::VIDEO;
62 | //source = utils::InputStream::CAMERA;
63 |
64 | // update params from command line parser
65 | int size = -1; // w or h
66 | int batch_size = 8;
67 | bool is_show = false;
68 | bool is_save = false;
69 | if(parser.has("model"))
70 | {
71 | model_path = parser.get<std::string>("model");
72 | sample::gLogInfo << "model_path = " << model_path << std::endl;
73 | }
74 | if(parser.has("size"))
75 | {
76 | size = parser.get<int>("size");
77 | sample::gLogInfo << "size = " << size << std::endl;
78 | param.dst_h = param.dst_w = size;
79 | }
80 | if(parser.has("batch_size"))
81 | {
82 | batch_size = parser.get<int>("batch_size");
83 | sample::gLogInfo << "batch_size = " << batch_size << std::endl;
84 | param.batch_size = batch_size;
85 | }
86 | if(parser.has("video"))
87 | {
88 | source = utils::InputStream::VIDEO;
89 | video_path = parser.get<std::string>("video");
90 | sample::gLogInfo << "video_path = " << video_path << std::endl;
91 | }
92 | if(parser.has("img"))
93 | {
94 | source = utils::InputStream::IMAGE;
95 | image_path = parser.get<std::string>("img");
96 | sample::gLogInfo << "image_path = " << image_path << std::endl;
97 | }
98 | if(parser.has("cam_id"))
99 | {
100 | source = utils::InputStream::CAMERA;
101 | camera_id = parser.get<int>("cam_id");
102 | sample::gLogInfo << "camera_id = " << camera_id << std::endl;
103 | }
104 | if(parser.has("show"))
105 | {
106 | is_show = true;
107 | sample::gLogInfo << "is_show = " << is_show << std::endl;
108 | }
109 | if(parser.has("savePath"))
110 | {
111 | is_save = true;
112 | param.save_path = parser.get<std::string>("savePath");
113 | sample::gLogInfo << "save_path = " << param.save_path << std::endl;
114 | }
115 |
116 |
117 |
118 | int total_batches = 0;
119 | int delay_time = 1;
120 | cv::VideoCapture capture;
121 | if (!setInputStream(source, image_path, video_path, camera_id,
122 | capture, total_batches, delay_time, param))
123 | {
124 | sample::gLogError << "read the input data errors!" << std::endl;
125 | return -1;
126 | }
127 |
128 | PPHunmanSeg hunman_seg(param);
129 |
130 | // read model
131 | std::vector<unsigned char> trt_file = utils::loadModel(model_path);
132 | if (trt_file.empty())
133 | {
134 | sample::gLogError << "trt_file is empty!" << std::endl;
135 | return -1;
136 | }
137 | // init model
138 | if (!hunman_seg.init(trt_file))
139 | {
140 | sample::gLogError << "initEngine() ocur errors!" << std::endl;
141 | return -1;
142 | }
143 | hunman_seg.check();
144 | cv::Mat frame;
145 | std::vector<cv::Mat> imgs_batch;
146 | imgs_batch.reserve(param.batch_size);
147 | sample::gLogInfo << imgs_batch.capacity() << std::endl;
148 | int batchi = 0;
149 | while (capture.isOpened())
150 | {
151 | if (batchi >= total_batches && source != utils::InputStream::CAMERA)
152 | {
153 | break;
154 | }
155 | if (imgs_batch.size() < param.batch_size) // get input
156 | {
157 | if (source != utils::InputStream::IMAGE)
158 | {
159 | capture.read(frame);
160 | }
161 | else
162 | {
163 | frame = cv::imread(image_path);
164 | }
165 |
166 | if (frame.empty())
167 | {
168 | sample::gLogWarning << "no more video or camera frame" << std::endl;
169 | task(hunman_seg, param, imgs_batch, delay_time, batchi, is_show, is_save);
170 | imgs_batch.clear();
171 | batchi++;
172 | break;
173 | }
174 | else
175 | {
176 | imgs_batch.emplace_back(frame.clone());
177 | }
178 |
179 | }
180 | else // infer
181 | {
182 | task(hunman_seg, param, imgs_batch, delay_time, batchi, is_show, is_save);
183 | imgs_batch.clear();
184 | batchi++;
185 | }
186 | }
187 | return 0;
188 | }
189 |
190 |
--------------------------------------------------------------------------------
/pphumanseg/decode_pphunmanseg.cu:
--------------------------------------------------------------------------------
1 | #include"decode_pphunmanseg.h"
2 | #include"../utils/kernel_function.h"
3 |
4 | __global__
5 | void decode_pphunmanseg_device_kernel(int batch_size,
6 | float* src, int src_width, int src_height, int src_area, int src_volum,
7 | float* dst, int dst_width, int dst_height, int dst_area, int dst_volum)
8 | {
9 | int dx = blockDim.x * blockIdx.x + threadIdx.x;
10 | int dy = blockDim.y * blockIdx.y + threadIdx.y;
11 | if (dx >= dst_area || dy >= batch_size)
12 | {
13 | return;
14 | }
15 | dst[dy * dst_volum + dx] = (src[dy * src_volum + dx + src_area] > src[dy * src_volum + dx] ? 1.f : 0.f);
16 | }
17 | void pphunmanseg::decodeDevice(int batchSize, float* src, int srcWidth, int srcHeight, float* dst, int dstWidth, int dstHeight)
18 | {
19 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
20 | dim3 grid_size((dstWidth * dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
21 | (batchSize + BLOCK_SIZE - 1) / BLOCK_SIZE);
22 | int src_area = srcWidth * srcHeight;
23 | int src_volum = srcWidth * srcHeight * 2;
24 | int dst_area = dstWidth * dstHeight;
25 | int dst_volum = dstWidth * dstHeight * 1;
26 | decode_pphunmanseg_device_kernel << < grid_size, block_size, 0, nullptr >> > (batchSize,
27 | src, srcWidth, srcHeight, src_area, src_volum,
28 | dst, dstWidth, dstHeight, dst_area, dst_volum);
29 | }
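The kernel above compares the foreground logit (channel 1, offset by `src_area`) against the background logit (channel 0) for every pixel and writes a binary mask. In NumPy terms the whole kernel reduces to a single comparison:

```python
import numpy as np

def decode_pphumanseg(src: np.ndarray) -> np.ndarray:
    """src: (batch, 2, H, W) network output -> (batch, H, W) binary mask."""
    return (src[:, 1] > src[:, 0]).astype(np.float32)
```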
--------------------------------------------------------------------------------
/pphumanseg/decode_pphunmanseg.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/utils.h"
3 | #include"../utils/common_include.h"
4 |
5 | namespace pphunmanseg
6 | {
7 | void decodeDevice(int batchSize, float* src, int srcWidth, int srcHeight, float* dst, int dstWidth, int dstHeight);
8 | }
--------------------------------------------------------------------------------
/pphumanseg/pphunmanseg.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/common_include.h"
3 | #include"../utils/utils.h"
4 | #include"../utils/kernel_function.h"
5 |
6 |
7 | class PPHunmanSeg
8 | {
9 | public:
10 | PPHunmanSeg(const utils::InitParameter& param);
11 | ~PPHunmanSeg();
12 |
13 | public:
14 | bool init(const std::vector<unsigned char>& trtFile);
15 | void check();
16 | void copy(const std::vector<cv::Mat>& imgsBatch);
17 | void preprocess(const std::vector<cv::Mat>& imgsBatch);
18 | bool infer();
19 | void postprocess(const std::vector<cv::Mat>& imgsBatch);
20 | void reset();
21 | void showMask(const std::vector<cv::Mat>& imgsBatch, const int& cvDelayTime);
22 | void saveMask(const std::vector<cv::Mat>& imgsBatch, const std::string& savePath, const int& batchSize, const int& batchi);
23 |
24 | protected:
25 | std::shared_ptr<nvinfer1::ICudaEngine> m_engine;
26 | std::unique_ptr<nvinfer1::IExecutionContext> m_context;
27 |
28 | protected:
29 | utils::InitParameter m_param;
30 | nvinfer1::Dims m_output_src_dims;
31 | int m_output_src_area;
32 |
33 | utils::AffineMat m_dst2src;
34 | utils::AffineMat m_src2dst;
35 |
36 | // input
37 | float* m_input_src_device;
38 | float* m_input_resize_device;
39 | float* m_input_rgb_device;
40 | float* m_input_norm_device;
41 | float* m_input_hwc_device;
42 |
43 | // output
44 | float* m_output_src_device;
45 | float* m_output_mask_device;
46 | float* m_output_resize_device;
47 | float* m_output_resize_host;
48 | };
49 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.9.0
2 | onnx==1.9.0
3 | torchvision==0.10.1 # Image classification
4 | onnx-simplifier==0.4.8
5 | onnxruntime==1.8.0
6 | opencv-python==4.6.0
--------------------------------------------------------------------------------
/tools/onnx2trt.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <string>
3 | #include <cstdio>
4 | #include <fstream>
5 | #include <NvInfer.h>
6 | #include <NvOnnxParser.h>
7 | #include <cuda_runtime.h>
8 | #include <logger.h> // add file: ../TensorRT-8.4.2.4/samples/common/logger.cpp
9 | using namespace std;
10 |
11 | int main() {
12 | // setting
13 | std::string onnx_file = "D:/ThirdParty/TensorRT-8.4.2.4/bin/yolov8n.onnx";
14 | std::string trt_file = "yolov8n.trt";
15 | int min_batchsize = 1;
16 | int opt_batchsize = 1;
17 | int max_batchsize = 2;
18 | nvinfer1::Dims4 min_shape(min_batchsize, 3, 640, 640);
19 | nvinfer1::Dims4 opt_shape(opt_batchsize, 3, 640, 640);
20 | nvinfer1::Dims4 max_shape(max_batchsize, 3, 640, 640);
21 |
22 |
23 |
24 | nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger());
25 | nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
26 | nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
27 |
28 | nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger());
29 | if (!parser->parseFromFile(onnx_file.c_str(), 1)) {
30 | printf("Failed to parser demo.onnx\n");
31 | return -1;
32 | }
33 |
34 | printf("Workspace Size = %.2f MB\n", (1 << 28) / 1024.0f / 1024.0f);
35 | config->setMaxWorkspaceSize(1 << 28);
36 |
37 | auto profile = builder->createOptimizationProfile();
38 | auto input_tensor = network->getInput(0);
39 | int input_channel = input_tensor->getDimensions().d[1];
40 |
41 | profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMIN, min_shape);
42 | profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kOPT, opt_shape);
43 | profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMAX, max_shape);
44 | config->addOptimizationProfile(profile);
45 |
46 | nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
47 | if (engine == nullptr) {
48 | printf("Build engine failed.\n");
49 | return -1;
50 | }
51 | nvinfer1::IHostMemory* model_data = engine->serialize();
52 | FILE* f = fopen(trt_file.c_str(), "wb");
53 | fwrite(model_data->data(), 1, model_data->size(), f);
54 | fclose(f);
55 |
56 | model_data->destroy();
57 | parser->destroy();
58 | engine->destroy();
59 | network->destroy();
60 | config->destroy();
61 | builder->destroy();
62 | printf("Done.\n");
63 | return 0;
64 | }
65 |
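The same builder flow exists in TensorRT's Python API; below is a condensed sketch for TensorRT 8.x. It assumes the ONNX input tensor is named `images` (the C++ code queries the name via `getInput(0)` instead), and the file names mirror the settings above:

```python
import tensorrt as trt

logger = trt.Logger(trt.Logger.INFO)
builder = trt.Builder(logger)
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)

with open("yolov8n.onnx", "rb") as f:
    if not parser.parse(f.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))
        raise SystemExit("failed to parse the onnx file")

config = builder.create_builder_config()
config.max_workspace_size = 1 << 28  # 256 MB, matching the C++ version

# dynamic batch profile: min/opt batch 1, max batch 2, as in the C++ code
profile = builder.create_optimization_profile()
profile.set_shape("images", (1, 3, 640, 640), (1, 3, 640, 640), (2, 3, 640, 640))
config.add_optimization_profile(profile)

plan = builder.build_serialized_network(network, config)  # IHostMemory
with open("yolov8n.trt", "wb") as f:
    f.write(plan)
```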
--------------------------------------------------------------------------------
/u2net/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(u2net VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 |
11 | message(STATUS ${ALL_LIBS})
12 | file(GLOB CPPS
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
17 | ${TensorRT_ROOT}/samples/common/logger.cpp
18 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
19 | )
20 | list(REMOVE_ITEM CPPS ${CMAKE_CURRENT_SOURCE_DIR}/app_u2net.cpp)
21 | message(STATUS CPPS = ${CPPS})
22 | list (LENGTH CPPS length)
23 | message(STATUS ***length*** = ${length})
24 | find_package(OpenCV REQUIRED)
25 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
26 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
27 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
28 |
29 | add_library(${PROJECT_NAME} SHARED ${CPPS})
30 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
31 |
32 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
33 | target_compile_options(${PROJECT_NAME} PUBLIC
34 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
35 |
36 | add_executable(app_u2net app_u2net.cpp)
37 |
38 | # NVCC
39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
40 | target_link_libraries(app_u2net ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
41 |
--------------------------------------------------------------------------------
/u2net/README.md:
--------------------------------------------------------------------------------
1 | ## 1. get onnx
2 | download directly from [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv)
3 |
4 | or export onnx:
5 | ```bash
6 | git clone https://github.com/xuebinqin/U-2-Net
7 | # use the script alpha_export.py provided by this repo to export onnx
8 | cp alpha_export.py U-2-Net/
9 | cd U-2-Net
10 | python alpha_export.py --net=u2net --weights=saved_models/u2net/u2net.pth
11 | python alpha_export.py --net=u2netp --weights=saved_models/u2netp/u2netp.pth
12 | ```
13 | ## 2.edit and save onnx
14 | ```bash
15 | # note: If you have obtained onnx by downloading, this step can be ignored
16 | ignore
17 | ```
18 | ## 3.compile onnx
19 | ```bash
20 | # put your onnx file in this path:tensorrt-alpha/data/u2net
21 | cd tensorrt-alpha/data/u2net
22 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
23 |
24 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=u2net.onnx --saveEngine=u2net.trt --buildOnly --minShapes=images:1x3x320x320 --optShapes=images:4x3x320x320 --maxShapes=images:8x3x320x320
25 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=u2netp.onnx --saveEngine=u2netp.trt --buildOnly --minShapes=images:1x3x320x320 --optShapes=images:4x3x320x320 --maxShapes=images:8x3x320x320
26 | ```
27 | ## 4.run
28 | ```bash
29 | git clone https://github.com/FeiYull/tensorrt-alpha
30 | cd tensorrt-alpha/u2net
31 | mkdir build
32 | cd build
33 | cmake ..
34 | make -j10
35 | # note: the dstImage will be saved in tensorrt-alpha/u2net/build by default
36 |
37 | ## 320
38 | # infer image
39 | ./app_u2net --model=../../data/u2net/u2net.trt --size=320 --batch_size=1 --img=../../data/sailboat3.jpg --show --savePath
40 |
41 | # infer video
42 | ./app_u2net --model=../../data/u2net/u2net.trt --size=320 --batch_size=2 --video=../../data/people.mp4 --show
43 |
44 | # infer camera
45 | ./app_u2net --model=../../data/u2net/u2net.trt --size=320 --batch_size=2 --cam_id=0 --show
46 | ```
47 | ## 5. appendix
48 | ignore
--------------------------------------------------------------------------------
/u2net/alpha_export.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import torch.nn
3 | from model import U2NET
4 | from model import U2NETP
5 |
6 | import onnx
7 | import numpy as np
8 | import onnxsim # pip install onnx-simplifier
9 | import onnxruntime as ort
11 |
12 | class Alpha_U2Net(torch.nn.Module):
13 | def __init__(self, weight_file):
14 | super().__init__()
15 | self.model = U2NET(3, 1)
16 | self.model.load_state_dict(torch.load(weight_file, map_location='cpu'))
17 | self.model.eval()
18 |
19 | def forward(self, x):
20 | y = self.model(x)
21 | return y[0]
22 |
23 | class Alpha_U2Netp(torch.nn.Module):
24 | def __init__(self, weight_file):
25 | super().__init__()
26 | self.model = U2NETP(3, 1)
27 | self.model.load_state_dict(torch.load(weight_file, map_location='cpu'))
28 | self.model.eval()
29 |
30 | def forward(self, x):
31 | y = self.model(x)
32 | return y[0]
33 | """
34 | example:
35 | python alpha_export.py --net=u2net --weights=saved_models/u2net/u2net.pth
36 | python alpha_export.py --net=u2netp --weights=saved_models/u2netp/u2netp.pth
37 | """
38 | if __name__ == '__main__':
39 | parser = argparse.ArgumentParser()
40 | parser.add_argument('--net', type=str, default='u2net', help='net type')
41 | parser.add_argument('--weights', type=str, default='saved_models/u2net/u2net.pth', help='net path')
42 | opt = parser.parse_args()
43 |
44 | net = ''
45 | image_input_shape = [1, 3, 320, 320]
46 | image_input = torch.autograd.Variable(torch.randn(image_input_shape))
47 | input_names = ["images"]
48 | output_names = ["output"]
49 | dynamic_axes = {"images": {0: "batch_size"}, "output": {0: "batch_size"}}
50 |
51 | net = opt.net
52 | if net=='u2net': # for u2net.pt
53 | net_name = "u2net"
54 | onnx_name = net_name + ".onnx"
55 | model_path = opt.weights
56 | u2net = Alpha_U2Net(model_path)
57 | torch.onnx.export(u2net, image_input, "saved_models/onnx/" + onnx_name,
58 | verbose=True,
59 | input_names=input_names,
60 | output_names=output_names,
61 | opset_version=11, # try opset_version=9
62 | training=False,
63 | dynamic_axes=dynamic_axes)
64 | elif net=='u2netp': # for u2netp.pt
65 | model_path = opt.weights
66 | u2netp = Alpha_U2Netp(model_path)
67 | torch.onnx.export(u2netp, image_input, "saved_models/onnx/u2netp.onnx",
68 | verbose=True,
69 | input_names=input_names,
70 | output_names=output_names,
71 | opset_version=11,
72 | training=False,
73 | dynamic_axes=dynamic_axes)
74 |
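The script imports `onnxsim` but never calls it; if a simplified graph is wanted, the exported model can be run through onnx-simplifier afterwards. A minimal sketch (the output file name here is illustrative):

```python
import onnx
import onnxsim  # pip install onnx-simplifier

model = onnx.load("saved_models/onnx/u2net.onnx")
model_simp, ok = onnxsim.simplify(model)   # returns (simplified model, check flag)
assert ok, "onnx-simplifier validation failed"
onnx.save(model_simp, "saved_models/onnx/u2net_sim.onnx")
```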
--------------------------------------------------------------------------------
/u2net/app_u2net.cpp:
--------------------------------------------------------------------------------
1 | #include"u2net.h"
2 |
3 | void setParameters(utils::InitParameter& initParameters)
4 | {
5 | initParameters.class_names = utils::dataSets::coco80;
6 | //initParameters.num_class = 80; // for coco
7 |
8 | initParameters.batch_size = 1;
9 | initParameters.dst_h = 320;
10 | initParameters.dst_w = 320;
11 | initParameters.input_output_names = { "images", "output" };
12 | initParameters.scale = 1.0; // div by max in u2net!
13 | initParameters.means[0] = 0.485;
14 | initParameters.means[1] = 0.456;
15 | initParameters.means[2] = 0.406;
16 | initParameters.stds[0] = 0.229;
17 | initParameters.stds[1] = 0.224;
18 | initParameters.stds[2] = 0.225;
19 |
20 | initParameters.save_path = "";
21 | }
22 |
23 | void task(u2net::U2NET& u2net, const utils::InitParameter& param, std::vector<cv::Mat>& imgsBatch, const int& delayTime, const int& batchi,
24 | const bool& isShow, const bool& isSave)
25 | {
26 | u2net.copy(imgsBatch);
27 | utils::DeviceTimer d_t1; u2net.preprocess(imgsBatch); float t1 = d_t1.getUsedTime();
28 | utils::DeviceTimer d_t2; u2net.infer(); float t2 = d_t2.getUsedTime();
29 | utils::DeviceTimer d_t3; u2net.postprocess(imgsBatch); float t3 = d_t3.getUsedTime();
30 | sample::gLogInfo << "preprocess time = " << t1 / param.batch_size << "; "
31 | "infer time = " << t2 / param.batch_size << "; "
32 | "postprocess time = " << t3 / param.batch_size << std::endl;
33 | if(isShow)
34 | u2net.showMask(imgsBatch, delayTime);
35 | if(isSave)
36 | u2net.saveMask(imgsBatch, param.save_path, param.batch_size, batchi);
37 | }
38 |
39 | int main(int argc, char** argv)
40 | {
41 | cv::CommandLineParser parser(argc, argv,
42 | {
43 | "{model || tensorrt model file }"
44 | "{size || image (h, w), eg: 640}"
45 | "{batch_size|| batch size }"
46 | "{video || video's path }"
47 | "{img || image's path }"
48 | "{cam_id || camera's device id }"
49 | "{show || if show the result }"
50 | "{savePath || save path, can be ignore}"
51 | });
52 | // parameters
53 | utils::InitParameter param;
54 | setParameters(param);
55 | // path
56 | std::string model_path = "../../data/u2net/u2net.trt";
57 | std::string video_path = "../../data/people.mp4";
58 | std::string image_path = "../../data/6406403.jpg";
59 | // camera' id
60 | int camera_id = 0;
61 |
62 | // get input
63 | utils::InputStream source;
64 | //source = utils::InputStream::IMAGE;
65 | //source = utils::InputStream::VIDEO;
66 | source = utils::InputStream::CAMERA;
67 |
68 | // update params from command line parser
69 | int size = -1; // w or h
70 | int batch_size = 8;
71 | bool is_show = false;
72 | bool is_save = false;
73 | if(parser.has("model"))
74 | {
75 | model_path = parser.get<std::string>("model");
76 | sample::gLogInfo << "model_path = " << model_path << std::endl;
77 | }
78 | if(parser.has("size"))
79 | {
80 | size = parser.get<int>("size");
81 | sample::gLogInfo << "size = " << size << std::endl;
82 | param.dst_h = param.dst_w = size;
83 | }
84 | if(parser.has("batch_size"))
85 | {
86 | batch_size = parser.get<int>("batch_size");
87 | sample::gLogInfo << "batch_size = " << batch_size << std::endl;
88 | param.batch_size = batch_size;
89 | }
90 | if(parser.has("video"))
91 | {
92 | source = utils::InputStream::VIDEO;
93 | video_path = parser.get<std::string>("video");
94 | sample::gLogInfo << "video_path = " << video_path << std::endl;
95 | }
96 | if(parser.has("img"))
97 | {
98 | source = utils::InputStream::IMAGE;
99 | image_path = parser.get<std::string>("img");
100 | sample::gLogInfo << "image_path = " << image_path << std::endl;
101 | }
102 | if(parser.has("cam_id"))
103 | {
104 | source = utils::InputStream::CAMERA;
105 | camera_id = parser.get<int>("cam_id");
106 | sample::gLogInfo << "camera_id = " << camera_id << std::endl;
107 | }
108 | if(parser.has("show"))
109 | {
110 | is_show = true;
111 | sample::gLogInfo << "is_show = " << is_show << std::endl;
112 | }
113 | if(parser.has("savePath"))
114 | {
115 | is_save = true;
116 | param.save_path = parser.get<std::string>("savePath");
117 | sample::gLogInfo << "save_path = " << param.save_path << std::endl;
118 | }
119 |
120 | int total_batches = 0;
121 | int delay_time = 1;
122 | cv::VideoCapture capture;
123 | if (!setInputStream(source, image_path, video_path, camera_id,
124 | capture, total_batches, delay_time, param))
125 | {
126 | sample::gLogError << "read the input data errors!" << std::endl;
127 | return -1;
128 | }
129 | u2net::U2NET u2net(param);
130 | // read model
131 | std::vector<unsigned char> trt_file = utils::loadModel(model_path);
132 | if (trt_file.empty())
133 | {
134 | sample::gLogError << "trt_file is empty!" << std::endl;
135 | return -1;
136 | }
137 | // init model
138 | if (!u2net.init(trt_file))
139 | {
140 | sample::gLogError << "initEngine() ocur errors!" << std::endl;
141 | return -1;
142 | }
143 | u2net.check();
144 | cv::Mat frame;
145 | std::vector<cv::Mat> imgs_batch;
146 | imgs_batch.reserve(param.batch_size);
147 | sample::gLogInfo << imgs_batch.capacity() << std::endl;
148 | int batchi = 0;
149 | while (capture.isOpened())
150 | {
151 | if (batchi >= total_batches && source != utils::InputStream::CAMERA)
152 | {
153 | break;
154 | }
155 | if (imgs_batch.size() < param.batch_size)
156 | {
157 | if (source != utils::InputStream::IMAGE)
158 | {
159 | capture.read(frame);
160 | }
161 | else
162 | {
163 | frame = cv::imread(image_path);
164 | }
165 |
166 | if (frame.empty())
167 | {
168 | sample::gLogWarning << "no more video or camera frame" << std::endl;
169 | task(u2net, param, imgs_batch, delay_time, batchi, is_show, is_save);
170 | imgs_batch.clear();
171 | batchi++;
172 | break;
173 | }
174 | else
175 | {
176 | imgs_batch.emplace_back(frame.clone());
177 | }
178 | }
179 | else
180 | {
181 | task(u2net, param, imgs_batch, delay_time, batchi, is_show, is_save);
182 | imgs_batch.clear();
183 | batchi++;
184 | }
185 | }
186 | return 0;
187 | }
188 |
189 |
--------------------------------------------------------------------------------
/u2net/u2net.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/common_include.h"
3 | #include"../utils/utils.h"
4 | #include"../utils/kernel_function.h"
5 | namespace u2net
6 | {
7 | class U2NET
8 | {
9 | public:
10 | U2NET(const utils::InitParameter& param);
11 | ~U2NET();
12 |
13 | public:
14 | bool init(const std::vector<unsigned char>& trtFile);
15 | void check();
16 | void copy(const std::vector<cv::Mat>& imgsBatch);
17 | void preprocess(const std::vector<cv::Mat>& imgsBatch);
18 | bool infer();
19 | void postprocess(const std::vector<cv::Mat>& imgsBatch);
20 | void showMask(const std::vector<cv::Mat>& imgsBatch, const int& cvDelayTime);
21 | void saveMask(const std::vector<cv::Mat>& imgsBatch, const std::string& savePath, const int& batchSize, const int& batchi);
22 | void reset();
23 | private:
24 | std::shared_ptr<nvinfer1::ICudaEngine> m_engine;
25 | std::unique_ptr<nvinfer1::IExecutionContext> m_context;
26 |
27 | //private:
28 | protected:
29 | utils::InitParameter m_param;
30 | nvinfer1::Dims m_output_dims;
31 | int m_output_area;
32 | std::vector<std::vector<utils::Box>> m_objectss;
33 |
34 |
35 | utils::AffineMat m_dst2src;
36 | utils::AffineMat m_src2dst;
37 |
38 | // input
39 | float* m_input_src_device;
40 | float* m_input_resize_device;
41 | float* m_input_rgb_device;
42 | float* m_input_norm_device;
43 | float* m_input_hwc_device;
44 |
45 | float* m_max_val_device;
46 | float* m_min_val_device;
47 |
48 | // output
49 | float* m_output_src_device;
50 | float* m_output_resize_device;
51 | float* m_output_resize_host;
52 | float* m_output_mask_host;
53 |
54 | };
55 | }
56 |
57 | void u2netDivMaxDevice(const int& batchSize, float* src, int srcWidth, int srcHeight, int channel, float* maxVals);
58 |
59 | void u2netNormPredDevice(const int& batchSize, float* src, int srcWidth, int srcHeight, float scale, float* minVals, float* maxVals);
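The two free functions declared at the end mirror U-2-Net's Python pre/post-processing: the input image is divided by its per-image maximum before mean/std normalization (hence `scale = 1.0` and the "div by max" note in app_u2net.cpp), and the predicted saliency map is min-max normalized. Roughly, in NumPy:

```python
import numpy as np

def div_by_max(img: np.ndarray) -> np.ndarray:
    """u2netDivMaxDevice: scale each image by its own maximum value."""
    return img / np.max(img)

def norm_pred(pred: np.ndarray) -> np.ndarray:
    """u2netNormPredDevice: min-max normalize the saliency output to [0, 1]."""
    mi, ma = np.min(pred), np.max(pred)
    return (pred - mi) / (ma - mi)
```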
--------------------------------------------------------------------------------
/utils/common_include.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | // tensorrt
3 | #include <NvInfer.h>
4 | #include <NvOnnxParser.h>
5 | #include <logger.h>
6 | // cuda
7 | #include <cuda.h>
8 | #include <cuda_runtime.h>
9 | #include <cuda_runtime_api.h>
10 | #include <cuda_device_runtime_api.h>
11 | #include <device_launch_parameters.h>
12 | #include <device_atomic_functions.h>
13 | #include <device_functions.h>
14 | #include <cuda_fp16.h>
15 | // opencv
16 | #include <opencv2/opencv.hpp>
17 | // cpp std
18 | #include <iostream>
19 | #include <fstream>
20 | #include <sstream>
21 | #include <string>
22 | #include <vector>
23 | #include <algorithm>
24 | #include <chrono>
--------------------------------------------------------------------------------
/utils/kernel_function.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/common_include.h"
3 | #include"../utils/utils.h"
4 |
5 | #define CHECK(op) __check_cuda_runtime((op), #op, __FILE__, __LINE__)
6 |
7 | bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line);
8 |
9 | #define BLOCK_SIZE 8
10 |
11 | //note: resize rgb with padding
12 | void resizeDevice(const int& batch_size, float* src, int src_width, int src_height,
13 | float* dst, int dstWidth, int dstHeight,
14 | float paddingValue, utils::AffineMat matrix);
15 |
16 | //overload: resize rgb with padding, but src's type is uint8
17 | void resizeDevice(const int& batch_size, unsigned char* src, int src_width, int src_height,
18 | float* dst, int dstWidth, int dstHeight,
19 | float paddingValue, utils::AffineMat matrix);
20 |
21 | // overload: resize rgb/gray without padding
22 | void resizeDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
23 | float* dst, int dstWidth, int dstHeight,
24 | utils::ColorMode mode, utils::AffineMat matrix);
25 |
26 | void bgr2rgbDevice(const int& batch_size, float* src, int srcWidth, int srcHeight,
27 | float* dst, int dstWidth, int dstHeight);
28 |
29 | void normDevice(const int& batch_size, float* src, int srcWidth, int srcHeight,
30 | float* dst, int dstWidth, int dstHeight,
31 | utils::InitParameter norm_param);
32 |
33 | void hwc2chwDevice(const int& batch_size, float* src, int srcWidth, int srcHeight,
34 | float* dst, int dstWidth, int dstHeight);
35 |
36 | void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
37 |
38 | // nms fast
39 | void nmsDeviceV1(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea);
40 |
41 | // nms sort
42 | void nmsDeviceV2(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea,
43 | int* idx, float* conf);
44 |
45 | void copyWithPaddingDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
46 | float* dst, int dstWidth, int dstHeight, float paddingValue, int padTop, int padLeft);
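The `utils::AffineMat` passed to `resizeDevice` encodes the letterbox mapping between source and network resolution. A NumPy sketch of how such a scale-and-center matrix is typically constructed (this mirrors the common letterbox recipe, not necessarily the exact code in utils.cpp):

```python
import numpy as np

def letterbox_matrix(src_w, src_h, dst_w, dst_h):
    """2x3 affine matrix mapping source pixels into the padded dst frame."""
    scale = min(dst_w / src_w, dst_h / src_h)
    ox = (dst_w - scale * src_w) / 2.0  # horizontal padding offset
    oy = (dst_h - scale * src_h) / 2.0  # vertical padding offset
    src2dst = np.array([[scale, 0.0, ox],
                        [0.0, scale, oy]], dtype=np.float32)
    return src2dst  # invert to get the dst2src matrix used at decode time
```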
--------------------------------------------------------------------------------
/utils/tracking/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/utils/tracking/.gitkeep
--------------------------------------------------------------------------------
/utils/yolo.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/common_include.h"
3 | #include"../utils/utils.h"
4 | #include"../utils/kernel_function.h"
5 |
6 | namespace yolo
7 | {
8 | class YOLO
9 | {
10 | public:
11 | YOLO(const utils::InitParameter& param);
12 | ~YOLO();
13 |
14 | public:
15 | virtual bool init(const std::vector<unsigned char>& trtFile);
16 | virtual void check();
17 | virtual void copy(const std::vector<cv::Mat>& imgsBatch);
18 | virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
19 | virtual bool infer();
20 | virtual void postprocess(const std::vector<cv::Mat>& imgsBatch);
21 | virtual void reset();
22 |
23 | public:
24 | std::vector<std::vector<utils::Box>> getObjectss() const;
25 |
26 | protected:
27 | std::shared_ptr<nvinfer1::ICudaEngine> m_engine;
28 | std::unique_ptr<nvinfer1::IExecutionContext> m_context;
29 |
30 | protected:
31 | utils::InitParameter m_param;
32 | nvinfer1::Dims m_output_dims;
33 | int m_output_area;
34 | int m_total_objects;
35 | std::vector<std::vector<utils::Box>> m_objectss;
36 | utils::AffineMat m_dst2src;
37 |
38 | // input
39 | unsigned char* m_input_src_device;
40 | float* m_input_resize_device;
41 | float* m_input_rgb_device;
42 | float* m_input_norm_device;
43 | float* m_input_hwc_device;
44 | // output
45 | float* m_output_src_device;
46 | float* m_output_objects_device;
47 | float* m_output_objects_host;
48 | int m_output_objects_width;
49 | int* m_output_idx_device;
50 | float* m_output_conf_device;
51 | };
52 | }
53 |
--------------------------------------------------------------------------------
/vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "0.2.0",
3 | "configurations": [
4 | {
5 | "name": "C++ Launch",
6 | "type": "cppdbg",
7 | "request": "launch",
8 | "program": "${workspaceFolder}/build/app_yolox",
9 | "args": [
10 |
11 | "--model=../data/yolox/yolox_tiny.trt",
12 | "--size=416",
13 | "--batch_size=8",
14 |
15 | // image
16 | //"--img= ../data/6406403.jpg",
17 |
18 | // video
19 | "--video=../data/people.mp4",
20 |
21 | // camera
22 | // "--cam_id= 0",
23 |
24 | "--show",
25 | "--savePath= build/"
26 | ],
27 | "stopAtEntry": false,
28 | "cwd": "${workspaceFolder}",
29 | //"preLaunchTask": "C/C++: g++ build active file",
30 | //"miDebuggerPath": "/usr/bin/gdb"
31 | }
32 | ]
33 | }
--------------------------------------------------------------------------------
/yolonas/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolo_nas VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 | message(STATUS ${ALL_LIBS})
11 | file(GLOB CPPS
12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
16 | ${TensorRT_ROOT}/samples/common/logger.cpp
17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
18 | #${TensorRT_ROOT}/samples/common/sampleUtils.cpp
19 | )
20 | list(REMOVE_ITEM CPPS ${CMAKE_CURRENT_SOURCE_DIR}/app_yolo_nas.cpp)
21 | message(STATUS CPPS = ${CPPS})
22 |
23 | list (LENGTH CPPS length)
24 | message(STATUS ***length*** = ${length})
25 | find_package(OpenCV REQUIRED)
26 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
27 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
28 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
29 | add_library(${PROJECT_NAME} SHARED ${CPPS})
30 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
31 |
32 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
33 | target_compile_options(${PROJECT_NAME} PUBLIC
34 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
35 |
36 | add_executable(app_yolo_nas app_yolo_nas.cpp)
37 |
38 | # NVCC
39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
40 | target_link_libraries(app_yolo_nas ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
41 |
--------------------------------------------------------------------------------
/yolonas/README.md:
--------------------------------------------------------------------------------
1 | ## 1. get onnx
2 | download directly from [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) or export onnx:
3 | ```bash
4 | pip install super-gradients==3.3.1
5 | # copy the python script provided in this repository to your workspace
6 | # note: the weight file is downloaded automatically
7 | cp TensorRT-Alpha/yolonas/alpha_export_dynamic.py YOUR_WORKSPACE
8 | cd YOUR_WORKSPACE
9 |
10 | # for YOLO_NAS_S
11 | # changing lines 9-11 of the script lets you switch to other models, e.g. YOLO_NAS_M
12 | python alpha_export_dynamic.py
13 | ```
14 |
15 | ## 2.edit and save onnx
16 | ```bash
17 | # note: If you have obtained onnx by downloading, this step can be ignored
18 | ignore
19 | ```
20 |
21 | ## 3.compile onnx
22 | ```bash
23 | # put your onnx file in this path:tensorrt-alpha/data/yolonas
24 | cd tensorrt-alpha/data/yolonas
25 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
26 | # 640
27 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolonas_s.onnx --saveEngine=yolonas_s.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
28 | ```
29 | ## 4.run
30 | ```bash
31 | git clone https://github.com/FeiYull/tensorrt-alpha
32 | cd tensorrt-alpha/yolonas
33 | mkdir build
34 | cd build
35 | cmake ..
36 | make -j10
37 | # note: the dstImage will be saved in tensorrt-alpha/yolonas/build by default
38 |
39 | ## 640
40 | # infer image
41 | ./app_yolo_nas --model=../../data/yolonas/yolonas_s.trt --size=640 --batch_size=1 --img=../../data/6406407.jpg --show --savePath=../
42 |
43 | # infer video
44 | ./app_yolo_nas --model=../../data/yolonas/yolonas_s.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show
45 |
46 | # infer camera
47 | ./app_yolo_nas --model=../../data/yolonas/yolonas_s.trt --size=640 --batch_size=2 --cam_id=0 --show
48 | ```
49 | ## 5. appendix
50 | ignore
--------------------------------------------------------------------------------
/yolonas/alpha_export_dynamic.py:
--------------------------------------------------------------------------------
1 | from super_gradients.training import models
2 | from super_gradients.common.object_names import Models
3 | import torch
4 | import numpy as np
5 |
6 | class AlphaYoloNas(torch.nn.Module):
7 | def __init__(self):
8 | super().__init__()
9 | self.model = models.get(Models.YOLO_NAS_S, pretrained_weights="coco")
10 | # self.model = models.get(Models.YOLO_NAS_M, pretrained_weights="coco")
11 | # self.model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco")
12 | self.model.eval()
13 |
14 | def forward(self, x):
15 | y = self.model(x)
16 | return torch.cat((y[0], y[1]), 2)
17 |
18 | input_size = (1, 3, 640, 640)
19 | onnx_input = torch.Tensor(np.zeros(input_size))
20 |
21 | net = AlphaYoloNas()
22 | input_names = ["images"]
23 | output_names = ["output"]
24 | dynamic_axes = {input_names[0]: {0: "batch_size"},
25 | output_names[0]: {0: "batch_size"}}
26 |
27 | torch.onnx.export(net, onnx_input, "yolonas_s.onnx",
28 | #verbose=True,
29 | input_names=input_names,
30 | output_names=output_names,
31 | opset_version=12,
32 | dynamic_axes=dynamic_axes)
--------------------------------------------------------------------------------
/yolonas/app_yolo_nas.cpp:
--------------------------------------------------------------------------------
1 | #include"../utils/yolo.h"
2 | #include"yolo_nas.h"
3 |
4 | void setParameters(utils::InitParameter& initParameters)
5 | {
6 | initParameters.class_names = utils::dataSets::coco80;
7 | //initParameters.class_names = utils::dataSets::voc20;
8 | initParameters.num_class = 80; // for coco
9 | //initParameters.num_class = 20; // for voc2012
10 | initParameters.batch_size = 8;
11 | initParameters.dst_h = 636;
12 | initParameters.dst_w = 636;
13 | initParameters.input_output_names = { "images", "output" };
14 | initParameters.conf_thresh = 0.25f;
15 | initParameters.iou_thresh = 0.7f;
16 | initParameters.save_path = "";
17 | }
18 |
19 | void task(YOLO_NAS& yolo, const utils::InitParameter& param, std::vector<cv::Mat>& imgsBatch, const int& delayTime, const int& batchi,
20 | const bool& isShow, const bool& isSave)
21 | {
22 | utils::DeviceTimer d_t0; yolo.copy(imgsBatch); float t0 = d_t0.getUsedTime();
23 | utils::DeviceTimer d_t1; yolo.preprocess(imgsBatch); float t1 = d_t1.getUsedTime();
24 | utils::DeviceTimer d_t2; yolo.infer(); float t2 = d_t2.getUsedTime();
25 | utils::DeviceTimer d_t3; yolo.postprocess(imgsBatch); float t3 = d_t3.getUsedTime();
26 | sample::gLogInfo <<
27 | "preprocess time = " << t1 / param.batch_size << "; "
28 | "infer time = " << t2 / param.batch_size << "; "
29 | "postprocess time = " << t3 / param.batch_size << std::endl;
30 |
31 | if(isShow)
32 | utils::show(yolo.getObjectss(), param.class_names, delayTime, imgsBatch);
33 | if(isSave)
34 | utils::save(yolo.getObjectss(), param.class_names, param.save_path, imgsBatch, param.batch_size, batchi);
35 | yolo.reset();
36 | }
37 |
38 | int main(int argc, char** argv)
39 | {
40 | cv::CommandLineParser parser(argc, argv,
41 | {
42 | "{model || tensorrt model file }"
43 | "{size || image (h, w), eg: 640 }"
44 | "{batch_size|| batch size }"
45 | "{video || video's path }"
46 | "{img || image's path }"
47 | "{cam_id || camera's device id }"
48 | "{show || if show the result }"
49 | "{savePath || save path, can be ignore}"
50 | });
51 | utils::InitParameter param;
52 | setParameters(param);
53 | std::string model_path = "../../data/yolonas/yolonas_s.trt";
54 | std::string video_path = "../../data/people.mp4";
55 | std::string image_path = "../../data/bus.jpg";
56 | int camera_id = 0;
57 | utils::InputStream source;
58 | source = utils::InputStream::IMAGE;
59 | //source = utils::InputStream::VIDEO;
60 | //source = utils::InputStream::CAMERA;
61 |
62 | int size = -1;
63 | int batch_size = 8;
64 | bool is_show = false;
65 | bool is_save = false;
66 | if(parser.has("model"))
67 | {
68 | model_path = parser.get<std::string>("model");
69 | sample::gLogInfo << "model_path = " << model_path << std::endl;
70 | }
71 | if(parser.has("size"))
72 | {
73 | size = parser.get<int>("size");
74 | sample::gLogInfo << "size = " << size << std::endl;
75 | param.dst_h = param.dst_w = size;
76 | }
77 | if(parser.has("batch_size"))
78 | {
79 | batch_size = parser.get<int>("batch_size");
80 | sample::gLogInfo << "batch_size = " << batch_size << std::endl;
81 | param.batch_size = batch_size;
82 | }
83 | if(parser.has("video"))
84 | {
85 | source = utils::InputStream::VIDEO;
86 | video_path = parser.get<std::string>("video");
87 | sample::gLogInfo << "video_path = " << video_path << std::endl;
88 | }
89 | if(parser.has("img"))
90 | {
91 | source = utils::InputStream::IMAGE;
92 | image_path = parser.get<std::string>("img");
93 | sample::gLogInfo << "image_path = " << image_path << std::endl;
94 | }
95 | if(parser.has("cam_id"))
96 | {
97 | source = utils::InputStream::CAMERA;
98 | camera_id = parser.get<int>("cam_id");
99 | sample::gLogInfo << "camera_id = " << camera_id << std::endl;
100 | }
101 | if(parser.has("show"))
102 | {
103 | is_show = true;
104 | sample::gLogInfo << "is_show = " << is_show << std::endl;
105 | }
106 | if(parser.has("savePath"))
107 | {
108 | is_save = true;
109 | param.save_path = parser.get<std::string>("savePath");
110 | sample::gLogInfo << "save_path = " << param.save_path << std::endl;
111 | }
112 | int total_batches = 0;
113 | int delay_time = 1;
114 | cv::VideoCapture capture;
115 | if (!setInputStream(source, image_path, video_path, camera_id,
116 | capture, total_batches, delay_time, param))
117 | {
118 | sample::gLogError << "failed to read the input data!" << std::endl;
119 | return -1;
120 | }
121 | YOLO_NAS yolo(param);
122 | std::vector<unsigned char> trt_file = utils::loadModel(model_path);
123 | if (trt_file.empty())
124 | {
125 | sample::gLogError << "trt_file is empty!" << std::endl;
126 | return -1;
127 | }
128 | if (!yolo.init(trt_file))
129 | {
130 | sample::gLogError << "initEngine() failed!" << std::endl;
131 | return -1;
132 | }
133 | yolo.check();
134 | cv::Mat frame;
135 | std::vector<cv::Mat> imgs_batch;
136 | imgs_batch.reserve(param.batch_size);
137 | sample::gLogInfo << imgs_batch.capacity() << std::endl;
138 | int batchi = 0;
139 | while (capture.isOpened())
140 | {
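// Batching loop: frames accumulate into imgs_batch until it holds
// param.batch_size images, then task() runs one inference pass on the
// whole batch; a short final batch is flushed when the stream ends.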
141 | if (batchi >= total_batches && source != utils::InputStream::CAMERA)
142 | {
143 | break;
144 | }
145 | if (imgs_batch.size() < param.batch_size)
146 | {
147 | if (source != utils::InputStream::IMAGE)
148 | {
149 | capture.read(frame);
150 | }
151 | else
152 | {
153 | frame = cv::imread(image_path);
154 | }
155 |
156 | if (frame.empty())
157 | {
158 | sample::gLogWarning << "no more video or camera frame" << std::endl;
159 | task(yolo, param, imgs_batch, delay_time, batchi, is_show, is_save);
160 | imgs_batch.clear();
161 | batchi++;
162 | break;
163 | }
164 | else
165 | {
166 | imgs_batch.emplace_back(frame.clone());
167 | }
168 | }
169 | else
170 | {
171 | task(yolo, param, imgs_batch, delay_time, batchi, is_show, is_save);
172 | imgs_batch.clear();
173 | batchi++;
174 | }
175 | }
176 | return 0;
177 | }
178 |
179 |
--------------------------------------------------------------------------------
/yolonas/decode_yolo_nas.cu:
--------------------------------------------------------------------------------
1 | #include "decode_yolo_nas.h"
2 |
3 | __global__ void decode_yolo_nas_device_kernel(int batch_size, int num_class, int topK, float conf_thresh,
4 | float* src, int srcWidth, int srcHeight, int srcArea,
5 | float* dst, int dstWidth, int dstHeight, int dstArea)
6 | {
7 | int dx = blockDim.x * blockIdx.x + threadIdx.x;
8 | int dy = blockDim.y * blockIdx.y + threadIdx.y;
9 | if (dx >= srcHeight || dy >= batch_size)
10 | {
11 | return;
12 | }
13 | float* pitem = src + dy * srcArea + dx * srcWidth;
14 | float* class_confidence = pitem + 4;
15 | float confidence = *class_confidence++;
16 | int label = 0;
17 | for (int i = 1; i < num_class; ++i, ++class_confidence)
18 | {
19 | if (*class_confidence > confidence)
20 | {
21 | confidence = *class_confidence;
22 | label = i;
23 | }
24 | }
25 | if (confidence < conf_thresh)
26 | {
27 | return;
28 | }
29 | int index = atomicAdd(dst + dy * dstArea, 1);
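// dst layout per image: [box_count][topK x (left, top, right, bottom,
// confidence, label, keep_flag)]; atomicAdd on the leading counter
// reserves this thread's output slot.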
30 |
31 | if (index >= topK)
32 | {
33 | return;
34 | }
35 | float left = *pitem++;
36 | float top = *pitem++;
37 | float right = *pitem++;
38 | float bottom = *pitem++;
39 |
40 | float* pout_item = dst + dy * dstArea + 1 + index * dstWidth;
41 | *pout_item++ = left;
42 | *pout_item++ = top;
43 | *pout_item++ = right;
44 | *pout_item++ = bottom;
45 | *pout_item++ = confidence;
46 | *pout_item++ = label;
47 | *pout_item++ = 1;
48 | }
49 |
50 | void yolo_nas::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
51 | {
52 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
53 | dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
54 | (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
55 | int dstArea = 1 + dstWidth * dstHeight;
56 | decode_yolo_nas_device_kernel<<<grid_size, block_size, 0, nullptr>>>(param.batch_size, param.num_class, param.topK, param.conf_thresh,
57 | src, srcWidth, srcHeight, srcArea,
58 | dst, dstWidth, dstHeight, dstArea);
59 | }
--------------------------------------------------------------------------------
/yolonas/decode_yolo_nas.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/utils.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | namespace yolo_nas
6 | {
7 | void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
8 | }
9 |
--------------------------------------------------------------------------------
/yolonas/yolo_nas.cpp:
--------------------------------------------------------------------------------
1 | #include"yolo_nas.h"
2 | #include"decode_yolo_nas.h"
3 |
4 | YOLO_NAS::YOLO_NAS(const utils::InitParameter& param) :yolo::YOLO(param)
5 | {
6 | m_resize_shape = cv::Size(636, 636);
7 | m_input_resize_padding_device = nullptr;
8 | CHECK(cudaMalloc(&m_input_resize_padding_device, param.batch_size * 3 * m_param.dst_h * m_param.dst_w * sizeof(float)));
9 | }
10 |
11 | YOLO_NAS::~YOLO_NAS()
12 | {
13 | CHECK(cudaFree(m_input_resize_padding_device));
14 | }
15 |
16 | bool YOLO_NAS::init(const std::vector<unsigned char>& trtFile)
17 | {
18 | if (trtFile.empty())
19 | {
20 | return false;
21 | }
22 | std::unique_ptr<nvinfer1::IRuntime> runtime =
23 | std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
24 | if (runtime == nullptr)
25 | {
26 | return false;
27 | }
28 | this->m_engine = std::unique_ptr<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(trtFile.data(), trtFile.size()));
29 | if (this->m_engine == nullptr)
30 | {
31 | return false;
32 | }
33 | this->m_context = std::unique_ptr<nvinfer1::IExecutionContext>(this->m_engine->createExecutionContext());
34 | if (this->m_context == nullptr)
35 | {
36 | return false;
37 | }
38 | if (m_param.dynamic_batch)
39 | {
40 | this->m_context->setBindingDimensions(0, nvinfer1::Dims4(m_param.batch_size, 3, m_param.dst_h, m_param.dst_w));
41 | }
42 | m_output_dims = this->m_context->getBindingDimensions(1);
43 | m_total_objects = m_output_dims.d[1];
44 | assert(m_param.batch_size <= m_output_dims.d[0]);
45 | m_output_area = 1;
46 | for (int i = 1; i < m_output_dims.nbDims; i++)
47 | {
48 | if (m_output_dims.d[i] != 0)
49 | {
50 | m_output_area *= m_output_dims.d[i];
51 | }
52 | }
53 | CHECK(cudaMalloc(&m_output_src_device, m_param.batch_size * m_output_area * sizeof(float)));
54 | float a = float(m_resize_shape.height) / m_param.src_h;
55 | float b = float(m_resize_shape.width) / m_param.src_w;
56 | float scale = a < b ? a : b;
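// src2dst maps the source image into a centered, aspect-preserving
// 636x636 letterbox; dst2src is its inverse and is used in postprocess()
// to map detected boxes back to source-image coordinates.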
57 | cv::Mat src2dst = (cv::Mat_<float>(2, 3) << scale, 0.f, (-scale * m_param.src_w + m_resize_shape.width + scale - 1) * 0.5,
58 | 0.f, scale, (-scale * m_param.src_h + m_resize_shape.height + scale - 1) * 0.5);
59 | cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1);
60 | cv::invertAffineTransform(src2dst, dst2src);
61 | int pad_height = m_param.dst_h - m_resize_shape.height;
62 | int pad_width = m_param.dst_w - m_resize_shape.width;
63 | m_pad_top = pad_height / 2;
64 | m_pad_left = pad_width / 2;
65 |
66 | m_dst2src.v0 = dst2src.ptr<float>(0)[0];
67 | m_dst2src.v1 = dst2src.ptr<float>(0)[1];
68 | m_dst2src.v2 = dst2src.ptr<float>(0)[2];
69 | m_dst2src.v3 = dst2src.ptr<float>(1)[0];
70 | m_dst2src.v4 = dst2src.ptr<float>(1)[1];
71 | m_dst2src.v5 = dst2src.ptr<float>(1)[2];
72 | return true;
73 | }
74 |
75 | void YOLO_NAS::preprocess(const std::vector<cv::Mat>& imgsBatch)
76 | {
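// Device-side pipeline: affine resize into the 636x636 letterbox (fill
// value 114) -> pad to dst_w x dst_h -> BGR->RGB -> normalize -> HWC->CHW.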
77 | resizeDevice(m_param.batch_size, m_input_src_device, m_param.src_w, m_param.src_h,
78 | m_input_resize_device, m_resize_shape.width, m_resize_shape.height, 114, m_dst2src);
79 | copyWithPaddingDevice(m_param.batch_size, m_input_resize_device, m_resize_shape.width, m_resize_shape.height,
80 | m_input_resize_padding_device, m_param.dst_w, m_param.dst_h, 114.f, m_pad_top, m_pad_left);
81 | bgr2rgbDevice(m_param.batch_size, m_input_resize_padding_device, m_param.dst_w, m_param.dst_h,
82 | m_input_rgb_device, m_param.dst_w, m_param.dst_h);
83 | normDevice(m_param.batch_size, m_input_rgb_device, m_param.dst_w, m_param.dst_h,
84 | m_input_norm_device, m_param.dst_w, m_param.dst_h, m_param);
85 | hwc2chwDevice(m_param.batch_size, m_input_norm_device, m_param.dst_w, m_param.dst_h,
86 | m_input_hwc_device, m_param.dst_w, m_param.dst_h);
87 | }
88 | void YOLO_NAS::postprocess(const std::vector<cv::Mat>& imgsBatch)
89 | {
90 | yolo_nas::decodeDevice(m_param, m_output_src_device, 4 + m_param.num_class, m_total_objects, m_output_area,
91 | m_output_objects_device, m_output_objects_width, m_param.topK);
92 | nmsDeviceV1(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1);
93 | //nmsDeviceV2(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1, m_output_idx_device, m_output_conf_device);
94 | CHECK(cudaMemcpy(m_output_objects_host, m_output_objects_device, m_param.batch_size * sizeof(float) * (1 + 7 * m_param.topK), cudaMemcpyDeviceToHost));
95 | for (size_t bi = 0; bi < imgsBatch.size(); bi++)
96 | {
97 | int num_boxes = std::min((int)(m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1))[0], m_param.topK);
98 | for (size_t i = 0; i < num_boxes; i++)
99 | {
100 | float* ptr = m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1) + m_output_objects_width * i + 1;
101 | int keep_flag = ptr[6];
102 | if (keep_flag)
103 | {
104 | ptr[0] -= m_pad_left;
105 | ptr[1] -= m_pad_top;
106 | ptr[2] -= m_pad_left;
107 | ptr[3] -= m_pad_top;
108 | float x_lt = m_dst2src.v0 * ptr[0] + m_dst2src.v1 * ptr[1] + m_dst2src.v2;
109 | float y_lt = m_dst2src.v3 * ptr[0] + m_dst2src.v4 * ptr[1] + m_dst2src.v5;
110 | float x_rb = m_dst2src.v0 * ptr[2] + m_dst2src.v1 * ptr[3] + m_dst2src.v2;
111 | float y_rb = m_dst2src.v3 * ptr[2] + m_dst2src.v4 * ptr[3] + m_dst2src.v5;
112 | m_objectss[bi].emplace_back(x_lt, y_lt, x_rb, y_rb, ptr[4], (int)ptr[5]);
113 | }
114 | }
115 | }
116 | }
--------------------------------------------------------------------------------
/yolonas/yolo_nas.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/yolo.h"
3 | #include"../utils/utils.h"
4 | class YOLO_NAS : public yolo::YOLO
5 | {
6 | public:
7 | YOLO_NAS(const utils::InitParameter& param);
8 | ~YOLO_NAS();
9 | virtual bool init(const std::vector<unsigned char>& trtFile);
10 | virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
11 | virtual void postprocess(const std::vector<cv::Mat>& imgsBatch);
12 |
13 | private:
14 | float* m_input_resize_padding_device;
15 | cv::Size m_resize_shape;
16 | int m_pad_top;
17 | int m_pad_left;
18 | };
--------------------------------------------------------------------------------
/yolor/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolor VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 |
8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
11 | message(STATUS ${ALL_LIBS})
12 | file(GLOB CPPS
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
16 | ${TensorRT_ROOT}/samples/common/logger.cpp
17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
18 | )
19 | list(REMOVE_ITEM CPPS app_yolor.cpp)
20 | message(STATUS CPPS = ${CPPS})
21 | list (LENGTH CPPS length)
22 | message(STATUS ***length*** = ${length})
23 | find_package(OpenCV REQUIRED)
24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
27 |
28 | add_library(${PROJECT_NAME} SHARED ${CPPS})
29 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
30 |
31 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
32 | target_compile_options(${PROJECT_NAME} PUBLIC
33 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
34 |
35 | add_executable(app_yolor app_yolor.cpp)
36 |
37 | # NVCC
38 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
39 | target_link_libraries(app_yolor ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
40 |
--------------------------------------------------------------------------------
/yolor/README.md:
--------------------------------------------------------------------------------
1 | ## Notes
2 | - 0. Please use the "alpha_export.py" export script provided by this repository.
3 | - 1. With torch 1.7 + onnx 1.8.0, exporting onnx fails with:
4 | "RuntimeError: Exporting the operator silu to ONNX opset version 11 is not supported. Please open a bug to request ONNX export support for the missing operator."
5 | - 2. Switching the environment to torch 1.9 + onnx 1.11.0 resolves this unsupported op and the export succeeds (see the sketch below).
6 |
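A minimal environment setup matching the note above (the exact wheel builds are an assumption; pick the variant for your CUDA version):

```bash
pip install torch==1.9.0 onnx==1.11.0
```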
7 |
8 | ## 1. get onnx
9 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv)
10 |
11 | or export onnx:
12 | ```bash
13 | git clone https://github.com/WongKinYiu/yolor
14 | cd yolor
15 | git checkout 462858e8737f56388f812cfe381a69c4ffca0cc7
16 | # Please use the "alpha_export.py" file provided by TensorRT-Alpha to export onnx
17 | cp TensorRT-Alpha/yolor/alpha_export.py .
18 |
19 | # 1280
20 | python alpha_export.py --net=yolor_p6
21 | # 640
22 | python alpha_export.py --net=yolor_csp
23 | python alpha_export.py --net=yolor_csp_star
24 | python alpha_export.py --net=yolor_csp_x
25 | python alpha_export.py --net=yolor_csp_x_star
26 | ```
27 | ## 2.edit and save onnx
28 | ```bash
29 | # note: If you have obtained onnx by downloading, this step can be ignored
30 | ignore
31 | ```
32 | ## 3.compile onnx
33 | ```bash
34 | # put your onnx file in this path: tensorrt-alpha/data/yolor
35 | cd tensorrt-alpha/data/yolor
36 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
37 |
38 | # 1280
39 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_p6.onnx --saveEngine=yolor_p6.trt --buildOnly --minShapes=images:1x3x1280x1280 --optShapes=images:2x3x1280x1280 --maxShapes=images:4x3x1280x1280
40 |
41 | # 640
42 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_csp.onnx --saveEngine=yolor_csp.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
43 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_csp_star.onnx --saveEngine=yolor_csp_star.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
44 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_csp_x.onnx --saveEngine=yolor_csp_x.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
45 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_csp_x_star.onnx --saveEngine=yolor_csp_x_star.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
46 | ```
47 | ## 4.run
48 | ```bash
49 | git clone https://github.com/FeiYull/tensorrt-alpha
50 | cd tensorrt-alpha/yolor
51 | mkdir build
52 | cd build
53 | cmake ..
54 | make -j10
55 | # note: the dstImage will be saved in tensorrt-alpha/yolor/build by default
56 |
57 | ## 640
58 | # infer image
59 | ./app_yolor --model=../../data/yolor/yolor_csp.trt --size=640 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../
60 |
61 | # infer video
62 | ./app_yolor --model=../../data/yolor/yolor_csp.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show
63 |
64 | # infer camera
65 | ./app_yolor --model=../../data/yolor/yolor_csp.trt --size=640 --batch_size=2 --cam_id=0 --show
66 |
67 |
68 | ## 1280
69 | ./app_yolor --model=../../data/yolor/yolor_p6.trt --size=1280 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../
70 | ```
71 | ## 5. appendix
72 | ignore
--------------------------------------------------------------------------------
/yolor/alpha_export.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import platform
4 | import shutil
5 | import time
6 | from pathlib import Path
7 |
8 | import cv2
9 | import torch
10 | import torch.backends.cudnn as cudnn
11 | from numpy import random
12 |
13 | from utils.google_utils import attempt_load
14 | from utils.datasets import LoadStreams, LoadImages
15 | from utils.general import (
16 | check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, strip_optimizer)
17 | from utils.plots import plot_one_box
18 | from utils.torch_utils import select_device, load_classifier, time_synchronized
19 |
20 | from models.models import *
21 | from utils.datasets import *
22 | from utils.general import *
23 |
24 |
25 |
26 |
27 | from utils.google_utils import attempt_download
28 |
29 |
30 | import onnx
31 | import onnxruntime as ort
32 | import numpy as np
33 |
34 | """
35 | example:
36 | python alpha_export.py --net=yolor_p6
37 | python alpha_export.py --net=yolor_csp
38 | python alpha_export.py --net=yolor_csp_star
39 | python alpha_export.py --net=yolor_csp_x
40 | python alpha_export.py --net=yolor_csp_x_star
41 | """
42 | if __name__ == '__main__':
43 | parser = argparse.ArgumentParser()
44 | parser.add_argument('--net', type=str, default='yolor_p6', help='net type')
45 | opt = parser.parse_args()
46 | # init
47 | image_input_shape = ''
48 | img = ''
49 | model = ''
50 |
51 | net = opt.net
52 | if net == "yolor_p6":
53 | # yolor_p6
54 | image_input_shape = (1, 3, 1280, 1280)
55 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection
56 | model = Darknet("cfg/yolor_p6.cfg", 1280).cpu()
57 | opt.weights = 'yolor_p6.pt'
58 | elif net == "yolor_csp":
59 | # yolor_csp
60 | image_input_shape = (1, 3, 640, 640)
61 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection
62 | model = Darknet("cfg/yolor_csp.cfg", 640).cpu()
63 | opt.weights = 'yolor_csp.pt'
64 | elif net == "yolor_csp_star":
65 | # yolor_csp_star
66 | image_input_shape = (1, 3, 640, 640)
67 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection
68 | model = Darknet("cfg/yolor_csp.cfg", 640).cpu()
69 | opt.weights = 'yolor_csp_star.pt'
70 | elif net == "yolor_csp_x":
71 | # yolor_csp_x:
72 | image_input_shape = (1, 3, 640, 640)
73 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection
74 | model = Darknet("cfg/yolor_csp_x.cfg", 640).cpu()
75 | opt.weights = 'yolor_csp_x.pt'
76 | elif net == "yolor_csp_x_star":
77 | # yolor_csp_x_star: 640*640
78 | image_input_shape = (1, 3, 640, 640)
79 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection
80 | model = Darknet("cfg/yolor_csp_x.cfg", 640).cpu()
81 | opt.weights = 'yolor_csp_x_star.pt'
82 |
83 | model.load_state_dict(torch.load(opt.weights, map_location="cpu")['model'])
84 |
85 | model.eval()
86 | y = model(img) # dry run
87 | print(y[0][0][0][0:10])
88 |
89 | # ONNX export
90 | # try
91 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
92 | f = opt.weights.replace('.pt', '.onnx') # filename
93 | torch.onnx.export(model, img, f, verbose=False, opset_version=11, input_names=['images'], output_names=['output'],
94 | dynamic_axes={
95 | 'images': {
96 | 0: 'batch',
97 | 2: 'height',
98 | 3: 'width'}, # shape(1,3,640,640)
99 | 'output': {
100 | 0: 'batch',
101 | 1: 'anchors'} # shape(1,25200,85)
102 | })
103 |
104 | # Checks
105 | onnx_model = onnx.load(f) # load onnx model
106 |
107 | input_names = "images"
108 | ort_session = ort.InferenceSession(f)
109 | outputs = ort_session.run(
110 | None,
111 | {input_names: np.ones(shape=image_input_shape).astype(np.float32)},
112 | )
113 | print(outputs[0][0][0][0:10])
114 | onnx.checker.check_model(onnx_model) # check onnx model
115 | print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
116 | print('ONNX export success, saved as %s' % f)
117 |
118 | if net == "yolor_p6":
119 | item1 = onnx_model.graph.output[1]
120 | item2 = onnx_model.graph.output[2]
121 | item3 = onnx_model.graph.output[3]
122 | item4 = onnx_model.graph.output[4]
123 | onnx_model.graph.output.remove(item1)
124 | onnx_model.graph.output.remove(item2)
125 | onnx_model.graph.output.remove(item3)
126 | onnx_model.graph.output.remove(item4)
127 | else:
128 | item1 = onnx_model.graph.output[1]
129 | item2 = onnx_model.graph.output[2]
130 | item3 = onnx_model.graph.output[3]
131 | onnx_model.graph.output.remove(item1)
132 | onnx_model.graph.output.remove(item2)
133 | onnx_model.graph.output.remove(item3)
134 |
135 | # save
136 | onnx.save(onnx_model, f)
137 | # Finish
138 | print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')
139 |
--------------------------------------------------------------------------------
/yolov3/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolov3 VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 |
11 | message(STATUS ${ALL_LIBS})
12 | file(GLOB CPPS
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
16 | ${TensorRT_ROOT}/samples/common/logger.cpp
17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
18 | )
19 | list(REMOVE_ITEM CPPS app_yolov3.cpp)
20 | message(STATUS CPPS = ${CPPS})
21 | list (LENGTH CPPS length)
22 | message(STATUS ***length*** = ${length})
23 | find_package(OpenCV REQUIRED)
24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
27 |
28 | add_library(${PROJECT_NAME} SHARED ${CPPS})
29 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
30 |
31 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
32 | target_compile_options(${PROJECT_NAME} PUBLIC
33 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
34 |
35 | add_executable(app_yolov3 app_yolov3.cpp)
36 |
37 | # NVCC
38 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
39 | target_link_libraries(app_yolov3 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
40 |
--------------------------------------------------------------------------------
/yolov3/README.md:
--------------------------------------------------------------------------------
1 | ## 1. get onnx
2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv)
3 | or export onnx:
4 | ```bash
5 | git clone https://github.com/ultralytics/yolov3
6 | cd yolov3 && git checkout dd838e25863169d0de4f10631a609350658efb69
7 | ```
8 | ```python
9 | # note: When using the official export.py to export onnx, you need to comment the following two lines:
10 | #---------------------------------------------------------------------------------------------------------
11 | if simplify:
12 | try:
13 | check_requirements(('onnx-simplifier',))
14 | import onnxsim
15 |
16 | LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
17 | model_onnx, check = onnxsim.simplify(
18 | model_onnx,
19 | #-------------------------------------------------------------------------------
20 | #dynamic_input_shape=dynamic,
21 | #input_shapes={'images': list(im.shape)} if dynamic else None
22 | #-------------------------------------------------------------------------------
23 | )
24 | assert check, 'assert check failed'
25 | onnx.save(model_onnx, f)
26 | except Exception as e:
27 | LOGGER.info(f'{prefix} simplifier failure: {e}')
28 | LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
29 | LOGGER.info(f"{prefix} run --dynamic ONNX model inference with: 'python detect.py --weights {f}'")
30 | #---------------------------------------------------------------------------------------------------------
31 | ```
32 | ```bash
33 | cd yolov3
34 | python export.py --weights yolov3-tiny.pt --dynamic --simplify
35 | python export.py --weights yolov3.pt --dynamic --simplify
36 | python export.py --weights yolov3-spp.pt --dynamic
37 | ```
38 | ## 2.edit and save onnx
39 | ```bash
40 | # note: If you have obtained onnx by downloading, this step can be ignored
41 | git clone https://github.com/FeiYull/tensorrt-alpha
42 | cd tensorrt-alpha/yolov3
43 | conda activate tensorrt-alpha
44 | # edit the net_name / path settings on lines 21-25 of alpha_edit.py (see the excerpt after this code block)
45 | python alpha_edit.py
46 | ```
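For reference, the lines to edit in `alpha_edit.py` (the full script is included in this repo): the network name on line 21 and, a few lines below it, the directory that holds the exported onnx:

```python
net_name = "yolov3-tiny"   # or "yolov3" / "yolov3-spp"
path = "../data/yolov3/"   # directory containing <net_name>.onnx
```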
47 | ## 3.compile onnx
48 | ```bash
49 | # put your onnx file in this path: tensorrt-alpha/data/yolov3
50 | cd tensorrt-alpha/data/yolov3
51 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
52 | # 640
53 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yolov3.onnx --saveEngine=alpha_yolov3.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
54 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yolov3-spp.onnx --saveEngine=alpha_yolov3-spp.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
55 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yolov3-tiny.onnx --saveEngine=alpha_yolov3-tiny.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
56 |
57 | # note: when compiling the alpha_yolov3-tiny model, the following error may occur:
58 | # error: Error Code 4: Internal Error (/model.11/Reshape: IShuffleLayer applied to shape tensor must have 0 or 1 reshape dimensions: dimensions were [-1,2])
59 | # solution: add the parameter --simplify when exporting onnx (opset defaults to 13, which is high enough)
60 | ```
61 | ## 4.run
62 | ```bash
63 | git clone https://github.com/FeiYull/tensorrt-alpha
64 | cd tensorrt-alpha/yolov3
65 | mkdir build
66 | cd build
67 | cmake ..
68 | make -j10
69 | # note: the dstImage will be saved in tensorrt-alpha/yolov3/build by default
70 |
71 | ## 640
72 | # infer image
73 | ./app_yolov3 --model=../../data/yolov3/alpha_yolov3-tiny.trt --size=640 --batch_size=1 --img=../../data/6406403.jpg --show --savePath=../
74 |
75 | # infer video
76 | ./app_yolov3 --model=../../data/yolov3/alpha_yolov3-tiny.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show
77 |
78 | # infer camera
79 | ./app_yolov3 --model=../../data/yolov3/alpha_yolov3-tiny.trt --size=640 --batch_size=2 --cam_id=0 --show
80 |
81 | # note: yolov3-tiny misses some detections on the image 6406401.jpg; don't worry, this matches the official model's behavior
82 | ```
83 | ## 5. appendix
84 | ignore
--------------------------------------------------------------------------------
/yolov3/alpha_edit.py:
--------------------------------------------------------------------------------
1 | import onnx
2 | import onnx.helper as helper
3 | import torch
4 | # import torchvision
5 | import onnxsim # pip install onnx-simplifier
6 | import onnxruntime as ort
7 | import numpy as np
8 | import os
9 |
10 |
11 | def infer_onnx(onnx_file, input_names, image_input_shape):
12 | ort_session = ort.InferenceSession(onnx_file)
13 | outputs = ort_session.run(
14 | None,
15 | # {"data": np.ones(shape=image_input_shape).astype(np.float32)},
16 | {input_names[0]: np.ones(shape=image_input_shape).astype(np.float32)},
17 | )
18 | return outputs
19 |
20 |
21 | net_name = "yolov3-tiny"
22 | # net_name = "yolov3"
23 | # net_name = "yolov3-spp"
24 | path = "../data/yolov3/"
25 |
26 | image_input_shape = [1, 3, 640, 640]
27 | onnx_name = net_name + ".onnx"
28 | input_names = ["images"]
29 | output_names = ["output"]
30 |
31 | model = onnx.load_model(path + onnx_name)
32 |
33 | outputs = infer_onnx(path + onnx_name, input_names, image_input_shape)
34 | for output in outputs:
35 | print(output.shape)
36 |
37 | # delete some nodes
38 | if net_name == "yolov3-tiny":
39 | item1 = model.graph.output[1]
40 | item2 = model.graph.output[2]
41 | model.graph.output.remove(item1)
42 | model.graph.output.remove(item2)
43 | elif net_name == "yolov3" or net_name == "yolov3-spp":
44 | item1 = model.graph.output[1]
45 | item2 = model.graph.output[2]
46 | item3 = model.graph.output[3]
47 | model.graph.output.remove(item1)
48 | model.graph.output.remove(item2)
49 | model.graph.output.remove(item3)
50 |
51 | onnx.save(model, path + "alpha_" + onnx_name)
52 | outputs = infer_onnx(path + "alpha_" + onnx_name, input_names, image_input_shape)
53 | for output in outputs:
54 | print(output.shape)
55 | print("")
--------------------------------------------------------------------------------
/yolov4/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolov4 VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 |
8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
11 |
12 | message(STATUS ${ALL_LIBS})
13 | file(GLOB CPPS
14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
15 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
17 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
18 | ${TensorRT_ROOT}/samples/common/logger.cpp
19 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
20 | )
21 | list(REMOVE_ITEM CPPS app_yolov4.cpp)
22 | message(STATUS CPPS = ${CPPS})
23 | list (LENGTH CPPS length)
24 | message(STATUS ***length*** = ${length})
25 | find_package(OpenCV REQUIRED)
26 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
27 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
28 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
29 | add_library(${PROJECT_NAME} SHARED ${CPPS})
30 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
31 |
32 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
33 | target_compile_options(${PROJECT_NAME} PUBLIC
34 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
35 |
36 | add_executable(app_yolov4 app_yolov4.cpp)
37 |
38 | # NVCC
39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
40 | target_link_libraries(app_yolov4 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
41 |
--------------------------------------------------------------------------------
/yolov4/README.md:
--------------------------------------------------------------------------------
1 | ## 1. get onnx
2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv)
3 | or export onnx:
4 | ```bash
5 | git clone https://github.com/Tianxiaomo/pytorch-YOLOv4
6 | cd pytorch-YOLOv4 && git checkout a65d219f9066bae4e12003bd7cdc04531860c672
7 | cd ..
8 | git clone https://github.com/FeiYull/tensorrt-alpha
9 | # Please use the "alpha_export.py" file provided by TensorRT-Alpha to export onnx
10 | cp tensorrt-alpha/yolov4/alpha_export.py pytorch-YOLOv4/
11 | cd pytorch-YOLOv4/
12 | # 608
13 | python alpha_export.py cfg/yolov4.cfg yolov4.weights --batch_size=-1 --onnx_file_path=alpha_yolov4_-1_3_608_608_dynamic.onnx
14 | ```
15 | ## 2.edit and save onnx
16 | ```bash
17 | # note: If you have obtained onnx by downloading, this step can be ignored
18 | ignore
19 | ```
20 | ## 3.compile onnx
21 | ```bash
22 | # put your onnx file in this path: tensorrt-alpha/data/yolov4
23 | cd tensorrt-alpha/data/yolov4
24 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
25 | # 608
26 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yolov4_-1_3_608_608_dynamic.onnx --saveEngine=alpha_yolov4_-1_3_608_608_dynamic.trt --buildOnly --minShapes=input:1x3x608x608 --optShapes=input:2x3x608x608 --maxShapes=input:4x3x608x608
27 | ```
28 | ## 4.run
29 | ```bash
30 | git clone https://github.com/FeiYull/tensorrt-alpha
31 | cd tensorrt-alpha/yolov4
32 | mkdir build
33 | cd build
34 | cmake ..
35 | make -j10
36 | # note: the dstImage will be saved in tensorrt-alpha/yolov4/build by default
37 |
38 | ## 608
39 | # infer image
40 | ./app_yolov4 --model=../../data/yolov4/alpha_yolov4_-1_3_608_608_dynamic.trt --size=608 --batch_size=1 --img=../../data/6406402.jpg --show --savePath=../
41 |
42 | # infer video
43 | ./app_yolov4 --model=../../data/yolov4/alpha_yolov4_-1_3_608_608_dynamic.trt --size=608 --batch_size=2 --video=../../data/people.mp4 --show
44 |
45 | # infer camera
46 | ./app_yolov4 --model=../../data/yolov4/alpha_yolov4_-1_3_608_608_dynamic.trt --size=608 --batch_size=2 --cam_id=0 --show
47 | ```
48 | ## 5. appendix
49 | ignore
--------------------------------------------------------------------------------
/yolov4/alpha_export.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import torch
3 | from tool.darknet2pytorch import Darknet
4 |
5 | class AlphaYolov4(torch.nn.Module):
6 | def __init__(self, cfgfile, weightfile):
7 | super().__init__()
8 | self.model = Darknet(cfgfile)
9 | self.model.load_weights(weightfile)
10 | self.model.eval()
11 | self.model.print_network()
12 |
13 | def forward(self, x):
14 | y = self.model(x)
15 | boxes = y[0]
16 | confs = y[1].unsqueeze(dim = 2)
17 | return torch.cat((boxes, confs), 3)
18 |
19 | def transform_to_onnx(cfgfile, weightfile, batch_size=1, onnx_file_name=None):
20 | model = AlphaYolov4(cfgfile, weightfile)
21 |
22 | dynamic = False
23 | if batch_size <= 0:
24 | dynamic = True
25 |
26 | input_names = ["input"]
27 | output_names = ['output']
28 |
29 | if dynamic:
30 | x = torch.randn((1, 3, model.model.height, model.model.width), requires_grad=True)
31 | if not onnx_file_name:
32 | onnx_file_name = "yolov4_-1_3_{}_{}_dynamic.onnx".format(model.model.height, model.model.width)
33 | dynamic_axes = {"input": {0: "batch_size"}, "output": {0: "batch_size"}}
34 | # Export the model
35 | print('Export the onnx model ...')
36 | torch.onnx.export(model,
37 | x,
38 | onnx_file_name,
39 | export_params=True,
40 | opset_version=11,
41 | do_constant_folding=True,
42 | input_names=input_names, output_names=output_names,
43 | dynamic_axes=dynamic_axes)
44 |
45 | print('Onnx model exporting done')
46 | return onnx_file_name
47 |
48 | else:
49 | x = torch.randn((batch_size, 3, model.model.height, model.model.width), requires_grad=True)
50 | onnx_file_name = "yolov4_{}_3_{}_{}_static.onnx".format(batch_size, model.model.height, model.model.width)
51 | torch.onnx.export(model,
52 | x,
53 | onnx_file_name,
54 | export_params=True,
55 | opset_version=11,
56 | do_constant_folding=True,
57 | input_names=input_names, output_names=output_names,
58 | dynamic_axes=None)
59 |
60 | print('Onnx model exporting done')
61 | return onnx_file_name
62 |
63 |
64 | if __name__ == '__main__':
65 | from argparse import ArgumentParser
66 | parser = ArgumentParser()
67 | parser.add_argument('config')
68 | parser.add_argument('weightfile')
69 | parser.add_argument('--batch_size', type=int, default=-1, help="Static batch size of the model; use batch_size<=0 for dynamic batch size")
70 | parser.add_argument('--onnx_file_path', help="Output onnx file path")
71 | args = parser.parse_args()
72 | transform_to_onnx(args.config, args.weightfile, args.batch_size, args.onnx_file_path)
73 |
74 |
--------------------------------------------------------------------------------
/yolov4/decode_yolov4.cu:
--------------------------------------------------------------------------------
1 | #include "decode_yolov4.h"
2 |
3 | __global__ void decode_yolov4_device_kernel(int batch_size, int num_class, int topK, float conf_thresh,
4 | float* src, int srcWidth, int srcHeight, int srcArea,
5 | float* dst, int dstWidth, int dstHeight, int dstArea)
6 | {
7 | int dx = blockDim.x * blockIdx.x + threadIdx.x;
8 | int dy = blockDim.y * blockIdx.y + threadIdx.y;
9 | if (dx >= srcHeight || dy >= batch_size)
10 | {
11 | return;
12 | }
13 | float* pitem = src + dy * srcArea + dx * srcWidth;
14 | float* class_confidence = pitem + 4;
15 | float confidence = *class_confidence++;
16 | int label = 0;
17 | for (int i = 1; i < num_class; ++i, ++class_confidence)
18 | {
19 | if (*class_confidence > confidence)
20 | {
21 | confidence = *class_confidence;
22 | label = i;
23 | }
24 | }
25 | if (confidence < conf_thresh)
26 | {
27 | return;
28 | }
29 | int index = atomicAdd(dst + dy * dstArea, 1);
30 | if (index >= topK)
31 | {
32 | return;
33 | }
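// Note: the exported yolov4 onnx appears to already emit normalized corner
// coordinates, so the cx/cy/width/height values read below are really
// (x1, y1, x2, y2) and are copied through unchanged; YOLOV4::postprocess()
// scales them by dst_w/dst_h before mapping back to the source image.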
34 | float cx = *pitem++;
35 | float cy = *pitem++;
36 | float width = *pitem++;
37 | float height = *pitem++;
38 |
39 | float left = cx;
40 | float top = cy;
41 | float right = width;
42 | float bottom = height;
43 | float* pout_item = dst + dy * dstArea + 1 + index * dstWidth;
44 | *pout_item++ = left;
45 | *pout_item++ = top;
46 | *pout_item++ = right;
47 | *pout_item++ = bottom;
48 | *pout_item++ = confidence;
49 | *pout_item++ = label;
50 | *pout_item++ = 1;
51 | }
52 |
53 | static __device__ float box_iou(
54 | float aleft, float atop, float aright, float abottom,
55 | float bleft, float btop, float bright, float bbottom
56 | ) {
57 | float cleft = max(aleft, bleft);
58 | float ctop = max(atop, btop);
59 | float cright = min(aright, bright);
60 | float cbottom = min(abottom, bbottom);
61 |
62 | float c_area = max(cright - cleft, 0.0f) * max(cbottom - ctop, 0.0f);
63 | if (c_area == 0.0f)
64 | return 0.0f;
65 |
66 | float a_area = max(0.0f, aright - aleft) * max(0.0f, abottom - atop);
67 | float b_area = max(0.0f, bright - bleft) * max(0.0f, bbottom - btop);
68 | return c_area / (a_area + b_area - c_area);
69 | }
70 |
71 | void yolov4::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
72 | {
73 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
74 | dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
75 | (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
76 | int dstArea = 1 + dstWidth * dstHeight;
77 |
78 | decode_yolov4_device_kernel<<<grid_size, block_size, 0, nullptr>>>(param.batch_size, param.num_class, param.topK, param.conf_thresh,
79 | src, srcWidth, srcHeight, srcArea,
80 | dst, dstWidth, dstHeight, dstArea);
81 | }
82 |
83 |
--------------------------------------------------------------------------------
/yolov4/decode_yolov4.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/utils.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | namespace yolov4
6 | {
7 | void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
8 | }
9 |
--------------------------------------------------------------------------------
/yolov4/yolov4.cpp:
--------------------------------------------------------------------------------
1 | #include"yolov4.h"
2 | #include"decode_yolov4.h"
3 |
4 | YOLOV4::YOLOV4(const utils::InitParameter& param) :yolo::YOLO(param)
5 | {
6 | }
7 |
8 | YOLOV4::~YOLOV4()
9 | {
10 | }
11 |
12 | void YOLOV4::postprocess(const std::vector<cv::Mat>& imgsBatch)
13 | {
14 | yolov4::decodeDevice(m_param, m_output_src_device, 4 + m_param.num_class, m_total_objects, m_output_area,
15 | m_output_objects_device, m_output_objects_width, m_param.topK);
16 | nmsDeviceV1(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1);
17 | CHECK(cudaMemcpy(m_output_objects_host, m_output_objects_device, m_param.batch_size * sizeof(float) * (1 + 7 * m_param.topK), cudaMemcpyDeviceToHost));
18 | for (size_t bi = 0; bi < imgsBatch.size(); bi++)
19 | {
20 | int num_boxes = std::min((int)(m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1))[0], m_param.topK);
21 | for (size_t i = 0; i < num_boxes; i++)
22 | {
23 | float* ptr = m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1) + m_output_objects_width * i + 1;
24 | int keep_flag = ptr[6];
25 | if (keep_flag)
26 | {
27 | float x_lt = m_dst2src.v0 * ptr[0] * m_param.dst_w + m_dst2src.v1 * ptr[1] * m_param.dst_h + m_dst2src.v2;
28 | float y_lt = m_dst2src.v3 * ptr[0] * m_param.dst_w + m_dst2src.v4 * ptr[1] * m_param.dst_h + m_dst2src.v5;
29 | float x_rb = m_dst2src.v0 * ptr[2] * m_param.dst_w + m_dst2src.v1 * ptr[3] * m_param.dst_h + m_dst2src.v2;
30 | float y_rb = m_dst2src.v3 * ptr[2] * m_param.dst_w + m_dst2src.v4 * ptr[3] * m_param.dst_h + m_dst2src.v5;
31 |
32 | m_objectss[bi].emplace_back(x_lt, y_lt, x_rb, y_rb, ptr[4], (int)ptr[5]);
33 | }
34 | }
35 |
36 | }
37 | }
--------------------------------------------------------------------------------
/yolov4/yolov4.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/yolo.h"
3 | #include"../utils/utils.h"
4 | class YOLOV4 : public yolo::YOLO
5 | {
6 | public:
7 | YOLOV4(const utils::InitParameter& param);
8 | ~YOLOV4();
9 | virtual void postprocess(const std::vector<cv::Mat>& imgsBatch);
10 | };
--------------------------------------------------------------------------------
/yolov5/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | # cuda
7 | PROJECT(yolov5 VERSION 1.0.0 LANGUAGES C CXX CUDA)
8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
11 | message(STATUS ${ALL_LIBS})
12 | file(GLOB CPPS
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
16 | ${TensorRT_ROOT}/samples/common/logger.cpp
17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
18 | )
19 | list(REMOVE_ITEM CPPS app_yolov5.cpp)
20 | message(STATUS CPPS = ${CPPS})
21 | list (LENGTH CPPS length)
22 | message(STATUS ***length*** = ${length})
23 | find_package(OpenCV REQUIRED)
24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
27 | add_library(${PROJECT_NAME} SHARED ${CPPS})
28 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
29 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
30 | target_compile_options(${PROJECT_NAME} PUBLIC
31 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
32 |
33 | add_executable(app_yolov5 app_yolov5.cpp)
34 |
35 | # NVCC
36 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
37 | target_link_libraries(app_yolov5 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
38 |
--------------------------------------------------------------------------------
/yolov5/alpha_edit.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import onnx
3 | import onnx.helper as helper
4 | import torch
5 | # import torchvision
6 | import onnxsim # pip install onnx-simplifier
7 | import onnxruntime as ort
8 | import numpy as np
9 | import os
10 |
11 |
12 | def infer_onnx(onnx_file, input_names, image_input_shape):
13 | ort_session = ort.InferenceSession(onnx_file)
14 | outputs = ort_session.run(
15 | None,
16 | {input_names[0]: np.ones(shape=image_input_shape).astype(np.float32)},
17 | )
18 | return outputs
19 |
20 |
21 | def run(mode, net_name, model_path):
22 | # mode selects the input resolution:
23 | #   "p5" -> 640x640   (e.g. yolov5n / yolov5s / yolov5m)
24 | #   "p6" -> 1280x1280 (e.g. yolov5n6 / yolov5s6 / yolov5m6)
25 | if mode == "p5":
26 | image_input_shape = [1, 3, 640, 640]
27 | else: # mode == "p6"
28 | image_input_shape = [1, 3, 1280, 1280]
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 | path = model_path
37 | onnx_name = net_name + ".onnx"
38 | input_names = ["images"]
39 | output_names = ["output"]
40 |
41 | model = onnx.load_model(path + onnx_name)
42 |
43 | outputs = infer_onnx(path + onnx_name, input_names, image_input_shape)
44 | for output in outputs:
45 | print(output.shape)
46 |
47 | # delete some nodes
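# The exported graph carries one merged "output" tensor plus per-scale
# auxiliary outputs (3 extras for p5 models, 4 for p6); only the merged
# tensor is consumed downstream, so the extras are dropped here.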
48 | if mode == "p5":
49 | item1 = model.graph.output[1]
50 | item2 = model.graph.output[2]
51 | item3 = model.graph.output[3]
52 | model.graph.output.remove(item1)
53 | model.graph.output.remove(item2)
54 | model.graph.output.remove(item3)
55 | else: # mode == "p6":
56 | item1 = model.graph.output[1]
57 | item2 = model.graph.output[2]
58 | item3 = model.graph.output[3]
59 | item4 = model.graph.output[4]
60 | model.graph.output.remove(item1)
61 | model.graph.output.remove(item2)
62 | model.graph.output.remove(item3)
63 | model.graph.output.remove(item4)
64 |
65 | onnx.save(model, path + "alpha_" + onnx_name)
66 | outputs = infer_onnx(path + "alpha_" + onnx_name, input_names, image_input_shape)
67 | for output in outputs:
68 | print(output.shape)
69 |
70 | def parse_opt():
71 | parser = argparse.ArgumentParser()
72 | parser.add_argument('--mode', type=str, default='p5', help='p5:640*640, p6:1280*1280')
73 | parser.add_argument('--net_name', type=str, default='yolov5s', help='yolov5n yolov5s yolov5m ... yolov5s6 ...')
74 | parser.add_argument('--model_path', type=str, default='', help='pth file path')
75 | opt = parser.parse_args()
76 | #print_args(vars(opt))
77 | return opt
78 |
79 | def main(opt):
80 | run(**vars(opt))
81 |
82 | if __name__ == "__main__":
83 | opt = parse_opt()
84 | main(opt)
85 |
86 |
--------------------------------------------------------------------------------
/yolov6/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolov6 VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 | message(STATUS ${ALL_LIBS})
11 | file(GLOB CPPS
12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
13 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
15 | ${TensorRT_ROOT}/samples/common/logger.cpp
16 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
17 | )
18 | list(REMOVE_ITEM CPPS app_yolov6.cpp)
19 | message(STATUS CPPS = ${CPPS})
20 | list (LENGTH CPPS length)
21 | message(STATUS ***length*** = ${length})
22 | find_package(OpenCV REQUIRED)
23 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
24 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
25 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
26 | add_library(${PROJECT_NAME} SHARED ${CPPS})
27 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
28 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
29 | target_compile_options(${PROJECT_NAME} PUBLIC
30 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
31 |
32 | add_executable(app_yolov6 app_yolov6.cpp)
33 |
34 | # NVCC
35 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
36 | target_link_libraries(app_yolov6 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
37 |
--------------------------------------------------------------------------------
/yolov7/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolov7 VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 |
11 | message(STATUS ${ALL_LIBS})
12 | file(GLOB CPPS
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
16 | ${TensorRT_ROOT}/samples/common/logger.cpp
17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
18 | )
19 | list(REMOVE_ITEM CPPS app_yolov7.cpp)
20 | message(STATUS CPPS = ${CPPS})
21 | list (LENGTH CPPS length)
22 | message(STATUS ***length*** = ${length})
23 | find_package(OpenCV REQUIRED)
24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
27 | add_library(${PROJECT_NAME} SHARED ${CPPS})
28 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
29 |
30 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
31 | target_compile_options(${PROJECT_NAME} PUBLIC
32 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
33 |
34 | add_executable(app_yolov7 app_yolov7.cpp)
35 |
36 | # NVCC
37 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
38 | target_link_libraries(app_yolov7 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
39 |
--------------------------------------------------------------------------------
/yolov7/README.md:
--------------------------------------------------------------------------------
1 | ## 1. get onnx
2 |
3 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv?usp=sharing) or export onnx:
4 | ```bash
5 | git clone https://github.com/WongKinYiu/yolov7
6 | cd yolov7 && git checkout 072f76c72c641c7a1ee482e39f604f6f8ef7ee92
7 | # 640
8 | python export.py --weights yolov7-tiny.pt --dynamic --grid
9 | python export.py --weights yolov7.pt --dynamic --grid
10 | python export.py --weights yolov7x.pt --dynamic --grid
11 | # 1280
12 | python export.py --weights yolov7-w6.pt --dynamic --grid --img-size 1280
13 | ```
14 | ## 2.edit and save onnx
15 | ```bash
16 | # note: If you have obtained onnx by downloading, this step can be ignored
17 | ignore
18 | ```
19 |
20 | ## 3.compile onnx
21 | ```bash
22 | # put your onnx file in this path: tensorrt-alpha/data/yolov7
23 | cd tensorrt-alpha/data/yolov7
24 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
25 | # 640
26 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov7-tiny.onnx --saveEngine=yolov7-tiny.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
27 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov7.onnx --saveEngine=yolov7.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
28 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov7x.onnx --saveEngine=yolov7x.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
29 | # 1280
30 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov7-w6.onnx --saveEngine=yolov7-w6.trt --buildOnly --minShapes=images:1x3x1280x1280 --optShapes=images:2x3x1280x1280 --maxShapes=images:4x3x1280x1280
31 |
32 | # note: if you get the error "Error Code 1: Cuda Runtime (an illegal memory access was encountered)" from
33 | # "bool context = m_context->executeV2((void**)bindings)" when running the model (yolov7-w6), just lower the batch_size.
34 | ```
35 | ## 4.run
36 | ```bash
37 | git clone https://github.com/FeiYull/tensorrt-alpha
38 | cd tensorrt-alpha/yolov7
39 | mkdir build
40 | cd build
41 | cmake ..
42 | make -j10
43 | # note: the dstImage will be saved in tensorrt-alpha/yolov7/build by default
44 |
45 | ## 640
46 | # infer image
47 | ./app_yolov7 --model=../../data/yolov7/yolov7-tiny.trt --size=640 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../
48 | ./app_yolov7 --model=../../data/yolov7/yolov7-w6.trt --size=1280 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../
49 |
50 | # infer video
51 | ./app_yolov7 --model=../../data/yolov7/yolov7-tiny.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show
52 |
53 | # infer camera
54 | ./app_yolov7 --model=../../data/yolov7/yolov7-tiny.trt --size=640 --batch_size=2 --cam_id=0 --show
55 | ```
56 | ## 5. appendix
57 | ignore
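58 |
59 | For reference, the shape flags in section 3 (--minShapes/--optShapes/--maxShapes) bound the batch sizes the engine accepts at run time, and --batch_size in section 4 must stay inside that range. A minimal sketch of the relationship (plain C++; helper name is made up, not part of this repo):
60 | ```cpp
61 | #include <cassert>
62 |
63 | // The engines above are built with min=1x3x640x640 and max=4x3x640x640,
64 | // so any --batch_size outside [1, 4] cannot be bound to the engine.
65 | bool batchFitsProfile(int batch, int minBatch = 1, int maxBatch = 4)
66 | {
67 |     return batch >= minBatch && batch <= maxBatch;
68 | }
69 |
70 | int main()
71 | {
72 |     assert(batchFitsProfile(2));   // e.g. --batch_size=2 for video inference
73 |     assert(!batchFitsProfile(8));  // out of profile; see the batch_size note in section 3
74 |     return 0;
75 | }
76 | ```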
--------------------------------------------------------------------------------
/yolov8-pose/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolov8_pose VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 | message(STATUS ${ALL_LIBS})
11 | file(GLOB CPPS
12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
16 | ${TensorRT_ROOT}/samples/common/logger.cpp
17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
18 | #${TensorRT_ROOT}/samples/common/sampleUtils.cpp
19 | )
20 | list(REMOVE_ITEM CPPS ${CMAKE_CURRENT_SOURCE_DIR}/app_yolov8_pose.cpp)
21 | message(STATUS CPPS = ${CPPS})
22 | list (LENGTH CPPS length)
23 | message(STATUS ***length*** = ${length})
24 | find_package(OpenCV REQUIRED)
25 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
26 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
27 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
28 | add_library(${PROJECT_NAME} SHARED ${CPPS})
29 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
30 |
31 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
32 | target_compile_options(${PROJECT_NAME} PUBLIC
33 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
34 |
35 | add_executable(app_yolov8_pose app_yolov8_pose.cpp)
36 |
37 | # NVCC
38 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
39 | target_link_libraries(app_yolov8_pose ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
40 |
--------------------------------------------------------------------------------
/yolov8-pose/README.md:
--------------------------------------------------------------------------------
1 | ## 1. get onnx
2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) or export onnx:
3 | ```bash
4 | # 🔥 yolov8 official repo: https://github.com/ultralytics/ultralytics
5 | # 🔥 yolov8 quickstart: https://docs.ultralytics.com/quickstart/
6 | # 🚀 TensorRT-Alpha will be updated in sync as soon as possible!
7 |
8 | # install yolov8
9 | conda create -n yolov8 python==3.8 -y # for Linux
10 | # conda create -n yolov8 python=3.9 -y # for Windows10
11 | conda activate yolov8
12 | pip install ultralytics==8.0.200
13 | pip install onnx==1.12.0
14 |
15 | # download official weights (".pt" files)
16 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt
17 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt
18 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt
19 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt
20 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt
21 | ```
22 |
23 | export onnx:
24 | ```bash
25 | yolo mode=export model=yolov8n-pose.pt format=onnx dynamic=True opset=12
26 | yolo mode=export model=yolov8s-pose.pt format=onnx dynamic=True opset=12
27 | yolo mode=export model=yolov8m-pose.pt format=onnx dynamic=True opset=12
28 | yolo mode=export model=yolov8l-pose.pt format=onnx dynamic=True opset=12
29 | yolo mode=export model=yolov8x-pose.pt format=onnx dynamic=True opset=12
30 | ```
31 |
32 | ## 2.edit and save onnx
33 | ```bash
34 | # note: If you have obtained onnx by downloading, this step can be ignored
35 | ignore
36 | ```
37 |
38 | ## 3.compile onnx
39 | ```bash
40 | # put your onnx file in this path:tensorrt-alpha/data/yolov8-pose
41 | cd tensorrt-alpha/data/yolov8-pose
42 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
43 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8n-pose.onnx --saveEngine=yolov8n-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
44 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8s-pose.onnx --saveEngine=yolov8s-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
45 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8m-pose.onnx --saveEngine=yolov8m-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
46 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8l-pose.onnx --saveEngine=yolov8l-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
47 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8x-pose.onnx --saveEngine=yolov8x-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
48 | ```
49 | ## 4.run
50 | ```bash
51 | git clone https://github.com/FeiYull/tensorrt-alpha
52 | cd tensorrt-alpha/yolov8-pose
53 | mkdir build
54 | cd build
55 | cmake ..
56 | make -j10
57 | # note: the dstImage will be saved in tensorrt-alpha/yolov8-pose/build by default
58 |
59 | ## 640
60 | # infer image
61 | ./app_yolov8_pose --model=../../data/yolov8-pose/yolov8n-pose.trt --size=640 --batch_size=1 --img=../../data/6406407.jpg --show --savePath=../
62 |
63 | # infer video
64 | ./app_yolov8_pose --model=../../data/yolov8-pose/yolov8n-pose.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show
65 |
66 | # infer camera
67 | ./app_yolov8_pose --model=../../data/yolov8-pose/yolov8n-pose.trt --size=640 --batch_size=2 --cam_id=0 --show
68 |
69 | ```
70 | ## 5. appendix
71 | ignore
--------------------------------------------------------------------------------
/yolov8-pose/app_yolov8_pose.cpp:
--------------------------------------------------------------------------------
1 | #include"../utils/yolo.h"
2 | #include"yolov8_pose.h"
3 |
4 | void setParameters(utils::InitParameter& initParameters)
5 | {
6 | initParameters.class_names = utils::dataSets::coco80;
7 | //initParameters.class_names = utils::dataSets::voc20;
8 | initParameters.num_class = 80; // for coco
9 | //initParameters.num_class = 20; // for voc2012
10 | initParameters.batch_size = 8;
11 | initParameters.dst_h = 640;
12 | initParameters.dst_w = 640;
13 | initParameters.input_output_names = { "images", "output0" };
14 | initParameters.conf_thresh = 0.25f;
15 | initParameters.iou_thresh = 0.7f;
16 | initParameters.save_path = "D:/Data/1/";
17 | }
18 |
19 | void task(YOLOv8Pose& yolo, const utils::InitParameter& param, std::vector<cv::Mat>& imgsBatch, const int& delayTime, const int& batchi)
20 | {
21 | yolo.copy(imgsBatch);
22 | utils::DeviceTimer d_t1; yolo.preprocess(imgsBatch); float t1 = d_t1.getUsedTime();
23 | utils::DeviceTimer d_t2; yolo.infer(); float t2 = d_t2.getUsedTime();
24 | utils::DeviceTimer d_t3; yolo.postprocess(imgsBatch); float t3 = d_t3.getUsedTime();
25 | float avg_times[3] = { t1 / param.batch_size, t2 / param.batch_size, t3 / param.batch_size };
26 | sample::gLogInfo << "preprocess time = " << avg_times[0] << "; "
27 | "infer time = " << avg_times[1] << "; "
28 | "postprocess time = " << avg_times[2] << std::endl;
29 | yolo.showAndSave(param.class_names, delayTime, imgsBatch, avg_times);
30 | yolo.reset();
31 | }
32 |
33 | int main(int argc, char** argv)
34 | {
35 | cv::CommandLineParser parser(argc, argv,
36 | {
37 | "{model || tensorrt model file }"
38 | "{size || image (h, w), eg: 640 }"
39 | "{batch_size|| batch size }"
40 | "{video || video's path }"
41 | "{img || image's path }"
42 | "{cam_id || camera's device id }"
43 | "{show || if show the result }"
44 | "{savePath || save path, can be ignore}"
45 | });
46 | utils::InitParameter param;
47 | setParameters(param);
48 | std::string model_path = "../../data/yolov8/yolov8n-pose.trt";
49 | std::string video_path = "../../data/people.mp4";
50 | std::string image_path = "../../data/bus.jpg";
51 | int camera_id = 0;
52 | utils::InputStream source;
53 | source = utils::InputStream::IMAGE;
54 | //source = utils::InputStream::VIDEO;
55 | //source = utils::InputStream::CAMERA;
56 | int size = -1; // w or h
57 | int batch_size = 8;
58 | bool is_show = false;
59 | bool is_save = false;
60 | if (parser.has("model"))
61 | {
62 | model_path = parser.get<std::string>("model");
63 | sample::gLogInfo << "model_path = " << model_path << std::endl;
64 | }
65 | if (parser.has("size"))
66 | {
67 | size = parser.get<int>("size");
68 | sample::gLogInfo << "size = " << size << std::endl;
69 | param.dst_h = param.dst_w = size;
70 | }
71 | if (parser.has("batch_size"))
72 | {
73 | batch_size = parser.get<int>("batch_size");
74 | sample::gLogInfo << "batch_size = " << batch_size << std::endl;
75 | param.batch_size = batch_size;
76 | }
77 | if (parser.has("video"))
78 | {
79 | source = utils::InputStream::VIDEO;
80 | video_path = parser.get<std::string>("video");
81 | sample::gLogInfo << "video_path = " << video_path << std::endl;
82 | }
83 | if (parser.has("img"))
84 | {
85 | source = utils::InputStream::IMAGE;
86 | image_path = parser.get<std::string>("img");
87 | sample::gLogInfo << "image_path = " << image_path << std::endl;
88 | }
89 | if (parser.has("cam_id"))
90 | {
91 | source = utils::InputStream::CAMERA;
92 | camera_id = parser.get<int>("cam_id");
93 | sample::gLogInfo << "camera_id = " << camera_id << std::endl;
94 | }
95 | if (parser.has("show"))
96 | {
97 | param.is_show = true;
98 | sample::gLogInfo << "is_show = " << param.is_show << std::endl;
99 | }
100 | if (parser.has("savePath"))
101 | {
102 | param.is_save = true;
103 | param.save_path = parser.get<std::string>("savePath");
104 | sample::gLogInfo << "save_path = " << param.save_path << std::endl;
105 | }
106 | int total_batches = 0;
107 | int delay_time = 1;
108 | cv::VideoCapture capture;
109 | if (!setInputStream(source, image_path, video_path, camera_id,
110 | capture, total_batches, delay_time, param))
111 | {
112 | sample::gLogError << "read the input data errors!" << std::endl;
113 | return -1;
114 | }
115 | setRenderWindow(param);
116 | YOLOv8Pose yolo(param);
117 | std::vector<unsigned char> trt_file = utils::loadModel(model_path);
118 | if (trt_file.empty())
119 | {
120 | sample::gLogError << "trt_file is empty!" << std::endl;
121 | return -1;
122 | }
123 | if (!yolo.init(trt_file))
124 | {
125 | sample::gLogError << "initEngine() failed!" << std::endl;
126 | return -1;
127 | }
128 | yolo.check();
129 | cv::Mat frame;
130 | std::vector<cv::Mat> imgs_batch;
131 | imgs_batch.reserve(param.batch_size);
132 | sample::gLogInfo << imgs_batch.capacity() << std::endl;
133 | int batchi = 0;
134 | while (capture.isOpened())
135 | {
136 | if (batchi >= total_batches && source != utils::InputStream::CAMERA)
137 | {
138 | break;
139 | }
140 | if (imgs_batch.size() < param.batch_size)
141 | {
142 | if (source != utils::InputStream::IMAGE)
143 | {
144 | capture.read(frame);
145 | }
146 | else
147 | {
148 | frame = cv::imread(image_path);
149 | }
150 | if (frame.empty())
151 | {
152 | sample::gLogWarning << "no more video or camera frame" << std::endl;
153 | task(yolo, param, imgs_batch, delay_time, batchi);
154 | imgs_batch.clear();
155 | batchi++;
156 | break;
157 | }
158 | else
159 | {
160 | imgs_batch.emplace_back(frame.clone());
161 | }
162 | }
163 | else
164 | {
165 | task(yolo, param, imgs_batch, delay_time, batchi);
166 | imgs_batch.clear();
167 | batchi++;
168 | }
169 | }
170 | return 0;
171 | }
--------------------------------------------------------------------------------
/yolov8-pose/decode_yolov8_pose.cu:
--------------------------------------------------------------------------------
1 | #include "decode_yolov8_pose.h"
2 |
3 | __global__ void decode_yolov8_pose_device_kernel(int batch_size, int num_class, int topK, float conf_thresh,
4 | float* src, int srcWidth, int srcHeight, int srcArea,
5 | float* dst, int dstWidth, int dstArea)
6 | {
7 | int dx = blockDim.x * blockIdx.x + threadIdx.x;
8 | int dy = blockDim.y * blockIdx.y + threadIdx.y;
9 | if (dx >= srcHeight || dy >= batch_size)
10 | {
11 | return;
12 | }
13 | float* pitem = src + dy * srcArea + dx * srcWidth;
14 | float confidence = pitem[4];
15 | if (confidence < conf_thresh)
16 | {
17 | return;
18 | }
19 | int index = atomicAdd(dst + dy * dstArea, 1);
20 |
21 | if (index >= topK)
22 | {
23 | return;
24 | }
25 | float cx = *pitem++;
26 | float cy = *pitem++;
27 | float width = *pitem++;
28 | float height = *pitem++;
29 |
30 | float left = cx - width * 0.5f;
31 | float top = cy - height * 0.5f;
32 | float right = cx + width * 0.5f;
33 | float bottom = cy + height * 0.5f;
34 | float* pout_item = dst + dy * dstArea + 1 + index * dstWidth;
35 | *pout_item++ = left;
36 | *pout_item++ = top;
37 | *pout_item++ = right;
38 | *pout_item++ = bottom;
39 | *pout_item++ = confidence;
40 | *pout_item++ = 0;
41 | *pout_item++ = 1;
42 | memcpy(pout_item, pitem + 1, (dstWidth - 7) * sizeof(float));
43 | }
44 |
45 | void yolov8pose::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
46 | {
47 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
48 | dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
49 | (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
50 | int dstArea = 1 + dstWidth * dstHeight;
51 |
52 | decode_yolov8_pose_device_kernel<<<grid_size, block_size, 0, nullptr>>>(param.batch_size, param.num_class, param.topK, param.conf_thresh,
53 | src, srcWidth, srcHeight, srcArea,
54 | dst, dstWidth, dstArea);
55 | }
56 |
57 | __global__ void transpose_device_kernel(int batch_size,
58 | float* src, int srcWidth, int srcHeight, int srcArea,
59 | float* dst, int dstWidth, int dstHeight, int dstArea)
60 | {
61 | int dx = blockDim.x * blockIdx.x + threadIdx.x;
62 | int dy = blockDim.y * blockIdx.y + threadIdx.y;
63 | if (dx >= dstHeight || dy >= batch_size)
64 | {
65 | return;
66 | }
67 | float* p_dst_row = dst + dy * dstArea + dx * dstWidth;
68 | float* p_src_col = src + dy * srcArea + dx;
69 |
70 | for (int i = 0; i < dstWidth; i++)
71 | {
72 | p_dst_row[i] = p_src_col[i * srcWidth];
73 | }
74 | }
75 |
76 | void yolov8pose::transposeDevice(utils::InitParameter param,
77 | float* src, int srcWidth, int srcHeight, int srcArea,
78 | float* dst, int dstWidth, int dstHeight)
79 | {
80 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
81 | dim3 grid_size((dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
82 | (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
83 | int dstArea = dstWidth * dstHeight;
84 |
85 | transpose_device_kernel<<<grid_size, block_size, 0, nullptr>>>(param.batch_size,
86 | src, srcWidth, srcHeight, srcArea,
87 | dst, dstWidth, dstHeight, dstArea);
88 | }
89 |
90 |
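91 | // Output-layout sketch for the host side (assumes the nms kernel in utils clears keepflag for
92 | // suppressed boxes; row width dstWidth = 7 + keypoint floats, dst[0] holds the hit count):
93 | //
94 | //     int num = std::min((int)host[0], topK);              // host = dst copied back via cudaMemcpy
95 | //     for (int i = 0; i < num; ++i)
96 | //     {
97 | //         float* row = host + 1 + i * dstWidth;            // [left, top, right, bottom, conf, label, keepflag, kpts...]
98 | //         if (row[6] == 0.f) continue;                     // dropped by nms
99 | //         float* kpts = row + 7;                           // the dstWidth - 7 values copied by memcpy above
100 | //     }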
--------------------------------------------------------------------------------
/yolov8-pose/decode_yolov8_pose.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/utils.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | namespace yolov8pose
6 | {
7 | void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
8 | void transposeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight);
9 | }
10 |
--------------------------------------------------------------------------------
/yolov8-pose/yolov8_pose.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/yolo.h"
3 | #include"../utils/utils.h"
4 | class YOLOv8Pose : public yolo::YOLO
5 | {
6 | public:
7 | YOLOv8Pose(const utils::InitParameter& param);
8 | ~YOLOv8Pose();
9 | virtual bool init(const std::vector<unsigned char>& trtFile);
10 | virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
11 | virtual void postprocess(const std::vector<cv::Mat>& imgsBatch);
12 | virtual void reset();
13 |
14 | public:
15 | void showAndSave(const std::vector<std::string>& classNames,
16 | const int& cvDelayTime, std::vector<cv::Mat>& imgsBatch, float* avg_times);
17 |
18 | private:
19 | float* m_output_src_transpose_device;
20 | float* m_output_objects_device;
21 | float* m_output_objects_host;
22 | int m_output_objects_width;
23 |
24 | const size_t m_nkpts;
25 | std::vector<std::vector<int>> m_skeleton;
26 | std::vector<cv::Scalar> m_kpt_color;
27 | std::vector<cv::Scalar> m_limb_color;
28 | };
--------------------------------------------------------------------------------
/yolov8-seg/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolov8_seg VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | set( CMAKE_CXX_FLAGS "-O3" )
9 | include_directories( "/usr/include/eigen3" )
10 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
11 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
12 | message(STATUS ${ALL_LIBS})
13 | file(GLOB CPPS
14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
15 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
17 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
18 | ${TensorRT_ROOT}/samples/common/logger.cpp
19 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
20 | #${TensorRT_ROOT}/samples/common/sampleUtils.cpp
21 | )
22 | list(REMOVE_ITEM CPPS ${CMAKE_CURRENT_SOURCE_DIR}/app_yolov8_seg.cpp)
23 | message(STATUS CPPS = ${CPPS})
24 | list (LENGTH CPPS length)
25 | message(STATUS ***length*** = ${length})
26 | find_package(OpenCV REQUIRED)
27 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
28 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
29 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
30 | add_library(${PROJECT_NAME} SHARED ${CPPS})
31 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
32 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
33 | target_compile_options(${PROJECT_NAME} PUBLIC
34 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
35 |
36 | add_executable(app_yolov8_seg app_yolov8_seg.cpp)
37 |
38 | # NVCC
39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
40 | target_link_libraries(app_yolov8_seg ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
41 |
--------------------------------------------------------------------------------
/yolov8-seg/README.md:
--------------------------------------------------------------------------------
1 | ## 0. install eigen
2 | Eigen 3.4.0 has been tested and passes!
3 | ```bash
4 | # for linux
5 | sudo apt-get install libeigen3-dev
6 |
7 | # for windows
8 | # download from https://eigen.tuxfamily.org/index.php?title=Main_Page
9 | # decompress the package
10 | # then manually add its include directory to the VS project
11 | ```
12 |
13 | ## 1. get onnx
14 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) or export onnx:
15 | ```bash
16 | # 🔥 yolov8 official repo: https://github.com/ultralytics/ultralytics
17 | # 🔥 yolov8 quickstart: https://docs.ultralytics.com/quickstart/
18 | # 🚀 TensorRT-Alpha will be updated in sync as soon as possible!
19 |
20 | # install yolov8
21 | conda create -n yolov8 python==3.8 -y # for Linux
22 | # conda create -n yolov8 python=3.9 -y # for Windows10
23 | conda activate yolov8
24 | pip install ultralytics==8.0.200
25 | pip install onnx==1.12.0
26 |
27 | # download official weights (".pt" files)
28 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt
29 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt
30 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt
31 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt
32 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt
33 | ```
34 |
35 | export onnx:
36 | ```bash
37 | yolo mode=export model=yolov8n-seg.pt format=onnx dynamic=True opset=12
38 | yolo mode=export model=yolov8s-seg.pt format=onnx dynamic=True opset=12
39 | yolo mode=export model=yolov8m-seg.pt format=onnx dynamic=True opset=12
40 | yolo mode=export model=yolov8l-seg.pt format=onnx dynamic=True opset=12
41 | yolo mode=export model=yolov8x-seg.pt format=onnx dynamic=True opset=12
42 | ```
43 |
44 | ## 2.edit and save onnx
45 | ```bash
46 | # note: If you have obtained onnx by downloading, this step can be ignored
47 | ignore
48 | ```
49 |
50 | ## 3.compile onnx
51 | ```bash
52 | # put your onnx file in this path:tensorrt-alpha/data/yolov8-seg
53 | cd tensorrt-alpha/data/yolov8-seg
54 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
55 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8n-seg.onnx --saveEngine=yolov8n-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
56 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8s-seg.onnx --saveEngine=yolov8s-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
57 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8m-seg.onnx --saveEngine=yolov8m-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
58 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8l-seg.onnx --saveEngine=yolov8l-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
59 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8x-seg.onnx --saveEngine=yolov8x-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
60 | ```
61 | ## 4.run
62 | ```bash
63 | git clone https://github.com/FeiYull/tensorrt-alpha
64 | cd tensorrt-alpha/yolov8-seg
65 | mkdir build
66 | cd build
67 | cmake ..
68 | make -j10
69 | # note: the dstImage will be saved in tensorrt-alpha/yolov8-seg/build by default
70 |
71 | ## 640
72 | # infer image
73 | ./app_yolov8_seg --model=../../data/yolov8-seg/yolov8n-seg.trt --size=640 --batch_size=1 --img=../../data/6406407.jpg --show --savePath=../
74 |
75 | # infer video
76 | ./app_yolov8_seg --model=../../data/yolov8-seg/yolov8n-seg.trt --size=640 --batch_size=1 --video=../../data/people.mp4 --show
77 |
78 | # infer camera
79 | ./app_yolov8_seg --model=../../data/yolov8-seg/yolov8n-seg.trt --size=640 --batch_size=1 --cam_id=0 --show
80 |
81 | ```
82 | ## 5. appendix
83 | ignore
--------------------------------------------------------------------------------
/yolov8-seg/app_yolov8_seg.cpp:
--------------------------------------------------------------------------------
1 | #include"../utils/yolo.h"
2 | #include"yolov8_seg.h"
3 |
4 | void setParameters(utils::InitParameter& initParameters)
5 | {
6 | initParameters.class_names = utils::dataSets::coco80;
7 | //initParameters.class_names = utils::dataSets::voc20;
8 | initParameters.num_class = 80; // for coco
9 | //initParameters.num_class = 20; // for voc2012
10 | initParameters.batch_size = 8;
11 | initParameters.dst_h = 640;
12 | initParameters.dst_w = 640;
13 | initParameters.input_output_names = { "images", "output0" };
14 | initParameters.conf_thresh = 0.25f;
15 | initParameters.iou_thresh = 0.7f;
16 | initParameters.save_path = "D:/Data/1/";
17 | }
18 |
19 | void task(YOLOv8Seg& yolo, const utils::InitParameter& param, std::vector<cv::Mat>& imgsBatch, const int& delayTime, const int& batchi)
20 | {
21 | yolo.copy(imgsBatch);
22 | utils::DeviceTimer d_t1; yolo.preprocess(imgsBatch); float t1 = d_t1.getUsedTime();
23 | utils::DeviceTimer d_t2; yolo.infer(); float t2 = d_t2.getUsedTime();
24 | utils::DeviceTimer d_t3; yolo.postprocess(imgsBatch); float t3 = d_t3.getUsedTime();
25 | float avg_times[3] = { t1 / param.batch_size, t2 / param.batch_size, t3 / param.batch_size };
26 | sample::gLogInfo << "preprocess time = " << avg_times[0] << "; "
27 | "infer time = " << avg_times[1] << "; "
28 | "postprocess time = " << avg_times[2] << std::endl;
29 | yolo.showAndSave(param.class_names, delayTime, imgsBatch);
30 | yolo.reset();
31 | }
32 |
33 | int main(int argc, char** argv)
34 | {
35 | cv::CommandLineParser parser(argc, argv,
36 | {
37 | "{model || tensorrt model file }"
38 | "{size || image (h, w), eg: 640 }"
39 | "{batch_size|| batch size }"
40 | "{video || video's path }"
41 | "{img || image's path }"
42 | "{cam_id || camera's device id }"
43 | "{show || if show the result }"
44 | "{savePath || save path, can be ignore}"
45 | });
46 | utils::InitParameter param;
47 | setParameters(param);
48 | std::string model_path = "../../data/yolov8/yolov8n-seg.trt";
49 | std::string video_path = "../../data/people.mp4";
50 | std::string image_path = "../../data/bus.jpg";
51 | int camera_id = 0;
52 | utils::InputStream source;
53 | source = utils::InputStream::IMAGE;
54 | //source = utils::InputStream::VIDEO;
55 | //source = utils::InputStream::CAMERA;
56 | // update params from command line parser
57 | int size = -1;
58 | int batch_size = 8;
59 | bool is_show = false;
60 | bool is_save = false;
61 | if (parser.has("model"))
62 | {
63 | model_path = parser.get<std::string>("model");
64 | sample::gLogInfo << "model_path = " << model_path << std::endl;
65 | }
66 | if (parser.has("size"))
67 | {
68 | size = parser.get<int>("size");
69 | sample::gLogInfo << "size = " << size << std::endl;
70 | param.dst_h = param.dst_w = size;
71 | }
72 | if (parser.has("batch_size"))
73 | {
74 | batch_size = parser.get<int>("batch_size");
75 | sample::gLogInfo << "batch_size = " << batch_size << std::endl;
76 | param.batch_size = batch_size;
77 | }
78 | if (parser.has("video"))
79 | {
80 | source = utils::InputStream::VIDEO;
81 | video_path = parser.get<std::string>("video");
82 | sample::gLogInfo << "video_path = " << video_path << std::endl;
83 | }
84 | if (parser.has("img"))
85 | {
86 | source = utils::InputStream::IMAGE;
87 | image_path = parser.get<std::string>("img");
88 | sample::gLogInfo << "image_path = " << image_path << std::endl;
89 | }
90 | if (parser.has("cam_id"))
91 | {
92 | source = utils::InputStream::CAMERA;
93 | camera_id = parser.get<int>("cam_id");
94 | sample::gLogInfo << "camera_id = " << camera_id << std::endl;
95 | }
96 |
97 | if (parser.has("show"))
98 | {
99 | param.is_show = true;
100 | sample::gLogInfo << "is_show = " << param.is_show << std::endl;
101 | }
102 | if (parser.has("savePath"))
103 | {
104 | param.is_save = true;
105 | param.save_path = parser.get<std::string>("savePath");
106 | sample::gLogInfo << "save_path = " << param.save_path << std::endl;
107 | }
108 | int total_batches = 0;
109 | int delay_time = 1;
110 | cv::VideoCapture capture;
111 | if (!setInputStream(source, image_path, video_path, camera_id,
112 | capture, total_batches, delay_time, param))
113 | {
114 | sample::gLogError << "read the input data errors!" << std::endl;
115 | return -1;
116 | }
117 | setRenderWindow(param);
118 | YOLOv8Seg yolo(param);
119 | std::vector<unsigned char> trt_file = utils::loadModel(model_path);
120 | if (trt_file.empty())
121 | {
122 | sample::gLogError << "trt_file is empty!" << std::endl;
123 | return -1;
124 | }
125 | if (!yolo.init(trt_file))
126 | {
127 | sample::gLogError << "initEngine() failed!" << std::endl;
128 | return -1;
129 | }
130 | yolo.check();
131 | cv::Mat frame;
132 | std::vector<cv::Mat> imgs_batch;
133 | imgs_batch.reserve(param.batch_size);
134 | sample::gLogInfo << imgs_batch.capacity() << std::endl;
135 | int batchi = 0;
136 | while (capture.isOpened())
137 | {
138 | if (batchi >= total_batches && source != utils::InputStream::CAMERA)
139 | {
140 | break;
141 | }
142 | if (imgs_batch.size() < param.batch_size)
143 | {
144 | if (source != utils::InputStream::IMAGE)
145 | {
146 | capture.read(frame);
147 | }
148 | else
149 | {
150 | frame = cv::imread(image_path);
151 | }
152 | if (frame.empty())
153 | {
154 | sample::gLogWarning << "no more video or camera frame" << std::endl;
155 | task(yolo, param, imgs_batch, delay_time, batchi);
156 | imgs_batch.clear();
157 | batchi++;
158 | break;
159 | }
160 | else
161 | {
162 | imgs_batch.emplace_back(frame.clone());
163 | }
164 | }
165 | else
166 | {
167 | task(yolo, param, imgs_batch, delay_time, batchi);
168 | imgs_batch.clear();
169 | batchi++;
170 | }
171 | }
172 | return 0;
173 | }
--------------------------------------------------------------------------------
/yolov8-seg/decode_yolov8_seg.cu:
--------------------------------------------------------------------------------
1 | #include "decode_yolov8_seg.h"
2 |
3 | __global__ void decode_yolov8_seg_device_kernel(int batch_size, int num_class, int topK, float conf_thresh,
4 | float* src, int srcWidth, int srcHeight, int srcArea,
5 | float* dst, int dstWidth, int dstArea)
6 | {
7 | int dx = blockDim.x * blockIdx.x + threadIdx.x;
8 | int dy = blockDim.y * blockIdx.y + threadIdx.y;
9 | if (dx >= srcHeight || dy >= batch_size)
10 | {
11 | return;
12 | }
13 | float* pitem = src + dy * srcArea + dx * srcWidth;
14 | float* class_confidence = pitem + 4;
15 | float confidence = *class_confidence++;
16 | int label = 0;
17 | for (int i = 1; i < num_class; ++i, ++class_confidence)
18 | {
19 | if (*class_confidence > confidence)
20 | {
21 | confidence = *class_confidence;
22 | label = i;
23 | }
24 | }
25 | if (confidence < conf_thresh)
26 | {
27 | return;
28 | }
29 | int index = atomicAdd(dst + dy * dstArea, 1);
30 |
31 | if (index >= topK)
32 | {
33 | return;
34 | }
35 | float cx = *pitem++;
36 | float cy = *pitem++;
37 | float width = *pitem++;
38 | float height = *pitem++;
39 |
40 | float left = cx - width * 0.5f;
41 | float top = cy - height * 0.5f;
42 | float right = cx + width * 0.5f;
43 | float bottom = cy + height * 0.5f;
44 | float* pout_item = dst + dy * dstArea + 1 + index * dstWidth;
45 | *pout_item++ = left;
46 | *pout_item++ = top;
47 | *pout_item++ = right;
48 | *pout_item++ = bottom;
49 | *pout_item++ = confidence;
50 | *pout_item++ = label;
51 | *pout_item++ = 1;
52 | memcpy(pout_item, pitem + num_class, 32 * sizeof(float));
53 | }
54 |
55 | void yolov8seg::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
56 | {
57 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
58 | dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
59 | (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
60 | int dstArea = 1 + dstWidth * dstHeight;
61 | decode_yolov8_seg_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size, param.num_class, param.topK, param.conf_thresh,
62 | src, srcWidth, srcHeight, srcArea,
63 | dst, dstWidth, dstArea);
64 | }
65 |
66 | __global__ void transpose_device_kernel(int batch_size,
67 | float* src, int srcWidth, int srcHeight, int srcArea,
68 | float* dst, int dstWidth, int dstHeight, int dstArea)
69 | {
70 | int dx = blockDim.x * blockIdx.x + threadIdx.x;
71 | int dy = blockDim.y * blockIdx.y + threadIdx.y;
72 | if (dx >= dstHeight || dy >= batch_size)
73 | {
74 | return;
75 | }
76 | float* p_dst_row = dst + dy * dstArea + dx * dstWidth;
77 | float* p_src_col = src + dy * srcArea + dx;
78 |
79 | for (int i = 0; i < dstWidth; i++)
80 | {
81 | p_dst_row[i] = p_src_col[i * srcWidth];
82 | }
83 | }
84 |
85 | void yolov8seg::transposeDevice(utils::InitParameter param,
86 | float* src, int srcWidth, int srcHeight, int srcArea,
87 | float* dst, int dstWidth, int dstHeight)
88 | {
89 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
90 | dim3 grid_size((dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
91 | (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
92 | int dstArea = dstWidth * dstHeight;
93 | transpose_device_kernel<<<grid_size, block_size, 0, nullptr>>>(param.batch_size,
94 | src, srcWidth, srcHeight, srcArea,
95 | dst, dstWidth, dstHeight, dstArea);
96 | }
97 |
98 |
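99 | // Mask-assembly sketch: the 32 floats copied by the memcpy above are mask coefficients.
100 | // They are typically combined with the 32 x 160 x 160 prototype output (see the comments in
101 | // yolov8_seg.h) roughly as follows -- an Eigen sketch, not the exact member code:
102 | //
103 | //     Eigen::Matrix<float, 1, 32> coeffs;               // offsets 7..38 of one decoded row
104 | //     Eigen::MatrixXf protos(32, 160 * 160);            // prototype tensor, reshaped
105 | //     Eigen::MatrixXf logits = coeffs * protos;         // 1 x 25600
106 | //     // sigmoid(logits), reshape to 160 x 160, threshold and crop to the box -> instance mask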
--------------------------------------------------------------------------------
/yolov8-seg/decode_yolov8_seg.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/utils.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | namespace yolov8seg
6 | {
7 | void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
8 | void transposeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight);
9 | }
10 |
--------------------------------------------------------------------------------
/yolov8-seg/yolov8_seg.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include<Eigen/Core>
3 | #include<Eigen/Dense>
4 | #include"../utils/yolo.h"
5 | #include"../utils/utils.h"
6 | class YOLOv8Seg : public yolo::YOLO
7 | {
8 | public:
9 | YOLOv8Seg(const utils::InitParameter& param);
10 | ~YOLOv8Seg();
11 | virtual bool init(const std::vector<unsigned char>& trtFile);
12 | virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
13 | virtual bool infer();
14 | virtual void postprocess(const std::vector<cv::Mat>& imgsBatch);
15 | virtual void reset();
16 |
17 | public:
18 | void showAndSave(const std::vector<std::string>& classNames,
19 | const int& cvDelayTime, std::vector<cv::Mat>& imgsBatch);
20 |
21 | private:
22 | float* m_output_src_transpose_device;
23 | float* m_output_seg_device; // eg:116 * 8400, 116=4+80+32
24 | float* m_output_objects_device;
25 |
26 | float* m_output_seg_host;
27 | float* m_output_objects_host;
28 |
29 | int m_output_objects_width; // 39 = 32 + 7, 7:left, top, right, bottom, confidence, class, keepflag;
30 | int m_output_src_width; // 116 = 4+80+32, 4:xyxy; 80:coco label; 32:seg
31 | nvinfer1::Dims m_output_seg_dims;
32 | int m_output_obj_area;
33 | int m_output_seg_area;
34 | int m_output_seg_w;
35 | int m_output_seg_h;
36 |
37 | cv::Mat m_mask160;
38 | Eigen::MatrixXf m_mask_eigen160;
39 | cv::Rect m_thresh_roi160;
40 | cv::Rect m_thresh_roisrc;
41 | float m_downsample_scale;
42 | cv::Mat m_mask_src;
43 | cv::Mat m_img_canvas;
44 | };
--------------------------------------------------------------------------------
/yolov8/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolov8 VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 | message(STATUS ${ALL_LIBS})
11 | file(GLOB CPPS
12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
16 | ${TensorRT_ROOT}/samples/common/logger.cpp
17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
18 | #${TensorRT_ROOT}/samples/common/sampleUtils.cpp
19 | )
20 | list(REMOVE_ITEM CPPS ${CMAKE_CURRENT_SOURCE_DIR}/app_yolov8.cpp)
21 | message(STATUS CPPS = ${CPPS})
22 | list (LENGTH CPPS length)
23 | message(STATUS ***length*** = ${length})
24 | find_package(OpenCV REQUIRED)
25 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
26 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
27 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
28 | add_library(${PROJECT_NAME} SHARED ${CPPS})
29 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
30 |
31 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
32 | target_compile_options(${PROJECT_NAME} PUBLIC
33 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
34 |
35 | add_executable(app_yolov8 app_yolov8.cpp)
36 |
37 | # NVCC
38 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
39 | target_link_libraries(app_yolov8 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
40 |
--------------------------------------------------------------------------------
/yolov8/README.md:
--------------------------------------------------------------------------------
1 | ## 1. get onnx
2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) or export onnx:
3 | ```bash
4 | # 🔥 yolov8 official repo: https://github.com/ultralytics/ultralytics
5 | # 🔥 yolov8 quickstart: https://docs.ultralytics.com/quickstart/
6 | # 🚀 TensorRT-Alpha will be updated in sync as soon as possible!
7 |
8 | # install yolov8
9 | conda create -n yolov8 python==3.8 -y # for Linux
10 | # conda create -n yolov8 python=3.9 -y # for Windows10
11 | conda activate yolov8
12 | pip install ultralytics==8.0.5
13 | pip install onnx==1.12.0
14 |
15 | # download official weights (".pt" files)
16 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt
17 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt
18 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt
19 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt
20 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt
21 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x6.pt
22 | ```
23 |
24 | export onnx:
25 | ```bash
26 | # 640
27 | yolo mode=export model=yolov8n.pt format=onnx dynamic=True opset=12 #simplify=True
28 | yolo mode=export model=yolov8s.pt format=onnx dynamic=True opset=12 #simplify=True
29 | yolo mode=export model=yolov8m.pt format=onnx dynamic=True opset=12 #simplify=True
30 | yolo mode=export model=yolov8l.pt format=onnx dynamic=True opset=12 #simplify=True
31 | yolo mode=export model=yolov8x.pt format=onnx dynamic=True opset=12 #simplify=True
32 | # 1280
33 | yolo mode=export model=yolov8x6.pt format=onnx dynamic=True opset=12 #simplify=True
34 | ```
35 |
36 | ## 2.edit and save onnx
37 | ```bash
38 | # note: If you have obtained onnx by downloading, this step can be ignored
39 | ignore
40 | ```
41 |
42 | ## 3.compile onnx
43 | ```bash
44 | # put your onnx file in this path:tensorrt-alpha/data/yolov8
45 | cd tensorrt-alpha/data/yolov8
46 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
47 | # 640
48 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
49 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8s.onnx --saveEngine=yolov8s.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
50 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8m.onnx --saveEngine=yolov8m.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
51 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8l.onnx --saveEngine=yolov8l.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
52 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8x.onnx --saveEngine=yolov8x.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
53 | # 1280
54 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8x6.onnx --saveEngine=yolov8x6.trt --buildOnly --minShapes=images:1x3x1280x1280 --optShapes=images:2x3x1280x1280 --maxShapes=images:4x3x1280x1280
55 | ```
56 | ## 4.run
57 | ```bash
58 | git clone https://github.com/FeiYull/tensorrt-alpha
59 | cd tensorrt-alpha/yolov8
60 | mkdir build
61 | cd build
62 | cmake ..
63 | make -j10
64 | # note: the dstImage will be saved in tensorrt-alpha/yolov8/build by default
65 |
66 | ## 640
67 | # infer image
68 | ./app_yolov8 --model=../../data/yolov8/yolov8n.trt --size=640 --batch_size=1 --img=../../data/6406407.jpg --show --savePath=../
69 |
70 | # infer video
71 | ./app_yolov8 --model=../../data/yolov8/yolov8n.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show
72 |
73 | # infer camera
74 | ./app_yolov8 --model=../../data/yolov8/yolov8n.trt --size=640 --batch_size=2 --cam_id=0 --show
75 |
76 | ## 1280
77 | # infer camera
78 | ./app_yolov8 --model=../../data/yolov8/yolov8x6.trt --size=1280 --batch_size=2 --cam_id=0 --show
79 | ```
80 | ## 5. appendix
81 | ignore
--------------------------------------------------------------------------------
/yolov8/decode_yolov8.cu:
--------------------------------------------------------------------------------
1 | #include "decode_yolov8.h"
2 |
3 | __global__ void decode_yolov8_device_kernel(int batch_size, int num_class, int topK, float conf_thresh,
4 | float* src, int srcWidth, int srcHeight, int srcArea,
5 | float* dst, int dstWidth, int dstHeight, int dstArea)
6 | {
7 | int dx = blockDim.x * blockIdx.x + threadIdx.x;
8 | int dy = blockDim.y * blockIdx.y + threadIdx.y;
9 | if (dx >= srcHeight || dy >= batch_size)
10 | {
11 | return;
12 | }
13 | float* pitem = src + dy * srcArea + dx * srcWidth;
14 | float* class_confidence = pitem + 4;
15 | float confidence = *class_confidence++;
16 | int label = 0;
17 | for (int i = 1; i < num_class; ++i, ++class_confidence)
18 | {
19 | if (*class_confidence > confidence)
20 | {
21 | confidence = *class_confidence;
22 | label = i;
23 | }
24 | }
25 | if (confidence < conf_thresh)
26 | {
27 | return;
28 | }
29 | int index = atomicAdd(dst + dy * dstArea, 1);
30 |
31 | if (index >= topK)
32 | {
33 | return;
34 | }
35 | float cx = *pitem++;
36 | float cy = *pitem++;
37 | float width = *pitem++;
38 | float height = *pitem++;
39 |
40 | float left = cx - width * 0.5f;
41 | float top = cy - height * 0.5f;
42 | float right = cx + width * 0.5f;
43 | float bottom = cy + height * 0.5f;
44 | float* pout_item = dst + dy * dstArea + 1 + index * dstWidth;
45 | *pout_item++ = left;
46 | *pout_item++ = top;
47 | *pout_item++ = right;
48 | *pout_item++ = bottom;
49 | *pout_item++ = confidence;
50 | *pout_item++ = label;
51 | *pout_item++ = 1;
52 | }
53 |
54 | void yolov8::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
55 | {
56 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
57 | dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
58 | (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
59 | int dstArea = 1 + dstWidth * dstHeight;
60 |
61 | decode_yolov8_device_kernel<<<grid_size, block_size, 0, nullptr>>>(param.batch_size, param.num_class, param.topK, param.conf_thresh,
62 | src, srcWidth, srcHeight, srcArea,
63 | dst, dstWidth, dstHeight, dstArea);
64 | }
65 |
66 |
67 | __global__ void transpose_device_kernel(int batch_size,
68 | float* src, int srcWidth, int srcHeight, int srcArea,
69 | float* dst, int dstWidth, int dstHeight, int dstArea)
70 | {
71 | int dx = blockDim.x * blockIdx.x + threadIdx.x;
72 | int dy = blockDim.y * blockIdx.y + threadIdx.y;
73 | if (dx >= dstHeight || dy >= batch_size)
74 | {
75 | return;
76 | }
77 | float* p_dst_row = dst + dy * dstArea + dx * dstWidth;
78 | float* p_src_col = src + dy * srcArea + dx;
79 |
80 | for (int i = 0; i < dstWidth; i++)
81 | {
82 | p_dst_row[i] = p_src_col[i * srcWidth];
83 | }
84 | }
85 |
86 | void yolov8::transposeDevice(utils::InitParameter param,
87 | float* src, int srcWidth, int srcHeight, int srcArea,
88 | float* dst, int dstWidth, int dstHeight)
89 | {
90 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
91 | dim3 grid_size((dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
92 | (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
93 | int dstArea = dstWidth * dstHeight;
94 |
95 | transpose_device_kernel<<<grid_size, block_size, 0, nullptr>>>(param.batch_size,
96 | src, srcWidth, srcHeight, srcArea,
97 | dst, dstWidth, dstHeight, dstArea);
98 | }
99 |
100 |
101 |
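102 | // Why the transpose: the raw yolov8 head is laid out [batch, 4 + num_class, 8400] (one column
103 | // per candidate), while decode_yolov8_device_kernel reads one candidate per row. A plain C++
104 | // single-image equivalent of transpose_device_kernel (sketch):
105 | //
106 | //     for (int r = 0; r < dstHeight; ++r)       // 8400 candidates
107 | //         for (int c = 0; c < dstWidth; ++c)    // 4 + num_class channels
108 | //             dst[r * dstWidth + c] = src[c * srcWidth + r];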
--------------------------------------------------------------------------------
/yolov8/decode_yolov8.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/utils.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | namespace yolov8
6 | {
7 | void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
8 | void transposeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight);
9 | }
10 |
--------------------------------------------------------------------------------
/yolov8/yolov8.cpp:
--------------------------------------------------------------------------------
1 | #include"yolov8.h"
2 | #include"decode_yolov8.h"
3 |
4 | YOLOV8::YOLOV8(const utils::InitParameter& param) :yolo::YOLO(param)
5 | {
6 | }
7 |
8 | YOLOV8::~YOLOV8()
9 | {
10 | CHECK(cudaFree(m_output_src_transpose_device));
11 | }
12 |
13 | bool YOLOV8::init(const std::vector<unsigned char>& trtFile)
14 | {
15 | if (trtFile.empty())
16 | {
17 | return false;
18 | }
19 | std::unique_ptr<nvinfer1::IRuntime> runtime =
20 | std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
21 | if (runtime == nullptr)
22 | {
23 | return false;
24 | }
25 | this->m_engine = std::unique_ptr<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(trtFile.data(), trtFile.size()));
26 |
27 | if (this->m_engine == nullptr)
28 | {
29 | return false;
30 | }
31 | this->m_context = std::unique_ptr<nvinfer1::IExecutionContext>(this->m_engine->createExecutionContext());
32 | if (this->m_context == nullptr)
33 | {
34 | return false;
35 | }
36 | if (m_param.dynamic_batch)
37 | {
38 | this->m_context->setBindingDimensions(0, nvinfer1::Dims4(m_param.batch_size, 3, m_param.dst_h, m_param.dst_w));
39 | }
40 | m_output_dims = this->m_context->getBindingDimensions(1);
41 | m_total_objects = m_output_dims.d[2];
42 | assert(m_param.batch_size <= m_output_dims.d[0]);
43 | m_output_area = 1;
44 | for (int i = 1; i < m_output_dims.nbDims; i++)
45 | {
46 | if (m_output_dims.d[i] != 0)
47 | {
48 | m_output_area *= m_output_dims.d[i];
49 | }
50 | }
51 | CHECK(cudaMalloc(&m_output_src_device, m_param.batch_size * m_output_area * sizeof(float)));
52 | CHECK(cudaMalloc(&m_output_src_transpose_device, m_param.batch_size * m_output_area * sizeof(float)));
53 | float a = float(m_param.dst_h) / m_param.src_h;
54 | float b = float(m_param.dst_w) / m_param.src_w;
55 | float scale = a < b ? a : b;
56 | cv::Mat src2dst = (cv::Mat_<float>(2, 3) << scale, 0.f, (-scale * m_param.src_w + m_param.dst_w + scale - 1) * 0.5,
57 | 0.f, scale, (-scale * m_param.src_h + m_param.dst_h + scale - 1) * 0.5);
58 | cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1);
59 | cv::invertAffineTransform(src2dst, dst2src);
60 |
61 | m_dst2src.v0 = dst2src.ptr<float>(0)[0];
62 | m_dst2src.v1 = dst2src.ptr<float>(0)[1];
63 | m_dst2src.v2 = dst2src.ptr<float>(0)[2];
64 | m_dst2src.v3 = dst2src.ptr<float>(1)[0];
65 | m_dst2src.v4 = dst2src.ptr<float>(1)[1];
66 | m_dst2src.v5 = dst2src.ptr<float>(1)[2];
67 |
68 | return true;
69 | }
70 |
71 | void YOLOV8::preprocess(const std::vector<cv::Mat>& imgsBatch)
72 | {
73 | resizeDevice(m_param.batch_size, m_input_src_device, m_param.src_w, m_param.src_h,
74 | m_input_resize_device, m_param.dst_w, m_param.dst_h, 114, m_dst2src);
75 | bgr2rgbDevice(m_param.batch_size, m_input_resize_device, m_param.dst_w, m_param.dst_h,
76 | m_input_rgb_device, m_param.dst_w, m_param.dst_h);
77 | normDevice(m_param.batch_size, m_input_rgb_device, m_param.dst_w, m_param.dst_h,
78 | m_input_norm_device, m_param.dst_w, m_param.dst_h, m_param);
79 | hwc2chwDevice(m_param.batch_size, m_input_norm_device, m_param.dst_w, m_param.dst_h,
80 | m_input_hwc_device, m_param.dst_w, m_param.dst_h);
81 | }
82 |
83 |
84 | void YOLOV8::postprocess(const std::vector<cv::Mat>& imgsBatch)
85 | {
86 | yolov8::transposeDevice(m_param, m_output_src_device, m_total_objects, 4 + m_param.num_class, m_total_objects * (4 + m_param.num_class),
87 | m_output_src_transpose_device, 4 + m_param.num_class, m_total_objects);
88 | yolov8::decodeDevice(m_param, m_output_src_transpose_device, 4 + m_param.num_class, m_total_objects, m_output_area,
89 | m_output_objects_device, m_output_objects_width, m_param.topK);
90 | // nms
91 | //nmsDeviceV1(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1);
92 | nmsDeviceV2(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1, m_output_idx_device, m_output_conf_device);
93 | CHECK(cudaMemcpy(m_output_objects_host, m_output_objects_device, m_param.batch_size * sizeof(float) * (1 + 7 * m_param.topK), cudaMemcpyDeviceToHost));
94 | for (size_t bi = 0; bi < imgsBatch.size(); bi++)
95 | {
96 | int num_boxes = std::min((int)(m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1))[0], m_param.topK);
97 | for (int i = 0; i < num_boxes; i++)
98 | {
99 | float* ptr = m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1) + m_output_objects_width * i + 1;
100 | int keep_flag = ptr[6];
101 | if (keep_flag)
102 | {
103 | float x_lt = m_dst2src.v0 * ptr[0] + m_dst2src.v1 * ptr[1] + m_dst2src.v2;
104 | float y_lt = m_dst2src.v3 * ptr[0] + m_dst2src.v4 * ptr[1] + m_dst2src.v5;
105 | float x_rb = m_dst2src.v0 * ptr[2] + m_dst2src.v1 * ptr[3] + m_dst2src.v2;
106 | float y_rb = m_dst2src.v3 * ptr[2] + m_dst2src.v4 * ptr[3] + m_dst2src.v5;
107 | m_objectss[bi].emplace_back(x_lt, y_lt, x_rb, y_rb, ptr[4], (int)ptr[5]);
108 | }
109 | }
110 |
111 | }
112 | }
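113 |
114 | // Worked example for the affine built in init(), assuming a 1920x1080 source letterboxed to 640x640:
115 | //     a = 640/1080 ≈ 0.593, b = 640/1920 ≈ 0.333, scale = min(a, b) = 1/3
116 | //     x offset = (-scale * 1920 + 640 + scale - 1) * 0.5 ≈ -0.33   (width fills the canvas)
117 | //     y offset = (-scale * 1080 + 640 + scale - 1) * 0.5 ≈ 139.67  (~140 px gray bars top and bottom)
118 | // postprocess() then maps boxes back to 1920x1080 with the inverted matrix stored in m_dst2src.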
--------------------------------------------------------------------------------
/yolov8/yolov8.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/yolo.h"
3 | #include"../utils/utils.h"
4 | class YOLOV8 : public yolo::YOLO
5 | {
6 | public:
7 | YOLOV8(const utils::InitParameter& param);
8 | ~YOLOV8();
9 | virtual bool init(const std::vector<unsigned char>& trtFile);
10 | virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
11 | virtual void postprocess(const std::vector<cv::Mat>& imgsBatch);
12 |
13 | private:
14 | float* m_output_src_transpose_device;
15 | };
--------------------------------------------------------------------------------
/yolox/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolox VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 | message(STATUS ${ALL_LIBS})
11 | file(GLOB CPPS
12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
16 | ${TensorRT_ROOT}/samples/common/logger.cpp
17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
18 | )
19 | list(REMOVE_ITEM CPPS ${CMAKE_CURRENT_SOURCE_DIR}/app_yolox.cpp)
20 | message(STATUS CPPS = ${CPPS})
21 | list (LENGTH CPPS length)
22 | message(STATUS ***length*** = ${length})
23 | find_package(OpenCV REQUIRED)
24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
27 | add_library(${PROJECT_NAME} SHARED ${CPPS})
28 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
29 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
30 | target_compile_options(${PROJECT_NAME} PUBLIC
31 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
32 |
33 | add_executable(app_yolox app_yolox.cpp)
34 |
35 | # NVCC
36 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
37 | target_link_libraries(app_yolox ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
38 |
--------------------------------------------------------------------------------
/yolox/README.md:
--------------------------------------------------------------------------------
1 | ## Special note
2 | The official yolox preprocessing resizes with padding, but the padding is applied only along the right and bottom borders of the image.
3 | The kernels in this repo work as follows: the image is resized with its aspect ratio preserved, and the result is stored in m_input_resize_without_padding_device;
4 | that result is then copied into m_input_resize_device (allocated as
5 | 416 * 416 * 3 * batch_size or 640 * 640 * 3 * batch_size, initialized to {114, 114, 114}).
6 | Also, because the interpolation cannot always be strictly aligned with opencv, there are slight differences, but the final detections are almost identical: box positions match, and confidences differ only from the second decimal place onward.
7 | Finally, the model only supports a fixed batch size.
8 |
9 | ## 1. get onnx
10 | download onnx (default: batch_size=2) directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv?usp=sharing)
11 | or export onnx:
12 | ```bash
13 | git clone https://github.com/Megvii-BaseDetection/YOLOX
14 | cd YOLOX && git checkout 0.3.0
15 |
16 | ## batch_size=1
17 | # 640 for image
18 | python tools/export_onnx.py --output-name=yolox_s.onnx --exp_file=exps/default/yolox_s.py --ckpt=yolox_s.pth --decode_in_inference --batch-size=1
19 | python tools/export_onnx.py --output-name=yolox_m.onnx --exp_file=exps/default/yolox_m.py --ckpt=yolox_m.pth --decode_in_inference --batch-size=1
20 | python tools/export_onnx.py --output-name=yolox_x.onnx --exp_file=exps/default/yolox_x.py --ckpt=yolox_x.pth --decode_in_inference --batch-size=1
22 | # 416 for image
23 | python tools/export_onnx.py --output-name=yolox_nano.onnx --exp_file=exps/default/yolox_nano.py --ckpt=yolox_nano.pth --decode_in_inference --batch-size=1
24 | python tools/export_onnx.py --output-name=yolox_tiny.onnx --exp_file=exps/default/yolox_tiny.py --ckpt=yolox_tiny.pth --decode_in_inference --batch-size=1
25 |
26 | ## batch_size > 1
27 | # For example, setting --batch-size=2 in the six export commands above also works, but note: when you run the demo later, you must pass the matching value, e.g. --batch_size=2
28 | ```
29 |
30 | ## 2.edit and save onnx
31 | ```bash
32 | # note: If you have obtained onnx by downloading, this step can be ignored
33 | ignore
34 | ```
35 |
36 | ## 3.compile onnx
37 | ```bash
38 | # put your onnx file in this path:tensorrt-alpha/data/yolox
39 | cd tensorrt-alpha/data/yolox
40 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
41 |
42 | # 640
43 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_s.onnx --saveEngine=yolox_s.trt --buildOnly
44 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_m.onnx --saveEngine=yolox_m.trt --buildOnly
45 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_x.onnx --saveEngine=yolox_x.trt --buildOnly
46 |
47 | # 416
48 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_nano.onnx --saveEngine=yolox_nano.trt --buildOnly
49 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_tiny.onnx --saveEngine=yolox_tiny.trt --buildOnly
50 | ```
51 |
52 | ## 4.run
53 | ```bash
54 | git clone https://github.com/FeiYull/tensorrt-alpha
55 | cd tensorrt-alpha/yolox
56 | mkdir build
57 | cd build
58 | cmake ..
59 | make -j10
60 | # note: the dstImage will be saved in tensorrt-alpha/yolox/build by default
61 | # only static multi-batch inference is supported!
62 | # for a different batch_size, the onnx must be exported and compiled again
63 |
64 | ## 640
65 | # infer image
66 | ./app_yolox --model=../../data/yolox/yolox_s.trt --size=640 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../
67 |
68 | # infer video
69 | ./app_yolox --model=../../data/yolox/yolox_s.trt --size=640 --batch_size=1 --video=../../data/people.mp4 --show
70 |
71 | # infer camera
72 | ./app_yolox --model=../../data/yolox/yolox_s.trt --size=640 --batch_size=1 --cam_id=0 --show
73 |
74 | # 416
75 | ./app_yolox --model=../../data/yolox/yolox_nano.trt --size=416 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../
76 | ```
77 | ## 5. appendix
78 | ignore
--------------------------------------------------------------------------------
/yolox/yolox.cu:
--------------------------------------------------------------------------------
1 | #include"yolox.h"
2 |
3 | YOLOX::YOLOX(const utils::InitParameter& param) :yolo::YOLO(param)
4 | {
5 | }
6 | YOLOX::~YOLOX()
7 | {
8 | CHECK(cudaFree(m_input_resize_without_padding_device));
9 | }
10 | bool YOLOX::init(const std::vector& trtFile)
11 | {
12 | if (trtFile.empty())
13 | {
14 | return false;
15 | }
16 | std::unique_ptr runtime =
17 | std::unique_ptr(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
18 | if (runtime == nullptr)
19 | {
20 | return false;
21 | }
22 | this->m_engine = std::unique_ptr(runtime->deserializeCudaEngine(trtFile.data(), trtFile.size()));
23 |
24 | if (this->m_engine == nullptr)
25 | {
26 | return false;
27 | }
28 | this->m_context = std::unique_ptr(this->m_engine->createExecutionContext());
29 | if (this->m_context == nullptr)
30 | {
31 | return false;
32 | }
33 | // binding dim
34 | // ...
35 | //nvinfer1::Dims input_dims = this->m_context->getBindingDimensions(0);
36 | m_output_dims = this->m_context->getBindingDimensions(1);
37 | m_total_objects = m_output_dims.d[1];
38 | assert(m_param.batch_size == m_output_dims.d[0] ||
39 | m_param.batch_size == 1 // batch_size = 1, but it will infer with "batch_size=m_output_dims.d[0]", only support static batch
40 | );
41 | m_output_area = 1;
42 | for (int i = 1; i < m_output_dims.nbDims; i++)
43 | {
44 | if (m_output_dims.d[i] != 0)
45 | {
46 | m_output_area *= m_output_dims.d[i];
47 | }
48 | }
49 | CHECK(cudaMalloc(&m_output_src_device, m_param.batch_size * m_output_area * sizeof(float)));
50 | float a = float(m_param.dst_h) / m_param.src_h;
51 | float b = float(m_param.dst_w) / m_param.src_w;
52 | float scale = a < b ? a : b;
53 | m_resized_h = roundf((float)m_param.src_h * scale);
54 | m_resized_w = roundf((float)m_param.src_w * scale);
55 |
56 | CHECK(cudaMalloc(&m_input_resize_without_padding_device,
57 | m_param.batch_size * 3 * m_resized_h * m_resized_w * sizeof(float)));
58 | cv::Mat src2dst = (cv::Mat_(2, 3) << scale, 0.f, (scale - 1) * 0.5,
59 | 0.f, scale, (scale - 1) * 0.5);
60 | cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1);
61 | cv::invertAffineTransform(src2dst, dst2src);
62 | m_dst2src.v0 = dst2src.ptr(0)[0];
63 | m_dst2src.v1 = dst2src.ptr(0)[1];
64 | m_dst2src.v2 = dst2src.ptr(0)[2];
65 | m_dst2src.v3 = dst2src.ptr(1)[0];
66 | m_dst2src.v4 = dst2src.ptr(1)[1];
67 | m_dst2src.v5 = dst2src.ptr(1)[2];
68 | return true;
69 | }
70 | void YOLOX::preprocess(const std::vector<cv::Mat>& imgsBatch)
71 | {
72 |     resizeDevice(m_param.batch_size, m_input_src_device, m_param.src_w, m_param.src_h, // 1. aspect-preserving resize
73 |         m_input_resize_without_padding_device, m_resized_w, m_resized_h, 114, m_dst2src);
74 |     copyWithPaddingDevice(m_param.batch_size, m_input_resize_without_padding_device, m_resized_w, m_resized_h, // 2. pad right/bottom with 114
75 |         m_input_resize_device, m_param.dst_w, m_param.dst_h, 114.f);
76 |     hwc2chwDevice(m_param.batch_size, m_input_resize_device, m_param.dst_w, m_param.dst_h, // 3. HWC -> CHW
77 |         m_input_hwc_device, m_param.dst_w, m_param.dst_h);
78 | }
79 | __global__
80 | void copy_with_padding_kernel_function(int batchSize, float* src, int srcWidth, int srcHeight, int srcArea, int srcVolume,
81 |     float* dst, int dstWidth, int dstHeight, int dstArea, int dstVolume, float paddingValue)
82 | {
83 |     int dx = blockDim.x * blockIdx.x + threadIdx.x; // pixel index within one dst image
84 |     int dy = blockDim.y * blockIdx.y + threadIdx.y; // image index within the batch
85 |     if (dx < dstArea && dy < batchSize)
86 |     {
87 |         int dst_y = dx / dstWidth;
88 |         int dst_x = dx % dstWidth;
89 |         float* pdst = dst + dy * dstVolume + dst_y * dstWidth * 3 + dst_x * 3; // HWC layout, 3 channels per pixel
90 | 
91 |         if (dst_y < srcHeight && dst_x < srcWidth) // inside the resized image: copy the pixel
92 |         {
93 |             float* psrc = src + dy * srcVolume + dst_y * srcWidth * 3 + dst_x * 3;
94 |             pdst[0] = psrc[0];
95 |             pdst[1] = psrc[1];
96 |             pdst[2] = psrc[2];
97 |         }
98 |         else // right/bottom border: fill with the padding value
99 |         {
100 |             pdst[0] = paddingValue;
101 |             pdst[1] = paddingValue;
102 |             pdst[2] = paddingValue;
103 |         }
104 |     }
105 | }
106 | void copyWithPaddingDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
107 |     float* dst, int dstWidth, int dstHeight, float paddingValue)
108 | {
109 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
110 |     dim3 grid_size((dstWidth * dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE, // x: one thread per dst pixel
111 |         (batchSize + BLOCK_SIZE - 1) / BLOCK_SIZE);                      // y: one thread row per batch image
112 |     int src_area = srcHeight * srcWidth;
113 |     int dst_area = dstHeight * dstWidth;
114 | 
115 |     int src_volume = 3 * srcHeight * srcWidth;
116 |     int dst_volume = 3 * dstHeight * dstWidth;
117 |     assert(srcWidth <= dstWidth);   // the resized image must fit inside the padded target
118 |     assert(srcHeight <= dstHeight);
119 |     copy_with_padding_kernel_function <<< grid_size, block_size, 0, nullptr >>> (batchSize, src, srcWidth, srcHeight, src_area, src_volume,
120 |         dst, dstWidth, dstHeight, dst_area, dst_volume, paddingValue);
121 | }
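122 | 
123 | // Illustrative, stand-alone usage of copyWithPaddingDevice (a sketch added for this
124 | // write-up, not part of the original file; guarded out so it never affects the build).
125 | // It pads a tiny 2x2 HWC "image" into a 4x4 target: copied pixels keep their values,
126 | // and the right/bottom border is filled with 114, matching the kernel's else-branch.
127 | //#define COPY_WITH_PADDING_DEMO
128 | #ifdef COPY_WITH_PADDING_DEMO
129 | #include <cstdio>
130 | int main()
131 | {
132 |     const int batch = 1, sw = 2, sh = 2, dw = 4, dh = 4;
133 |     float h_src[sh * sw * 3];
134 |     for (int i = 0; i < sh * sw * 3; i++) h_src[i] = float(i); // recognizable test values
135 |     float* d_src = nullptr; float* d_dst = nullptr;
136 |     CHECK(cudaMalloc(&d_src, sizeof(h_src)));
137 |     CHECK(cudaMalloc(&d_dst, batch * dh * dw * 3 * sizeof(float)));
138 |     CHECK(cudaMemcpy(d_src, h_src, sizeof(h_src), cudaMemcpyHostToDevice));
139 |     copyWithPaddingDevice(batch, d_src, sw, sh, d_dst, dw, dh, 114.f);
140 |     float h_dst[dh * dw * 3];
141 |     CHECK(cudaMemcpy(h_dst, d_dst, sizeof(h_dst), cudaMemcpyDeviceToHost));
142 |     std::printf("dst(0,0)=%.0f dst(3,3)=%.0f\n", h_dst[0], h_dst[(3 * dw + 3) * 3]); // prints 0 and 114
143 |     CHECK(cudaFree(d_src)); CHECK(cudaFree(d_dst));
144 |     return 0;
145 | }
146 | #endif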
--------------------------------------------------------------------------------
/yolox/yolox.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/yolo.h"
3 | #include"../utils/kernel_function.h"
4 | 
5 | class YOLOX : public yolo::YOLO
6 | {
7 | public:
8 |     YOLOX(const utils::InitParameter& param);
9 |     ~YOLOX();
10 |     virtual bool init(const std::vector<unsigned char>& trtFile);
11 |     virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
12 | private:
13 |     float* m_input_resize_without_padding_device; // device buffer for the aspect-preserving resize, before padding
14 |     int m_resized_w;
15 |     int m_resized_h;
16 | };
17 | void copyWithPaddingDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
18 |     float* dst, int dstWidth, int dstHeight, float paddingValue);
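19 | 
20 | // Typical usage (sketch only; it mirrors the calls this class sees from the repo's demo
21 | // apps, with loadFile as a hypothetical helper that reads the .trt bytes from disk):
22 | //     utils::InitParameter param;              // set batch_size, src_w/src_h, dst_w/dst_h
23 | //     YOLOX yolox(param);
24 | //     std::vector<unsigned char> engine = loadFile("yolox_s.trt");
25 | //     yolox.init(engine);                      // deserializes the TensorRT engine
26 | //     std::vector<cv::Mat> imgsBatch = ...;    // batch_size images
27 | //     yolox.preprocess(imgsBatch);             // resize + pad(114) + HWC->CHW on device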
--------------------------------------------------------------------------------