├── .github ├── examples │ └── python_with_dll │ │ ├── image-20230302203606848.png │ │ ├── image-20230302203807549.png │ │ ├── image-20230302205149660.png │ │ ├── image-20230302211219640.png │ │ ├── image-20230302211258968.png │ │ ├── image-20230302211446110.png │ │ ├── image-20230302212805461.png │ │ ├── image-20230302213219151.png │ │ ├── image-20230302213246167.png │ │ ├── image-20230302213433177.png │ │ ├── image-20230302214103308.png │ │ ├── image-20230302214127422.png │ │ ├── image-20230302220950777.png │ │ ├── image-20230302221408389.png │ │ ├── image-20230302221617892.png │ │ └── images-20230304121452.png ├── facemesh.jpg ├── libfacedet-Offical(left)vsOurs(right-topk-2000).jpg ├── libfacedet.gif ├── people.gif ├── u2net.gif ├── yolov5s-v5.7-Offical(left)vsOurs(right)-img1.jpg ├── yolov5s-v5.7-Offical(left)vsOurs(right)-img2.jpg ├── yolov6s-v6.3-Offical(left)vsOurs(right).jpg ├── yolov7-tiny-Offical(left)vsOurs(right).jpg ├── yolov8-snow.gif ├── yolov8-stree.gif ├── yolov8n-Offical(left)vsOurs(right).jpg └── yolov8n-b8-1080p-to-640.jpg ├── .gitignore ├── Install_For_Ubuntu18.04 └── Install_For_Ubuntu18.04.md ├── LICENSE ├── README.md ├── README_en.md ├── cmake └── common.cmake ├── data ├── 12801.jpg ├── 12802.jpg ├── 12803.jpg ├── 12804.jpg ├── 2.png ├── 51204.jpg ├── 6.jpg ├── 6086083.jpg ├── 6406401.jpg ├── 6406402.jpg ├── 6406403.jpg ├── 6406404.jpg ├── 6406406.jpg ├── 6406407.jpg ├── 7.jpg ├── bus.jpg ├── dog.jpg ├── efficientdet │ └── .gitkeep ├── im_01.png ├── image1.jpg ├── image2.jpg ├── image3.jpg ├── libfacedetction │ └── .gitkeep ├── long.jpg ├── mobilenetv3 │ └── .gitkeep ├── people.mp4 ├── pphumanseg │ └── .gitkeep ├── resnet18 │ └── .gitkeep ├── retinanet │ └── .gitkeep ├── rifle2.jpeg ├── road0.png ├── road1.jpg ├── sailboat3.jpg ├── ssd │ └── .gitkeep ├── swin │ └── .gitkeep ├── u2net │ └── .gitkeep ├── yolor │ ├── .gitkeep │ └── coco.names ├── yolov3 │ └── .gitkeep ├── yolov4 │ └── .gitkeep ├── yolov5 │ └── .gitkeep ├── yolov6 │ └── .gitkeep ├── yolov7 │ └── .gitkeep ├── yolov8-pose │ └── .gitkeep ├── yolov8 │ └── .gitkeep ├── yolox │ └── .gitkeep └── zidane.jpg ├── docker ├── README.md └── ubuntu18.04-cu113.Dockerfile ├── efficientdet ├── CMakeLists.txt ├── README.md ├── app_efficientdet.cpp ├── efficientdet.cpp └── efficientdet.h ├── examples └── python_with_dll │ ├── README.md │ ├── c_files │ ├── pch.cpp │ └── pch.h │ ├── config │ └── screen_inf.py │ └── python_trt.py ├── libfacedetection ├── CMakeLists.txt ├── README.md ├── alpha_edit.py ├── app_libfacedetction.cpp ├── libfacedetection.cu └── libfacedetection.h ├── pphumanseg ├── CMakeLists.txt ├── README.md ├── alpha_edit.py ├── app_pphunmanseg.cpp ├── decode_pphunmanseg.cu ├── decode_pphunmanseg.h ├── pphunmanseg.cpp └── pphunmanseg.h ├── requirements.txt ├── tools └── onnx2trt.cpp ├── u2net ├── CMakeLists.txt ├── README.md ├── alpha_export.py ├── app_u2net.cpp ├── u2net.cu └── u2net.h ├── utils ├── common_include.h ├── kernel_function.cu ├── kernel_function.h ├── tracking │ └── .gitkeep ├── utils.cpp ├── utils.h ├── yolo.cpp └── yolo.h ├── vscode └── launch.json ├── yolonas ├── CMakeLists.txt ├── README.md ├── alpha_export_dynamic.py ├── app_yolo_nas.cpp ├── decode_yolo_nas.cu ├── decode_yolo_nas.h ├── yolo_nas.cpp └── yolo_nas.h ├── yolor ├── CMakeLists.txt ├── README.md ├── alpha_export.py └── app_yolor.cpp ├── yolov3 ├── CMakeLists.txt ├── README.md ├── alpha_edit.py └── app_yolov3.cpp ├── yolov4 ├── CMakeLists.txt ├── README.md ├── alpha_export.py ├── app_yolov4.cpp ├── decode_yolov4.cu ├── 
decode_yolov4.h ├── yolov4.cpp └── yolov4.h ├── yolov5 ├── CMakeLists.txt ├── README.md ├── alpha_edit.py └── app_yolov5.cpp ├── yolov6 ├── CMakeLists.txt ├── README.md └── app_yolov6.cpp ├── yolov7 ├── CMakeLists.txt ├── README.md └── app_yolov7.cpp ├── yolov8-pose ├── CMakeLists.txt ├── README.md ├── app_yolov8_pose.cpp ├── decode_yolov8_pose.cu ├── decode_yolov8_pose.h ├── yolov8_pose.cpp └── yolov8_pose.h ├── yolov8-seg ├── CMakeLists.txt ├── README.md ├── app_yolov8_seg.cpp ├── decode_yolov8_seg.cu ├── decode_yolov8_seg.h ├── yolov8_seg.cpp └── yolov8_seg.h ├── yolov8 ├── CMakeLists.txt ├── README.md ├── app_yolov8.cpp ├── decode_yolov8.cu ├── decode_yolov8.h ├── yolov8.cpp └── yolov8.h └── yolox ├── CMakeLists.txt ├── README.md ├── app_yolox.cpp ├── yolox.cu └── yolox.h /.github/examples/python_with_dll/image-20230302203606848.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302203606848.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302203807549.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302203807549.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302205149660.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302205149660.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302211219640.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302211219640.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302211258968.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302211258968.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302211446110.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302211446110.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302212805461.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302212805461.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302213219151.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302213219151.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302213246167.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302213246167.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302213433177.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302213433177.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302214103308.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302214103308.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302214127422.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302214127422.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302220950777.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302220950777.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302221408389.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302221408389.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302221617892.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302221617892.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/images-20230304121452.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/images-20230304121452.png -------------------------------------------------------------------------------- /.github/facemesh.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/facemesh.jpg -------------------------------------------------------------------------------- /.github/libfacedet-Offical(left)vsOurs(right-topk-2000).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/libfacedet-Offical(left)vsOurs(right-topk-2000).jpg -------------------------------------------------------------------------------- /.github/libfacedet.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/libfacedet.gif -------------------------------------------------------------------------------- /.github/people.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/people.gif -------------------------------------------------------------------------------- /.github/u2net.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/u2net.gif -------------------------------------------------------------------------------- /.github/yolov5s-v5.7-Offical(left)vsOurs(right)-img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov5s-v5.7-Offical(left)vsOurs(right)-img1.jpg -------------------------------------------------------------------------------- /.github/yolov5s-v5.7-Offical(left)vsOurs(right)-img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov5s-v5.7-Offical(left)vsOurs(right)-img2.jpg -------------------------------------------------------------------------------- /.github/yolov6s-v6.3-Offical(left)vsOurs(right).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov6s-v6.3-Offical(left)vsOurs(right).jpg -------------------------------------------------------------------------------- /.github/yolov7-tiny-Offical(left)vsOurs(right).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov7-tiny-Offical(left)vsOurs(right).jpg -------------------------------------------------------------------------------- /.github/yolov8-snow.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov8-snow.gif -------------------------------------------------------------------------------- /.github/yolov8-stree.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov8-stree.gif 
-------------------------------------------------------------------------------- /.github/yolov8n-Offical(left)vsOurs(right).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov8n-Offical(left)vsOurs(right).jpg -------------------------------------------------------------------------------- /.github/yolov8n-b8-1080p-to-640.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov8n-b8-1080p-to-640.jpg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | 35 | # VS 2019 36 | *x64/ 37 | *.idea 38 | *CUDA_Demo.sln 39 | *vs 40 | *.vcxproj 41 | *.vcxproj.user 42 | *.user 43 | *.onnx 44 | *.trt 45 | *.vcxproj.filters 46 | *.sln 47 | 48 | #vscode 49 | *.vscode 50 | 51 | # linux 52 | *build 53 | 54 | *.ppm 55 | *.tgz 56 | *.prototxt 57 | *.caffemodel 58 | *.code-workspace 59 | 60 | *__pycache__ 61 | 62 | # deep learning's file 63 | 64 | *onnx 65 | *trt 66 | *pt 67 | *pth -------------------------------------------------------------------------------- /Install_For_Ubuntu18.04/Install_For_Ubuntu18.04.md: -------------------------------------------------------------------------------- 1 | ## 1. Install Tool Chains 2 | ```bash 3 | sudo apt-get update 4 | sudo apt-get install build-essential 5 | sudo apt-get install git 6 | sudo apt-get install gdb 7 | sudo apt-get install cmake 8 | ``` 9 | ```bash 10 | sudo apt-get install pkg-config libgtk-3-dev libavcodec-dev libavformat-dev libswscale-dev libv4l-dev libxvidcore-dev libx264-dev 11 | sudo apt-get install libopencv-dev 12 | # pkg-config --modversion opencv 13 | ``` 14 | ## 2. Install Nvidia Libs 15 | ### 2.1 install nvidia driver470 16 | ```bash 17 | ubuntu-drivers devices 18 | sudo add-apt-repository ppa:graphics-drivers/ppa 19 | sudo apt update 20 | sudo apt install nvidia-driver-470-server # for ubuntu18.04 21 | nvidia-smi 22 | ``` 23 | ### 2.2 install cuda11.3 24 | - enter: https://developer.nvidia.com/cuda-toolkit-archive 25 | - select:CUDA Toolkit 11.3.0(April 2021) 26 | - select:[Linux] -> [x86_64] -> [Ubuntu] -> [18.04] -> [runfile(local)]
27 | You will see installation instructions on the web page like this: 28 | ```bash 29 | wget https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda_11.3.0_465.19.01_linux.run 30 | sudo sh cuda_11.3.0_465.19.01_linux.run 31 | ``` 32 | The CUDA installer will display a selection window. 33 | - select: [continue] -> [accept] -> press Enter to deselect the first and second options, as shown below (**it is important!**) -> [Install]
34 | 35 | ```bash 36 | CUDA Installer 37 | [ ] Driver # cancel the first 38 | [ ] 465.19.01 # cancel the second 39 | [X] CUDA Toolkit 11.3 40 | [X] CUDA Samples 11.3 41 | [X] CUDA Demo Suite 11.3 42 | [X] CUDA Documentation 11.3 43 | ``` 44 | 45 | The bash window prints the following, which means the installation is OK. 46 | ```bash 47 | #=========== 48 | #= Summary = 49 | #=========== 50 | 51 | #Driver: Not Selected 52 | #Toolkit: Installed in /usr/local/cuda-11.3/ 53 | #...... 54 | ``` 55 | Add environment variables: 56 | ```bash 57 | vim ~/.bashrc 58 | ``` 59 | Copy and paste the following into .bashrc: 60 | ```bash 61 | # cuda v11.3 62 | export PATH=/usr/local/cuda-11.3/bin${PATH:+:${PATH}} 63 | export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} 64 | export CUDA_HOME=/usr/local/cuda-11.3 65 | ``` 66 | ```bash 67 | source ~/.bashrc 68 | nvcc -V 69 | ``` 70 | The bash window prints the following content:
71 |
72 | nvcc: NVIDIA (R) Cuda compiler driver
73 | Copyright (c) 2005-2021 NVIDIA Corporation
74 | Built on Sun_Mar_21_19:15:46_PDT_2021
75 | Cuda compilation tools, release 11.3, V11.3.58
76 | Build cuda_11.3.r11.3/compiler.29745058_0
77 |
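If `nvcc` is not found after `source ~/.bashrc`, a quick sanity check of the environment (a suggested addition; the paths below assume the default install prefix used above):
```bash
which nvcc             # expected: /usr/local/cuda-11.3/bin/nvcc
echo $LD_LIBRARY_PATH  # should contain /usr/local/cuda-11.3/lib64
nvidia-smi             # the driver from section 2.1 should still be visible
```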
78 | 79 | ### 2.3 install cudnn8.2 80 | - enter: https://developer.nvidia.com/rdp/cudnn-archive 81 | - select: Download cuDNN v8.2.0 (April 23rd, 2021), for CUDA 11.x 82 | - select: cuDNN Library for Linux (x86_64) 83 | - you will download the file: "cudnn-11.3-linux-x64-v8.2.0.53.tgz" 84 | ```bash 85 | tar -zxvf cudnn-11.3-linux-x64-v8.2.0.53.tgz 86 | ``` 87 | Copy cuDNN into CUDA 11.3's install directory: 88 | ```bash 89 | sudo cp cuda/include/cudnn.h /usr/local/cuda/include/ 90 | sudo cp cuda/lib64/libcudnn* /usr/local/cuda/lib64/ 91 | sudo chmod a+r /usr/local/cuda/include/cudnn.h 92 | sudo chmod a+r /usr/local/cuda/lib64/libcudnn* 93 | ``` 94 | ### 2.4 download tensorrt8.4.2.4 95 | - enter: https://developer.nvidia.cn/nvidia-tensorrt-8x-download 96 | - select: I Agree To the Terms of the NVIDIA TensorRT License Agreement 97 | - select: TensorRT 8.4 GA Update 1 98 | - select: TensorRT 8.4 GA Update 1 for Linux x86_64 and CUDA 11.0, 11.1, 11.2, 11.3, 11.4, 11.5, 11.6 and 11.7 TAR Package 99 | - you will download the file: "TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz" 100 | ```bash 101 | tar -zxvf TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz 102 | # test 103 | cd TensorRT-8.4.2.4/samples/sampleMNIST 104 | make 105 | cd ../../bin/ 106 | ``` 107 | Change the following path to your own path (**it is important!**): 108 | ```bash 109 | 110 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/xxx/temp/TensorRT-8.4.2.4/lib 111 | ./sample_mnist 112 | ``` 113 | The bash window prints digit recognition task information, which indicates that TensorRT 8.4.2.4 is installed correctly. 114 | -------------------------------------------------------------------------------- /cmake/common.cmake: -------------------------------------------------------------------------------- 1 | # set 2 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations") 3 | # find thirdparty 4 | find_package(CUDA REQUIRED) 5 | list(APPEND ALL_LIBS 6 | ${CUDA_LIBRARIES} 7 | ${CUDA_cublas_LIBRARY} 8 | ${CUDA_nppc_LIBRARY} ${CUDA_nppig_LIBRARY} ${CUDA_nppidei_LIBRARY} ${CUDA_nppial_LIBRARY}) 9 | 10 | # include cuda's header 11 | list(APPEND INCLUDE_DRIS ${CUDA_INCLUDE_DIRS}) 12 | # message(FATAL_ERROR "CUDA_npp_LIBRARY: ${CUDA_npp_LIBRARY}") 13 | 14 | # gather TensorRT lib 15 | #set(TensorRT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../TensorRT) 16 | #set(TensorRT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../TensorRT-8.2.1.8) 17 | #set(TensorRT_ROOT /home/feiyull/TensorRT-Plugin) 18 | set(TensorRT_ROOT /home/feiyull/TensorRT-8.4.2.4) 19 | #set(TensorRT_ROOT /home/feiyull/TensorRT-8.6.1.6) 20 | 21 | find_library(TRT_NVINFER NAMES nvinfer HINTS ${TensorRT_ROOT} PATH_SUFFIXES lib lib64 lib/x64) 22 | find_library(TRT_NVINFER_PLUGIN NAMES nvinfer_plugin HINTS ${TensorRT_ROOT} PATH_SUFFIXES lib lib64 lib/x64) 23 | find_library(TRT_NVONNX_PARSER NAMES nvonnxparser HINTS ${TensorRT_ROOT} PATH_SUFFIXES lib lib64 lib/x64) 24 | find_library(TRT_NVCAFFE_PARSER NAMES nvcaffe_parser HINTS ${TensorRT_ROOT} PATH_SUFFIXES lib lib64 lib/x64) 25 | find_path(TENSORRT_INCLUDE_DIR NAMES NvInfer.h HINTS ${TensorRT_ROOT} PATH_SUFFIXES include) 26 | list(APPEND ALL_LIBS ${TRT_NVINFER} ${TRT_NVINFER_PLUGIN} ${TRT_NVONNX_PARSER} ${TRT_NVCAFFE_PARSER}) 27 | 28 | # include tensorrt's headers 29 | list(APPEND INCLUDE_DRIS ${TENSORRT_INCLUDE_DIR}) 30 | 31 | # include tensorrt's sample/common headers 32 | #set(SAMPLES_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../common) 33 | #set(SAMPLES_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/common) 34 | set(SAMPLES_COMMON_DIR
${TensorRT_ROOT}/samples/common) 35 | list(APPEND INCLUDE_DRIS ${SAMPLES_COMMON_DIR}) 36 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 37 | message(STATUS "ALL_LIBS: ${ALL_LIBS}") 38 | -------------------------------------------------------------------------------- /data/12801.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/12801.jpg -------------------------------------------------------------------------------- /data/12802.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/12802.jpg -------------------------------------------------------------------------------- /data/12803.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/12803.jpg -------------------------------------------------------------------------------- /data/12804.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/12804.jpg -------------------------------------------------------------------------------- /data/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/2.png -------------------------------------------------------------------------------- /data/51204.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/51204.jpg -------------------------------------------------------------------------------- /data/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6.jpg -------------------------------------------------------------------------------- /data/6086083.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6086083.jpg -------------------------------------------------------------------------------- /data/6406401.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406401.jpg -------------------------------------------------------------------------------- /data/6406402.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406402.jpg -------------------------------------------------------------------------------- /data/6406403.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406403.jpg -------------------------------------------------------------------------------- /data/6406404.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406404.jpg -------------------------------------------------------------------------------- /data/6406406.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406406.jpg -------------------------------------------------------------------------------- /data/6406407.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406407.jpg -------------------------------------------------------------------------------- /data/7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/7.jpg -------------------------------------------------------------------------------- /data/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/bus.jpg -------------------------------------------------------------------------------- /data/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/dog.jpg -------------------------------------------------------------------------------- /data/efficientdet/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/efficientdet/.gitkeep -------------------------------------------------------------------------------- /data/im_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/im_01.png -------------------------------------------------------------------------------- /data/image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/image1.jpg -------------------------------------------------------------------------------- /data/image2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/image2.jpg -------------------------------------------------------------------------------- /data/image3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/image3.jpg -------------------------------------------------------------------------------- /data/libfacedetction/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/libfacedetction/.gitkeep -------------------------------------------------------------------------------- 
/data/long.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/long.jpg -------------------------------------------------------------------------------- /data/mobilenetv3/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/mobilenetv3/.gitkeep -------------------------------------------------------------------------------- /data/people.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/people.mp4 -------------------------------------------------------------------------------- /data/pphumanseg/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/pphumanseg/.gitkeep -------------------------------------------------------------------------------- /data/resnet18/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/resnet18/.gitkeep -------------------------------------------------------------------------------- /data/retinanet/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/retinanet/.gitkeep -------------------------------------------------------------------------------- /data/rifle2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/rifle2.jpeg -------------------------------------------------------------------------------- /data/road0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/road0.png -------------------------------------------------------------------------------- /data/road1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/road1.jpg -------------------------------------------------------------------------------- /data/sailboat3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/sailboat3.jpg -------------------------------------------------------------------------------- /data/ssd/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/ssd/.gitkeep -------------------------------------------------------------------------------- /data/swin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/swin/.gitkeep 
-------------------------------------------------------------------------------- /data/u2net/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/u2net/.gitkeep -------------------------------------------------------------------------------- /data/yolor/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolor/.gitkeep -------------------------------------------------------------------------------- /data/yolor/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /data/yolov3/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov3/.gitkeep -------------------------------------------------------------------------------- /data/yolov4/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov4/.gitkeep -------------------------------------------------------------------------------- /data/yolov5/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov5/.gitkeep -------------------------------------------------------------------------------- /data/yolov6/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov6/.gitkeep -------------------------------------------------------------------------------- /data/yolov7/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov7/.gitkeep -------------------------------------------------------------------------------- /data/yolov8-pose/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov8-pose/.gitkeep -------------------------------------------------------------------------------- /data/yolov8/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov8/.gitkeep -------------------------------------------------------------------------------- /data/yolox/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolox/.gitkeep -------------------------------------------------------------------------------- /data/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/zidane.jpg -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | ## 1. download tensorrt8.4.2.4 2 | - enter: https://developer.nvidia.cn/nvidia-tensorrt-8x-download 3 | - select: I Agree To the Terms of the NVIDIA TensorRT License Agreement 4 | - select: TensorRT 8.4 GA Update 1 5 | - select: TensorRT 8.4 GA Update 1 for Linux x86_64 and CUDA 11.0, 11.1, 11.2, 11.3, 11.4, 11.5, 11.6 and 11.7 TAR Package 6 | - download file: "TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz" 7 | 8 | ```bash 9 | cd TensorRT-Alpha/docker 10 | cp TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz . 11 | ``` 12 | 13 | ## 2. build docker images 14 | ```bash 15 | docker build -f ubuntu18.04-cu113.Dockerfile --network=host -t trta . 
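# (added note) after the build, a hedged example of running the image — the workspace
# directory is the one created by the Dockerfile below; everything else is standard docker:
# docker run --gpus all -it --network=host -v $(pwd)/..:/home/feiyull/workspace trta /bin/bash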
16 | ``` -------------------------------------------------------------------------------- /docker/ubuntu18.04-cu113.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04 2 | RUN sed -i 's#http://archive.ubuntu.com/#http://mirrors.tuna.tsinghua.edu.cn/#' /etc/apt/sources.list && \ 3 | apt-get update 4 | 5 | RUN apt-get install -y software-properties-common && \ 6 | add-apt-repository ppa:deadsnakes/ppa && \ 7 | apt-get update && \ 8 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 9 | build-essential \ 10 | git \ 11 | gdb \ 12 | cmake \ 13 | python3.8 \ 14 | python3.8-dev \ 15 | python3-pip \ 16 | && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6 1 \ 17 | && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 2 \ 18 | && update-alternatives --config python3 19 | 20 | #copy and unzip tensorrt8.4.2.4 21 | RUN mkdir -p /home/feiyull/ 22 | COPY TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz /home/feiyull/ 23 | RUN cd /home/feiyull/ && \ 24 | tar -zxvf TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz && \ 25 | rm TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz && \ 26 | mkdir workspace 27 | 28 | RUN \ 29 | DEBIAN_FRONTEND=noninteractive apt-get install libgl1-mesa-glx -y \ 30 | pkg-config \ 31 | libgtk-3-dev \ 32 | libavcodec-dev \ 33 | libavformat-dev \ 34 | libswscale-dev \ 35 | libv4l-dev \ 36 | libxvidcore-dev \ 37 | libx264-dev \ 38 | libopencv-dev \ 39 | && apt-get clean 40 | 41 | # RUN pip3 install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple 42 | # RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple 43 | # RUN pip install opencv-python-headless==4.8.0.74 && \ 44 | # pip install opencv-python==4.8.0.74 \ 45 | # pip install onnx==1.9.0 \ 46 | # pip install torch==1.9.0 \ 47 | # pip install torchvision==0.10.0 \ 48 | # pip install onnx-simplifier==0.4.8 49 | 50 | #RUN cd /root/.cache/pip && \ 51 | # rm -r * -------------------------------------------------------------------------------- /efficientdet/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | set(CMAKE_BUILD_TYPE "Debug") 4 | #set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(efficientdet VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | 11 | message(STATUS ${ALL_LIBS}) 12 | file(GLOB CPPS 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 17 | ${TensorRT_ROOT}/samples/common/logger.cpp 18 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 19 | ) 20 | list(REMOVE_ITEM CPPS app_efficientdet.cpp) 21 | 22 | message(STATUS CPPS = ${CPPS}) 23 | list (LENGTH CPPS length) 24 | message(STATUS ***length*** = ${length}) 25 | find_package(OpenCV REQUIRED) 26 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 27 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 28 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 29 | 30 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 31 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 32 | 33 | 
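# (added note) the CUDA_ARCHITECTURES list below covers SM 5.0/6.1/7.2/7.5 (Maxwell/Pascal/Xavier/Turing);
# if your GPU is newer, e.g. Ampere (SM 8.6), append its compute capability to the list.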
set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 34 | target_compile_options(${PROJECT_NAME} PUBLIC 35 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 36 | 37 | add_executable(app_efficientdet app_efficientdet.cpp) 38 | # NVCC 39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 40 | target_link_libraries(app_efficientdet ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 41 | -------------------------------------------------------------------------------- /efficientdet/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 3 | 4 | or export onnx: 5 | ```bash 6 | # Please refer to the following site. It is TensorRT's official doc, and it leads you through exporting onnx from efficientdet's official weights. 7 | # TensorRT-Alpha converts python to cuda c. 8 | https://github.com/NVIDIA/TensorRT/blob/release/8.4/samples/python/efficientdet/README.md 9 | ``` 10 | ## 2. edit and save onnx 11 | ```bash 12 | # note: If you have obtained onnx by downloading, this step can be ignored 13 | ignored 14 | ``` 15 | ## 3. compile onnx 16 | ```bash 17 | # put your onnx file in this path: tensorrt-alpha/data/efficientdet 18 | cd tensorrt-alpha/data/efficientdet 19 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 20 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=efficientdet0.onnx --saveEngine=efficientdet0.trt --buildOnly --minShapes=input:1x512x512x3 --optShapes=input:2x512x512x3 --maxShapes=input:4x512x512x3 21 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=efficientdet1.onnx --saveEngine=efficientdet1.trt --buildOnly --minShapes=input:1x640x640x3 --optShapes=input:2x640x640x3 --maxShapes=input:4x640x640x3 22 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=efficientdet2.onnx --saveEngine=efficientdet2.trt --buildOnly --minShapes=input:1x768x768x3 --optShapes=input:2x768x768x3 --maxShapes=input:4x768x768x3 23 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=efficientdet3.onnx --saveEngine=efficientdet3.trt --buildOnly --minShapes=input:1x896x896x3 --optShapes=input:2x896x896x3 --maxShapes=input:4x896x896x3 24 | 25 | ``` 26 | ## 4. run 27 | ```bash 28 | git clone https://github.com/FeiYull/tensorrt-alpha 29 | cd tensorrt-alpha/efficientdet 30 | mkdir build 31 | cd build 32 | cmake .. 
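# (added note) the build type is hardcoded at the top of efficientdet/CMakeLists.txt
# (Debug by default for this sample); switch it to Release there for a faster binary.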
33 | make -j10 34 | # note: the dstImage will be saved in tensorrt-alpha/efficientdet/build by default 35 | 36 | # infer image 37 | ./app_efficientdet --model=../../data/efficientdet/efficientdet0.trt --img=../../data/road0.png --size=512 --batch_size=1 --show --savePath 38 | ./app_efficientdet --model=../../data/efficientdet/efficientdet1.trt --img=../../data/road0.png --size=640 --batch_size=1 --show --savePath 39 | ./app_efficientdet --model=../../data/efficientdet/efficientdet2.trt --img=../../data/road0.png --size=768 --batch_size=1 --show --savePath 40 | ./app_efficientdet --model=../../data/efficientdet/efficientdet3.trt --img=../../data/road0.png --size=896 --batch_size=1 --show --savePath 41 | 42 | 43 | # infer video 44 | ./app_efficientdet --model=../../data/efficientdet/efficientdet0.trt --size=512 --batch_size=2 --video=../../data/people.mp4 --show 45 | 46 | # infer camera 47 | ./app_efficientdet --model=../../data/efficientdet/efficientdet0.trt --size=512 --batch_size=2 --cam_id=0 --show 48 | ``` 49 | ## 5. appendix 50 | ignore -------------------------------------------------------------------------------- /efficientdet/efficientdet.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/common_include.h" 3 | #include"../utils/utils.h" 4 | #include"../utils/kernel_function.h" 5 | 6 | class EfficientDet 7 | { 8 | public: 9 | EfficientDet(const utils::InitParameter& param); 10 | ~EfficientDet(); 11 | 12 | public: 13 | bool init(const std::vector<unsigned char>& trtFile); 14 | void check(); 15 | void copy(const std::vector<cv::Mat>& imgsBatch); 16 | void preprocess(const std::vector<cv::Mat>& imgsBatch); 17 | bool infer(); 18 | void postprocess(const std::vector<cv::Mat>& imgsBatch); 19 | void reset(); 20 | 21 | public: 22 | std::vector<std::vector<utils::Box>> getObjectss() const; 23 | 24 | protected: 25 | std::shared_ptr<nvinfer1::ICudaEngine> m_engine; 26 | std::unique_ptr<nvinfer1::IExecutionContext> m_context; 27 | 28 | protected: 29 | utils::InitParameter m_param; 30 | std::vector<std::vector<utils::Box>> m_objectss; 31 | utils::AffineMat m_dst2src; 32 | // input 33 | float* m_input_src_device; 34 | float* m_input_resize_device; 35 | float* m_input_rgb_device; 36 | // output 37 | int* m_output_num_device; 38 | int* m_output_boxes_device; 39 | int* m_output_scores_device; 40 | int* m_output_classes_device; 41 | int* m_output_num_host; 42 | int* m_output_boxes_host; 43 | int* m_output_scores_host; 44 | int* m_output_classes_host; 45 | }; -------------------------------------------------------------------------------- /examples/python_with_dll/c_files/pch.cpp: -------------------------------------------------------------------------------- 1 | // pch.cpp: source file corresponding to the precompiled header 2 | #include"./utils/yolo.h" 3 | #include "pch.h" 4 | #include"./yolov8/yolov8.h" 5 | // When using precompiled headers, this source file is required for the build to succeed. 6 | 7 | void getAimsInfo(const std::vector<std::vector<utils::Box>>& objectss, float(*res_array)[6]) 8 | { 9 | for (const auto& objects : objectss) 10 | { 11 | for (const auto& box : objects) 12 | { 13 | res_array[0][0] = box.left; 14 | res_array[0][1] = box.top; 15 | res_array[0][2] = box.right; 16 | res_array[0][3] = box.bottom; 17 | res_array[0][4] = box.label; 18 | res_array[0][5] = box.confidence; 19 | 20 | ++res_array; 21 | } 22 | } 23 | } 24 | 25 | 26 | // c++ code 27 | 28 | void* Init( 29 | const char* trt_file_path, 30 | int src_w, 31 | int src_h, 32 | float conf_thresh, 33 | float iou_thresh, 34 | int num_class 35 | ) 36 | 37 | { 38 | // parameters 39 | utils::InitParameter param; 40 | 41 | param.input_output_names = { "images", "output0" }; 42 | param.batch_size = 1; 43 | param.src_h
= src_h; 44 | param.src_w = src_w; 45 | param.dst_h = 640; 46 | param.dst_w = 640; 47 | param.iou_thresh = iou_thresh; 48 | param.conf_thresh = conf_thresh; 49 | param.num_class = num_class; 50 | 51 | YOLOV8* yolov8 = new YOLOV8(param); 52 | 53 | std::vector<unsigned char> trt_file = utils::loadModel(trt_file_path); 54 | if (trt_file.empty()) 55 | { 56 | sample::gLogError << "trt_file is empty!" << std::endl; 57 | return nullptr; 58 | } 59 | 60 | if (!yolov8->init(trt_file)) 61 | { 62 | sample::gLogError << "initEngine() encountered errors!" << std::endl; 63 | return nullptr; 64 | } 65 | yolov8->check(); 66 | return yolov8; 67 | } 68 | 69 | 70 | // 2. img inference 71 | void Detect(void* yolo, int rows, int cols, unsigned char* src_data, float(*res_array)[6]) 72 | 73 | { 74 | YOLOV8* yolov8 = (YOLOV8*)yolo; 75 | 76 | cv::Mat frame = cv::Mat(rows, cols, CV_8UC3, src_data); 77 | 78 | std::vector<cv::Mat> imgs_batch(1, frame.clone()); 79 | 80 | yolov8->reset(); 81 | 82 | yolov8->copy(imgs_batch); 83 | 84 | utils::DeviceTimer d_t1; yolov8->preprocess(imgs_batch); float t1 = d_t1.getUsedTime(); 85 | utils::DeviceTimer d_t2; yolov8->infer(); float t2 = d_t2.getUsedTime(); 86 | utils::DeviceTimer d_t3; yolov8->postprocess(imgs_batch); float t3 = d_t3.getUsedTime(); 87 | 88 | sample::gLogInfo << 89 | "preprocess time = " << t1 << "; " 90 | "infer time = " << t2 << "; " 91 | "postprocess time = " << t3 << std::endl; 92 | 93 | getAimsInfo(yolov8->getObjectss(), res_array); 94 | } 95 | -------------------------------------------------------------------------------- /examples/python_with_dll/c_files/pch.h: -------------------------------------------------------------------------------- 1 | // pch.h: this is the precompiled header file. 2 | // Files listed below are compiled only once, which improves build performance for subsequent builds. 3 | // This also affects IntelliSense performance, including code completion and many code browsing features. 4 | // However, if any file listed here is updated between builds, all of them will be recompiled. 5 | // Do not add frequently-updated files here, or the performance advantage is lost. 6 | 7 | #ifndef PCH_H 8 | #define PCH_H 9 | 10 | // add headers to be precompiled here 11 | #include "framework.h" 12 | #endif //PCH_H 13 | 14 | // define the import/export macro 15 | #ifdef IMPORT_DLL 16 | #else 17 | #define IMPORT_DLL extern "C" _declspec(dllimport) 18 | #endif 19 | 20 | 21 | IMPORT_DLL void* Init( 22 | const char* trt_file_path, 23 | int src_w, 24 | int src_h, 25 | float conf_thresh, 26 | float iou_thresh, 27 | int num_class 28 | ); 29 | IMPORT_DLL void Detect(void* yolo, int rows, int cols, unsigned char* src_data, float(*res_array)[6]); -------------------------------------------------------------------------------- /examples/python_with_dll/config/screen_inf.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import mss 4 | import win32api 5 | 6 | cap = mss.mss() 7 | def grab_screen_mss(monitor): 8 | return cv2.cvtColor(np.array(cap.grab(monitor)), cv2.COLOR_BGRA2BGR) 9 | 10 | def get_parameters(): 11 | x, y = get_screen_size().values() 12 | return 0, 0, x, y 13 | 14 | def get_screen_size(): 15 | wide = win32api.GetSystemMetrics(0) 16 | high = win32api.GetSystemMetrics(1) 17 | return {"wide": wide, "high": high} -------------------------------------------------------------------------------- /examples/python_with_dll/python_trt.py: -------------------------------------------------------------------------------- 1 | from ctypes import * 2 | from threading import Thread 3 | import cv2 4 | import numpy as np 5 | import numpy.ctypeslib as npct 6 | from pygame.time import Clock 7 | from config.screen_inf import get_parameters, grab_screen_mss 8 | 9 | 10 | class Detector: 11 | def __init__( 12 | self, dll_path, trt_path, 
window_width=640, window_height=640, conf_thresh=0.25, iou_thresh=0.45, 13 | num_class=80): 14 | self.yolo = CDLL(dll_path) 15 | self.max_bbox = 50 16 | 17 | self.yolo.Detect.argtypes = [c_void_p, c_int, c_int, POINTER(c_ubyte), 18 | npct.ndpointer(dtype=np.float32, ndim=2, shape=(self.max_bbox, 6), 19 | flags="C_CONTIGUOUS")] 20 | 21 | self.yolo.Init.argtypes = [c_char_p, c_int, c_int, c_float, c_float, c_int] 22 | self.yolo.Init.restype = c_void_p 23 | 24 | self.c_point = self.yolo.Init(trt_path.encode('utf-8'), window_width, window_height, conf_thresh, iou_thresh, 25 | num_class) 26 | 27 | def predict(self, img): 28 | rows, cols = img.shape[0], img.shape[1] 29 | res_arr = np.zeros((self.max_bbox, 6), dtype=np.float32) 30 | self.yolo.Detect(self.c_point, c_int(rows), c_int(cols), img.ctypes.data_as(POINTER(c_ubyte)), res_arr) 31 | self.bbox_array = res_arr[~(res_arr == 0).all(1)] 32 | return self.bbox_array 33 | 34 | 35 | class_names = [ 36 | "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", 37 | "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", 38 | "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", 39 | "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", 40 | "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", 41 | "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", 42 | "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", 43 | "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", 44 | "hair drier", "toothbrush" 45 | ] 46 | 47 | # Capture the specified screen region and run inference on it 48 | if __name__ == '__main__': 49 | 50 | def img_grab_thread(): 51 | 52 | global frame 53 | global monitor 54 | clock = Clock() 55 | 56 | while True: 57 | frame = grab_screen_mss(monitor) 58 | clock.tick(200) 59 | 60 | 61 | def img_pred_thread(): 62 | 63 | global frame 64 | global source_w 65 | global source_h 66 | det = Detector(dll_path="./python_dll.dll", trt_path="./yolov8n.trt", window_width=source_w, 67 | window_height=source_h) 68 | clock = Clock() 69 | 70 | windows_title = "cvwindow" 71 | cv2.namedWindow(windows_title, cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) 72 | 73 | max_w = 576 74 | max_h = 324 75 | if source_h > max_h or source_w > max_w: 76 | cv2.resizeWindow(windows_title, max_w, source_h * max_w // source_w) 77 | 78 | while True: 79 | aims = det.predict(frame) 80 | for aim in aims: 81 | cv2.rectangle(frame, (int(aim[0]), int(aim[1])), (int(aim[2]), int(aim[3])), (0, 255, 0), 2) 82 | det_info = class_names[int(aim[4])] + " " + str(aim[5]) 83 | cv2.putText(frame, det_info, (int(aim[0]), int(aim[1])), cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 0, 255), 1, 84 | cv2.LINE_AA) 85 | 86 | cv2.putText(frame, "FPS:{:.1f}".format(clock.get_fps()), (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 87 | 2, (0, 0, 235), 4) 88 | cv2.imshow('cvwindow', frame) 89 | cv2.waitKey(1) 90 | 91 | clock.tick(200) 92 | 93 | 94 | # 4:3 800x600 center region detect 95 | source_w = int(800) 96 | source_h = int(600) 97 | 98 | _, _, x, y = get_parameters() 99 | top_x = (x // 2) - (source_w // 2) 100 | top_y = (y // 2) - (source_h // 2) 101 | 102 | monitor = {'left': top_x, 'top': top_y, 'width': source_w, 'height': source_h} 103 | 104 | 
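    # (added note) frame is shared with img_grab_thread without a lock and starts as None below,
    # so img_pred_thread can race the first capture; a None-check before det.predict(frame)
    # would avoid a crash on the very first iteration.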
frame = None 105 | 106 | # To demonstrate the inference speed more intuitively, 107 | # two threads are used here: 108 | # img_grab_thread for image fetching 109 | # img_pred_thread for inference 110 | # Lock is not used here, so the display effect may be poor if the image fetching speed is too high 111 | Thread(target=img_grab_thread).start() 112 | Thread(target=img_pred_thread).start() 113 | 114 | # VideoCapture predict demo 115 | if __name__ == '__main__OFF': 116 | cap = cv2.VideoCapture('./people.mp4') 117 | 118 | source_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 119 | source_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 120 | 121 | det = Detector(dll_path="./yoloDemo.dll", trt_path="./yolov8n.trt", window_width=source_w, window_height=source_h) 122 | 123 | clock = Clock() 124 | while True: 125 | ret, frame = cap.read() 126 | if not ret: 127 | break 128 | 129 | aims = det.predict(frame) 130 | 131 | # do something here 132 | for aim in aims: 133 | cv2.rectangle(frame, (int(aim[0]), int(aim[1])), (int(aim[2]), int(aim[3])), (0, 255, 0), 2) 134 | det_info = class_names[int(aim[4])] + " " + str(aim[5]) 135 | cv2.putText(frame, det_info, (int(aim[0]), int(aim[1])), cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 0, 255), 1, 136 | cv2.LINE_AA) 137 | 138 | cv2.imshow('cvwindow', frame) 139 | cv2.waitKey(1) 140 | 141 | print('pred fps: ', clock.get_fps()) 142 | clock.tick(5) 143 | 144 | cap.release() 145 | cv2.destroyAllWindows() 146 | -------------------------------------------------------------------------------- /libfacedetection/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | # cuda 7 | PROJECT(facedet VERSION 1.0.0 LANGUAGES C CXX CUDA) 8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 11 | 12 | message(STATUS ${ALL_LIBS}) 13 | file(GLOB CPPS 14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 15 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 17 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 18 | ${TensorRT_ROOT}/samples/common/logger.cpp 19 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 20 | ) 21 | list(REMOVE_ITEM CPPS app_libfacedetction.cpp) 22 | 23 | message(STATUS CPPS = ${CPPS}) 24 | list (LENGTH CPPS length) 25 | message(STATUS ***length*** = ${length}) 26 | find_package(OpenCV REQUIRED) 27 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 28 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 29 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 30 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 31 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 32 | 33 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 34 | target_compile_options(${PROJECT_NAME} PUBLIC 35 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 36 | 37 | add_executable(app_libfacedetction app_libfacedetction.cpp) 38 | 39 | # NVCC 40 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 41 | target_link_libraries(app_libfacedetction ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 42 | -------------------------------------------------------------------------------- /libfacedetection/README.md: 
-------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 3 | 4 | or export onnx: 5 | ```bash 6 | git clone https://github.com/ShiqiYu/libfacedetection.train 7 | cd libfacedetection.train && git checkout a3bc97c7e85bb206c9feca97fbd541ce82cfa3a9 8 | 9 | # note: the official repository provides the following three models: 10 | # yunet_yunet_final_320_320_simplify.onnx 11 | # yunet_yunet_final_640_640_simplify.onnx 12 | # yunet_yunet_final_dynamic_simplify.onnx 13 | # choose the third (dynamic) model here. 14 | ``` 15 | ## 2. edit and save onnx 16 | ```bash 17 | # note: if you have obtained the onnx by downloading, this step can be ignored 18 | conda activate tensorrt-alpha 19 | # put your onnx file in this path: tensorrt-alpha/data/libfacedetction 20 | cd tensorrt-alpha/data/libfacedetction 21 | python alpha_edit.py --onnx=yunet_yunet_final_dynamic_simplify.onnx 22 | ``` 23 | ## 3. compile onnx 24 | ```bash 25 | # put your onnx file in this path: tensorrt-alpha/data/libfacedetction 26 | cd tensorrt-alpha/data/libfacedetction 27 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 28 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yunet_yunet_final_dynamic_simplify.onnx --saveEngine=alpha_yunet_yunet_final_dynamic_simplify.trt --buildOnly --minShapes=input:1x3x120x120 --optShapes=input:4x3x320x320 --maxShapes=input:8x3x2000x2000 29 | ``` 30 | ## 4. run 31 | ```bash 32 | git clone https://github.com/FeiYull/tensorrt-alpha 33 | cd tensorrt-alpha/libfacedetection 34 | mkdir build 35 | cd build 36 | cmake .. 37 | make -j10 38 | # note: the dstImage will be saved in tensorrt-alpha/libfacedetection/build by default 39 | 40 | # dynamic [b w h] 41 | # infer image 42 | ./app_libfacedetction --model=../../data/libfacedetction/alpha_yunet_yunet_final_dynamic_simplify.trt --batch_size=1 --img=../../data/6406401.jpg --show --savePath 43 | 44 | # infer video 45 | ./app_libfacedetction --model=../../data/libfacedetction/alpha_yunet_yunet_final_dynamic_simplify.trt --batch_size=8 --video=../../data/people.mp4 --show 46 | 47 | # infer camera 48 | ./app_libfacedetction --model=../../data/libfacedetction/alpha_yunet_yunet_final_dynamic_simplify.trt --batch_size=2 --cam_id=0 --show 49 | ``` 50 | ## 5.
appendix 51 | ignore -------------------------------------------------------------------------------- /libfacedetection/alpha_edit.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pyexpat import model 3 | from turtle import width 4 | import onnx 5 | import onnx.checker 6 | import onnx.utils 7 | from onnx.tools import update_model_dims 8 | import onnx.helper as helper 9 | import torch 10 | 11 | if __name__ == '__main__': 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--onnx', type=str, default='yunet_yunet_final_dynamic_simplify.onnx', help='onnx path') 14 | opt = parser.parse_args() 15 | 16 | model = onnx.load(opt.onnx) 17 | in_b = model.graph.input[0].type.tensor_type.shape.dim[0] 18 | in_c = model.graph.input[0].type.tensor_type.shape.dim[1] 19 | in_h = model.graph.input[0].type.tensor_type.shape.dim[2] 20 | in_w = model.graph.input[0].type.tensor_type.shape.dim[3] 21 | # loc 22 | out_loc_b = model.graph.output[0].type.tensor_type.shape.dim[0] 23 | out_loc_num_candidates = model.graph.output[0].type.tensor_type.shape.dim[1] 24 | out_loc_dim2 = model.graph.output[0].type.tensor_type.shape.dim[2] 25 | # conf 26 | out_conf_b = model.graph.output[1].type.tensor_type.shape.dim[0] 27 | out_conf_num_candidates = model.graph.output[1].type.tensor_type.shape.dim[1] 28 | out_conf_dim2 = model.graph.output[1].type.tensor_type.shape.dim[2] 29 | # iou 30 | out_iou_b = model.graph.output[2].type.tensor_type.shape.dim[0] 31 | out_iou_num_candidates = model.graph.output[2].type.tensor_type.shape.dim[1] 32 | out_iou_dim2 = model.graph.output[2].type.tensor_type.shape.dim[2] 33 | in_b.dim_param= "batch_size" 34 | in_h.dim_param= "height" 35 | in_w.dim_param= "width" 36 | out_loc_b.dim_param = "batch_size" 37 | out_conf_b.dim_param= "batch_size" 38 | out_iou_b.dim_param = "batch_size" 39 | out_loc_num_candidates.dim_param = "num_condidates" 40 | out_conf_num_candidates.dim_param = "num_condidates" 41 | out_iou_num_candidates.dim_param = "num_condidates" 42 | 43 | onnx.save(model, 'alpha_yunet_yunet_final_dynamic_simplify.onnx') 44 | print("ok") 45 | 46 | -------------------------------------------------------------------------------- /libfacedetection/app_libfacedetction.cpp: -------------------------------------------------------------------------------- 1 | #include"../utils/common_include.h" 2 | #include"../utils/utils.h" 3 | #include"libfacedetection.h" 4 | 5 | void setParameters(utils::InitParameter& initParameters) 6 | { 7 | initParameters.class_names = utils::dataSets::face2; 8 | 9 | initParameters.num_class = 2; 10 | initParameters.batch_size = 8; 11 | // dynamic: HWC 12 | /*initParameters.dst_h = 640; 13 | initParameters.dst_w = 640;*/ 14 | 15 | initParameters.topK = 1000; 16 | 17 | initParameters.input_output_names = { "input", "loc", "conf", "iou"}; 18 | initParameters.conf_thresh = 0.3f; 19 | initParameters.iou_thresh = 0.45f; 20 | initParameters.save_path = ""; 21 | } 22 | 23 | void task(LibFaceDet& face_det, const utils::InitParameter& param, std::vector& imgsBatch, const int& delayTime, const int& batchi, 24 | const bool& isShow, const bool& isSave) 25 | { 26 | face_det.copy(imgsBatch); 27 | utils::DeviceTimer d_t1; face_det.preprocess(imgsBatch); float t1 = d_t1.getUsedTime(); 28 | utils::DeviceTimer d_t2; face_det.infer(); float t2 = d_t2.getUsedTime(); 29 | utils::DeviceTimer d_t3; face_det.postprocess(imgsBatch); float t3 = d_t3.getUsedTime(); 30 | sample::gLogInfo << "preprocess time = " << t1 / 
param.batch_size << "; " 31 | "infer time = " << t2 / param.batch_size << "; " 32 | "postprocess time = " << t3 / param.batch_size << std::endl; 33 | if(isShow) 34 | utils::show(face_det.getObjectss(), param.class_names, delayTime, imgsBatch); 35 | if(isSave) 36 | utils::save(face_det.getObjectss(), param.class_names, param.save_path, imgsBatch, param.batch_size, batchi); 37 | face_det.reset(); 38 | } 39 | 40 | int main(int argc, char** argv) 41 | { 42 | cv::CommandLineParser parser(argc, argv, 43 | { 44 | "{model || tensorrt model file }" 45 | "{batch_size|| batch size }" 46 | "{video || video's path }" 47 | "{img || image's path }" 48 | "{cam_id || camera's device id }" 49 | "{show || if show the result }" 50 | "{savePath || save path, can be ignore}" 51 | }); 52 | // parameters 53 | utils::InitParameter param; 54 | setParameters(param); 55 | // path 56 | std::string model_path = "../../data/libfacedetction/alpha_yunet_yunet_final_dynamic_simplify.trt"; 57 | std::string video_path = "../../data/people.mp4"; 58 | std::string image_path = "../../data/6406403.jpg"; 59 | // camera' id 60 | int camera_id = 0; 61 | 62 | // get input 63 | utils::InputStream source; 64 | source = utils::InputStream::IMAGE; 65 | //source = utils::InputStream::VIDEO; 66 | //source = utils::InputStream::CAMERA; 67 | 68 | // update params from command line parser 69 | //int size = -1; // w or h 70 | int batch_size = 8; 71 | bool is_show = false; 72 | bool is_save = false; 73 | if(parser.has("model")) 74 | { 75 | model_path = parser.get("model"); 76 | sample::gLogInfo << "model_path = " << model_path << std::endl; 77 | } 78 | 79 | if(parser.has("batch_size")) 80 | { 81 | batch_size = parser.get("batch_size"); 82 | sample::gLogInfo << "batch_size = " << batch_size << std::endl; 83 | param.batch_size = batch_size; 84 | } 85 | if(parser.has("video")) 86 | { 87 | source = utils::InputStream::VIDEO; 88 | video_path = parser.get("video"); 89 | sample::gLogInfo << "video_path = " << video_path << std::endl; 90 | } 91 | if(parser.has("img")) 92 | { 93 | source = utils::InputStream::IMAGE; 94 | image_path = parser.get("img"); 95 | sample::gLogInfo << "image_path = " << image_path << std::endl; 96 | } 97 | if(parser.has("cam_id")) 98 | { 99 | source = utils::InputStream::CAMERA; 100 | camera_id = parser.get("cam_id"); 101 | sample::gLogInfo << "camera_id = " << camera_id << std::endl; 102 | } 103 | if(parser.has("show")) 104 | { 105 | is_show = true; 106 | sample::gLogInfo << "is_show = " << is_show << std::endl; 107 | } 108 | if(parser.has("savePath")) 109 | { 110 | is_save = true; 111 | param.save_path = parser.get("savePath"); 112 | sample::gLogInfo << "save_path = " << param.save_path << std::endl; 113 | } 114 | 115 | int total_batches = 0; 116 | int delay_time = 1; 117 | cv::VideoCapture capture; 118 | if (!setInputStream(source, image_path, video_path, camera_id, 119 | capture, total_batches, delay_time, param)) 120 | { 121 | sample::gLogError << "read the input data errors!" << std::endl; 122 | return -1; 123 | } 124 | 125 | LibFaceDet face_det(param); 126 | 127 | // read model 128 | std::vector trt_file = utils::loadModel(model_path); 129 | if (trt_file.empty()) 130 | { 131 | sample::gLogError << "trt_file is empty!" << std::endl; 132 | return -1; 133 | } 134 | // init model 135 | if (!face_det.init(trt_file)) 136 | { 137 | sample::gLogError << "initEngine() ocur errors!" 
<< std::endl; 138 | return -1; 139 | } 140 | face_det.check(); 141 | cv::Mat frame; 142 | std::vector imgs_batch; 143 | imgs_batch.reserve(param.batch_size); 144 | sample::gLogInfo << imgs_batch.capacity() << std::endl; 145 | int batchi = 0; 146 | while (capture.isOpened()) 147 | { 148 | if (batchi >= total_batches && source != utils::InputStream::CAMERA) 149 | { 150 | break; 151 | } 152 | if (imgs_batch.size() < param.batch_size) 153 | { 154 | if (source != utils::InputStream::IMAGE) 155 | { 156 | capture.read(frame); 157 | } 158 | else 159 | { 160 | frame = cv::imread(image_path); 161 | } 162 | 163 | if (frame.empty()) 164 | { 165 | sample::gLogWarning << "no more video or camera frame" << std::endl; 166 | task(face_det, param, imgs_batch, delay_time, batchi, is_show, is_save); 167 | imgs_batch.clear(); 168 | batchi++; 169 | break; 170 | } 171 | else 172 | { 173 | imgs_batch.emplace_back(frame.clone()); 174 | } 175 | 176 | } 177 | else 178 | { 179 | task(face_det, param, imgs_batch, delay_time, batchi, is_show, is_save); 180 | imgs_batch.clear(); 181 | batchi++; 182 | } 183 | } 184 | return -1; 185 | } -------------------------------------------------------------------------------- /libfacedetection/libfacedetection.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/common_include.h" 3 | #include"../utils/utils.h" 4 | #include"../utils/kernel_function.h" 5 | 6 | class LibFaceDet 7 | { 8 | public: 9 | LibFaceDet(const utils::InitParameter& param); 10 | ~LibFaceDet(); 11 | 12 | public: 13 | bool init(const std::vector& trtFile); 14 | void check(); 15 | void copy(const std::vector& imgsBatch); 16 | void preprocess(const std::vector& imgsBatch); 17 | bool infer(); 18 | void postprocess(const std::vector& imgsBatch); 19 | void reset(); 20 | 21 | public: 22 | std::vector> getObjectss() const; 23 | 24 | private: 25 | std::shared_ptr m_engine; 26 | std::unique_ptr m_context; 27 | 28 | protected: 29 | utils::InitParameter m_param; 30 | nvinfer1::Dims m_output_loc_dims; 31 | nvinfer1::Dims m_output_conf_dims; 32 | nvinfer1::Dims m_output_iou_dims; 33 | int m_total_objects; 34 | 35 | // const params on host 36 | const float m_min_sizes_host[4 * 3] = 37 | { 10, 16, 24, 32, 48, FLT_MAX, 64, 96, FLT_MAX, 128, 192, 256 }; 38 | const int m_min_sizes_host_dim[4] = 39 | { 3, 2, 2, 3 }; 40 | float* m_feat_hw_host; 41 | float* m_prior_boxes_host; 42 | const float m_variances_host[2] = { 0.1f, 0.2f }; 43 | // const params on device 44 | float* m_min_sizes_device; 45 | float* m_feat_hw_host_device; 46 | float* m_prior_boxes_device; 47 | float* m_variances_device; 48 | std::vector> m_objectss; 49 | // input 50 | float* m_input_src_device; 51 | float* m_input_hwc_device; 52 | // output 53 | float* m_output_loc_device; 54 | float* m_output_conf_device; 55 | float* m_output_iou_device; 56 | float* m_output_objects_device; 57 | float* m_output_objects_host; 58 | int m_output_objects_width; 59 | 60 | }; 61 | 62 | void decodeLibFaceDetDevice(float* minSizes, float* feat_hw, float* priorBoxes, float* variances, 63 | int srcImgWidth, int srcImgHeight, 64 | float confThreshold, int batchSize, int srcHeight, 65 | float* srcLoc, int srcLocWidth, 66 | float* srcConf, int srcConfWidth, 67 | float* srcIou, int srcIouWidth, 68 | float* dst, int dstWidth, int dstHeight); -------------------------------------------------------------------------------- /pphumanseg/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(pphunmanseg VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | 8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 11 | 12 | message(STATUS ${ALL_LIBS}) 13 | file(GLOB CPPS 14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 15 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 17 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 18 | ${TensorRT_ROOT}/samples/common/logger.cpp 19 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 20 | ) 21 | list(REMOVE_ITEM CPPS app_pphunmanseg.cpp) 22 | 23 | message(STATUS CPPS = ${CPPS}) 24 | list (LENGTH CPPS length) 25 | message(STATUS ***length*** = ${length}) 26 | find_package(OpenCV REQUIRED) 27 | 28 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 29 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 30 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 31 | 32 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 33 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 34 | 35 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 36 | target_compile_options(${PROJECT_NAME} PUBLIC 37 | $<$:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 38 | 39 | add_executable(app_pphunmanseg app_pphunmanseg.cpp) 40 | 41 | # NVCC 42 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 43 | target_link_libraries(app_pphunmanseg ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 44 | -------------------------------------------------------------------------------- /pphumanseg/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google driver](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 3 | 4 | or export onnx: 5 | ```bash 6 | # Install git-lfs from https://git-lfs.github.com/ 7 | git clone https://github.com/opencv/opencv_zoo && cd opencv_zoo 8 | git checkout ae1d754a3ea14e4244fbea7d781cca2e18584035 9 | git lfs install 10 | git lfs pull 11 | # note:The official onnx is in this path:opencv_zoo/models/human_segmentation_pphumanseg. 
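# the downloaded file is human_segmentation_pphumanseg_2021oct.onnx; copy it into
# tensorrt-alpha/data/pphumanseg for the steps below
# caution: alpha_edit.py resolves its input/output relative to ../data/pphumanseg/,
# i.e. it assumes it is run from a top-level folder such as tensorrt-alpha/pphumanseg;
# adjust --onnx (or where you run it from) if you follow the cd in step 2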
12 | ``` 13 | ## 2.edit and save onnx 14 | ```bash 15 | # note: If you have obtained onnx by downloading, this step can be ignored 16 | conda activate tensorrt-alpha 17 | # put your onnx file in this path:tensorrt-alpha/data/pphumanseg 18 | cd tensorrt-alpha/data/pphumanseg 19 | python alpha_edit.py --onnx=../data/pphumanseg/human_segmentation_pphumanseg_2021oct.onnx 20 | ``` 21 | ## 3.compile onnx 22 | ```bash 23 | # put your onnx file in this path:tensorrt-alpha/data/pphumanseg 24 | cd tensorrt-alpha/data/pphumanseg 25 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 26 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=human_segmentation_pphumanseg_2021oct_dynamic.onnx --saveEngine=human_segmentation_pphumanseg_2021oct_dynamic.trt --buildOnly --minShapes=x:1x3x192x192 --optShapes=x:2x3x192x192 --maxShapes=x:4x3x192x192 27 | ``` 28 | ## 4.run 29 | ```bash 30 | git clone https://github.com/FeiYull/tensorrt-alpha 31 | cd tensorrt-alpha/pphumanseg 32 | mkdir build 33 | cd build 34 | cmake .. 35 | make -j10 36 | # note: the dstImage will be saved in tensorrt-alpha/pphumanseg/build by default 37 | 38 | # infer image 39 | ./app_pphunmanseg --model=../../data/pphumanseg/human_segmentation_pphumanseg_2021oct_dynamic.trt --img=../../data/6.jpg --size=192 --batch_size=1 --show -savePath 40 | 41 | # infer video 42 | ./app_pphunmanseg --model=../../data/pphumanseg/human_segmentation_pphumanseg_2021oct_dynamic.trt --batch_size=2 --video=../../data/people.mp4 --show 43 | 44 | # infer camera 45 | ./app_pphunmanseg --model=../../data/pphumanseg/human_segmentation_pphumanseg_2021oct_dynamic.trt --batch_size=2 --cam_id=0 --show 46 | ``` 47 | ## 5. appendix 48 | ignore -------------------------------------------------------------------------------- /pphumanseg/alpha_edit.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import onnx 3 | import onnx.checker 4 | import onnx.utils 5 | from onnx.tools import update_model_dims 6 | import onnx.helper as helper 7 | 8 | if __name__ == '__main__': 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--onnx', type=str, default='../data/pphumanseg/human_segmentation_pphumanseg_2021oct.onnx', help='onnx path') 11 | opt = parser.parse_args() 12 | 13 | model = onnx.load(opt.onnx) 14 | 15 | in_b = model.graph.input[0].type.tensor_type.shape.dim[0] 16 | in_c = model.graph.input[0].type.tensor_type.shape.dim[1] 17 | in_h = model.graph.input[0].type.tensor_type.shape.dim[2] 18 | in_w = model.graph.input[0].type.tensor_type.shape.dim[3] 19 | 20 | out_loc_b = model.graph.output[0].type.tensor_type.shape.dim[0] 21 | out_loc_num_candidates = model.graph.output[0].type.tensor_type.shape.dim[1] 22 | out_loc_dim2 = model.graph.output[0].type.tensor_type.shape.dim[2] # 这个维度不修改 23 | 24 | in_b.dim_param= "batch_size" 25 | 26 | out_loc_b.dim_param = "batch_size" 27 | 28 | onnx.save(model, '../data/pphumanseg//human_segmentation_pphumanseg_2021oct_dynamic.onnx') 29 | print("ok") -------------------------------------------------------------------------------- /pphumanseg/app_pphunmanseg.cpp: -------------------------------------------------------------------------------- 1 | #include"pphunmanseg.h" 2 | 3 | void setParameters(utils::InitParameter& initParameters) 4 | { 5 | initParameters.batch_size = 8; 6 | initParameters.dst_h = 192; 7 | initParameters.dst_w = 192; 8 | 9 | initParameters.means[0] = 0.5f; 10 | initParameters.means[1] = 0.5f; 11 | initParameters.means[2] = 0.5f; 12 | 
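    // mean = std = 0.5 per channel: assuming normDevice applies the usual
    // (x / 255 - mean) / std convention, this maps input pixels into [-1, 1],
    // the normalization PP-HumanSeg is trained with.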
initParameters.stds[0] = 0.5f; 13 | initParameters.stds[1] = 0.5f; 14 | initParameters.stds[2] = 0.5f; 15 | 16 | initParameters.input_output_names = { "x", "save_infer_model/scale_0.tmp_1" }; 17 | initParameters.save_path = ""; 18 | } 19 | 20 | void task(PPHunmanSeg& hunman_seg, const utils::InitParameter& param, std::vector& imgsBatch, const int& delayTime, const int& batchi, 21 | const bool& isShow, const bool& isSave) 22 | { 23 | hunman_seg.copy(imgsBatch); 24 | utils::DeviceTimer d_t1; hunman_seg.preprocess(imgsBatch); float t1 = d_t1.getUsedTime(); 25 | utils::DeviceTimer d_t2; hunman_seg.infer(); float t2 = d_t2.getUsedTime(); 26 | utils::DeviceTimer d_t3; hunman_seg.postprocess(imgsBatch); float t3 = d_t3.getUsedTime(); 27 | sample::gLogInfo << "preprocess time = " << t1 / param.batch_size << "; " 28 | "infer time = " << t2 / param.batch_size << "; " 29 | "postprocess time = " << t3 / param.batch_size << std::endl; 30 | if (isShow) 31 | hunman_seg.showMask(imgsBatch, delayTime); 32 | if (isSave) 33 | hunman_seg.saveMask(imgsBatch, param.save_path, param.batch_size, batchi); 34 | } 35 | 36 | int main(int argc, char** argv) 37 | { 38 | cv::CommandLineParser parser(argc, argv, 39 | { 40 | "{model || tensorrt model file }" 41 | "{size || image (h, w), eg: 640}" 42 | "{batch_size|| batch size }" 43 | "{video || video's path }" 44 | "{img || image's path }" 45 | "{cam_id || camera's device id }" 46 | "{show || if show the result }" 47 | "{savePath || save path, can be ignore}" 48 | }); 49 | // parameters 50 | utils::InitParameter param; 51 | setParameters(param); 52 | // path 53 | std::string model_path = "../../data/pphumanseg/human_segmentation_pphumanseg_2021oct_dynamic.trt"; 54 | std::string video_path = "../../data/people.mp4"; 55 | std::string image_path = "../../data/6406403.jpg"; 56 | int camera_id = 0; // camera' id 57 | 58 | // get input 59 | utils::InputStream source; 60 | //source = utils::InputStream::IMAGE; 61 | source = utils::InputStream::VIDEO; 62 | //source = utils::InputStream::CAMERA; 63 | 64 | // update params from command line parser 65 | int size = -1; // w or h 66 | int batch_size = 8; 67 | bool is_show = false; 68 | bool is_save = false; 69 | if(parser.has("model")) 70 | { 71 | model_path = parser.get("model"); 72 | sample::gLogInfo << "model_path = " << model_path << std::endl; 73 | } 74 | if(parser.has("size")) 75 | { 76 | size = parser.get("size"); 77 | sample::gLogInfo << "size = " << size << std::endl; 78 | param.dst_h = param.dst_w = size; 79 | } 80 | if(parser.has("batch_size")) 81 | { 82 | batch_size = parser.get("batch_size"); 83 | sample::gLogInfo << "batch_size = " << batch_size << std::endl; 84 | param.batch_size = batch_size; 85 | } 86 | if(parser.has("video")) 87 | { 88 | source = utils::InputStream::VIDEO; 89 | video_path = parser.get("video"); 90 | sample::gLogInfo << "video_path = " << video_path << std::endl; 91 | } 92 | if(parser.has("img")) 93 | { 94 | source = utils::InputStream::IMAGE; 95 | image_path = parser.get("img"); 96 | sample::gLogInfo << "image_path = " << image_path << std::endl; 97 | } 98 | if(parser.has("cam_id")) 99 | { 100 | source = utils::InputStream::CAMERA; 101 | camera_id = parser.get("cam_id"); 102 | sample::gLogInfo << "camera_id = " << camera_id << std::endl; 103 | } 104 | if(parser.has("show")) 105 | { 106 | is_show = true; 107 | sample::gLogInfo << "is_show = " << is_show << std::endl; 108 | } 109 | if(parser.has("savePath")) 110 | { 111 | is_save = true; 112 | param.save_path = parser.get("savePath"); 113 | 
sample::gLogInfo << "save_path = " << param.save_path << std::endl; 114 | } 115 | 116 | 117 | 118 | int total_batches = 0; 119 | int delay_time = 1; 120 | cv::VideoCapture capture; 121 | if (!setInputStream(source, image_path, video_path, camera_id, 122 | capture, total_batches, delay_time, param)) 123 | { 124 | sample::gLogError << "read the input data errors!" << std::endl; 125 | return -1; 126 | } 127 | 128 | PPHunmanSeg hunman_seg(param); 129 | 130 | // read model 131 | std::vector trt_file = utils::loadModel(model_path); 132 | if (trt_file.empty()) 133 | { 134 | sample::gLogError << "trt_file is empty!" << std::endl; 135 | return -1; 136 | } 137 | // init model 138 | if (!hunman_seg.init(trt_file)) 139 | { 140 | sample::gLogError << "initEngine() ocur errors!" << std::endl; 141 | return -1; 142 | } 143 | hunman_seg.check(); 144 | cv::Mat frame; 145 | std::vector imgs_batch; 146 | imgs_batch.reserve(param.batch_size); 147 | sample::gLogInfo << imgs_batch.capacity() << std::endl; 148 | int batchi = 0; 149 | while (capture.isOpened()) 150 | { 151 | if (batchi >= total_batches && source != utils::InputStream::CAMERA) 152 | { 153 | break; 154 | } 155 | if (imgs_batch.size() < param.batch_size) // get input 156 | { 157 | if (source != utils::InputStream::IMAGE) 158 | { 159 | capture.read(frame); 160 | } 161 | else 162 | { 163 | frame = cv::imread(image_path); 164 | } 165 | 166 | if (frame.empty()) 167 | { 168 | sample::gLogWarning << "no more video or camera frame" << std::endl; 169 | task(hunman_seg, param, imgs_batch, delay_time, batchi, is_show, is_save); 170 | imgs_batch.clear(); 171 | batchi++; 172 | break; 173 | } 174 | else 175 | { 176 | imgs_batch.emplace_back(frame.clone()); 177 | } 178 | 179 | } 180 | else // infer 181 | { 182 | task(hunman_seg, param, imgs_batch, delay_time, batchi, is_show, is_save); 183 | imgs_batch.clear(); 184 | batchi++; 185 | } 186 | } 187 | return -1; 188 | } 189 | 190 | -------------------------------------------------------------------------------- /pphumanseg/decode_pphunmanseg.cu: -------------------------------------------------------------------------------- 1 | #include"decode_pphunmanseg.h" 2 | #include"../utils/kernel_function.h" 3 | 4 | __global__ 5 | void decode_pphunmanseg_device_kernel(int batch_size, 6 | float* src, int src_width, int src_height, int src_area, int src_volum, 7 | float* dst, int dst_width, int dst_height, int dst_area, int dst_volum) 8 | { 9 | int dx = blockDim.x * blockIdx.x + threadIdx.x; 10 | int dy = blockDim.y * blockIdx.y + threadIdx.y; 11 | if (dx >= dst_area || dy >= batch_size) 12 | { 13 | return; 14 | } 15 | dst[dy * dst_volum + dx] = (src[dy * src_volum + dx + src_area] > src[dy * src_volum + dx] ? 
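        // channel 0 (offset 0) holds the background score and channel 1 (offset
        // src_area) the person score; comparing the raw scores gives the same
        // argmax a softmax would, so the binary mask is written directly: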
1.f : 0.f); 16 | } 17 | void pphunmanseg::decodeDevice(int batchSize, float* src, int srcWidth, int srcHeight, float* dst, int dstWidth, int dstHeight) 18 | { 19 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE); 20 | dim3 grid_size((dstWidth * dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE, 21 | (batchSize + BLOCK_SIZE - 1) / BLOCK_SIZE); 22 | int src_area = srcWidth * srcHeight; 23 | int src_volum = srcWidth * srcHeight * 2; 24 | int dst_area = dstWidth * dstHeight; 25 | int dst_volum = dstWidth * dstHeight * 1; 26 | decode_pphunmanseg_device_kernel << < grid_size, block_size, 0, nullptr >> > (batchSize, 27 | src, srcWidth, srcHeight, src_area, src_volum, 28 | dst, dstWidth, dstHeight, dst_area, dst_volum); 29 | } -------------------------------------------------------------------------------- /pphumanseg/decode_pphunmanseg.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/utils.h" 3 | #include"../utils/common_include.h" 4 | 5 | namespace pphunmanseg 6 | { 7 | void decodeDevice(int batchSize, float* src, int srcWidth, int srcHeight, float* dst, int dstWidth, int dstHeight); 8 | } -------------------------------------------------------------------------------- /pphumanseg/pphunmanseg.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/common_include.h" 3 | #include"../utils/utils.h" 4 | #include"../utils/kernel_function.h" 5 | 6 | 7 | class PPHunmanSeg 8 | { 9 | public: 10 | PPHunmanSeg(const utils::InitParameter& param); 11 | ~PPHunmanSeg(); 12 | 13 | public: 14 | bool init(const std::vector& trtFile); 15 | void check(); 16 | void copy(const std::vector& imgsBatch); 17 | void preprocess(const std::vector& imgsBatch); 18 | bool infer(); 19 | void postprocess(const std::vector& imgsBatch); 20 | void reset(); 21 | void showMask(const std::vector& imgsBatch, const int& cvDelayTime); 22 | void saveMask(const std::vector& imgsBatch, const std::string& savePath, const int& batchSize, const int& batchi); 23 | 24 | protected: 25 | std::shared_ptr m_engine; 26 | std::unique_ptr m_context; 27 | 28 | protected: 29 | utils::InitParameter m_param; 30 | nvinfer1::Dims m_output_src_dims; 31 | int m_output_src_area; 32 | 33 | utils::AffineMat m_dst2src; 34 | utils::AffineMat m_src2dst; 35 | 36 | // input 37 | float* m_input_src_device; 38 | float* m_input_resize_device; 39 | float* m_input_rgb_device; 40 | float* m_input_norm_device; 41 | float* m_input_hwc_device; 42 | 43 | // output 44 | float* m_output_src_device; 45 | float* m_output_mask_device; 46 | float* m_output_resize_device; 47 | float* m_output_resize_host; 48 | }; 49 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.9.0 2 | onnx== 1.9.0 3 | torchvision==0.10.1 # Image classification 4 | onnx-simplifier==0.4.8 5 | onnxruntime==1.8.0 6 | opencv-python==4.6.0 -------------------------------------------------------------------------------- /tools/onnx2trt.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include // add file: ../TensorRT-8.4.2.4/samples/common/logger.cpp 9 | using namespace std; 10 | 11 | int main() { 12 | // setting 13 | std::string onnx_file = "D:/ThirdParty/TensorRT-8.4.2.4/bin/yolov8n.onnx"; 14 | std::string trt_file = 
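    // engine file is written to the working directory; the min/opt/max shapes
    // below form the dynamic-batch optimization profile and must bracket every
    // batch size used at runtime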
"yolov8n.trt"; 15 | int min_batchsize = 1; 16 | int opt_batchsize = 1; 17 | int max_batchsize = 2; 18 | nvinfer1::Dims4 min_shape(min_batchsize, 3, 640, 640); 19 | nvinfer1::Dims4 opt_shape(opt_batchsize, 3, 640, 640); 20 | nvinfer1::Dims4 max_shape(max_batchsize, 3, 640, 640); 21 | 22 | 23 | 24 | nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()); 25 | nvinfer1::IBuilderConfig* config = builder->createBuilderConfig(); 26 | nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1); 27 | 28 | nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()); 29 | if (!parser->parseFromFile(onnx_file.c_str(), 1)) { 30 | printf("Failed to parser demo.onnx\n"); 31 | return false; 32 | } 33 | 34 | printf("Workspace Size = %.2f MB\n", (1 << 28) / 1024.0f / 1024.0f); 35 | config->setMaxWorkspaceSize(1 << 28); 36 | 37 | auto profile = builder->createOptimizationProfile(); 38 | auto input_tensor = network->getInput(0); 39 | int input_channel = input_tensor->getDimensions().d[1]; 40 | 41 | profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMIN, min_shape); 42 | profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kOPT, opt_shape); 43 | profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMAX, max_shape); 44 | config->addOptimizationProfile(profile); 45 | 46 | nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config); 47 | if (engine == nullptr) { 48 | printf("Build engine failed.\n"); 49 | return false; 50 | } 51 | nvinfer1::IHostMemory* model_data = engine->serialize(); 52 | FILE* f = fopen(trt_file.c_str(), "wb"); 53 | fwrite(model_data->data(), 1, model_data->size(), f); 54 | fclose(f); 55 | 56 | model_data->destroy(); 57 | parser->destroy(); 58 | engine->destroy(); 59 | network->destroy(); 60 | config->destroy(); 61 | builder->destroy(); 62 | printf("Done.\n"); 63 | return true; 64 | } 65 | -------------------------------------------------------------------------------- /u2net/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(u2net VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | 11 | message(STATUS ${ALL_LIBS}) 12 | file(GLOB CPPS 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 17 | ${TensorRT_ROOT}/samples/common/logger.cpp 18 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 19 | ) 20 | list(REMOVE_ITEM CPPS u2net.cpp) 21 | message(STATUS CPPS = ${CPPS}) 22 | list (LENGTH CPPS length) 23 | message(STATUS ***length*** = ${length}) 24 | find_package(OpenCV REQUIRED) 25 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 26 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 27 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 28 | 29 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 30 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 31 | 32 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 33 | 
target_compile_options(${PROJECT_NAME} PUBLIC 34 | $<$:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 35 | 36 | add_executable(app_u2net app_u2net.cpp) 37 | 38 | # NVCC 39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 40 | target_link_libraries(app_u2net ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 41 | -------------------------------------------------------------------------------- /u2net/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google driver](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 3 | 4 | or export onnx: 5 | ```bash 6 | git clone https://github.com/xuebinqin/U-2-Net 7 | cd U-2-Net-master 8 | # Use the script alpha_export.py provided by this repo to export onnx 9 | cp alpha_export.py U-2-Net-master 10 | python alpha_export.py --net=u2net --weights=saved_models/u2net/u2net.pth 11 | python alpha_export.py --net=u2netp --weights=saved_models/u2netp/u2netp.pth 12 | ``` 13 | ## 2.edit and save onnx 14 | ```bash 15 | # note: If you have obtained onnx by downloading, this step can be ignored 16 | ignore 17 | ``` 18 | ## 3.compile onnx 19 | ```bash 20 | # put your onnx file in this path:tensorrt-alpha/data/u2net 21 | cd tensorrt-alpha/data/u2net 22 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 23 | 24 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=u2net.onnx --saveEngine=u2net.trt --buildOnly --minShapes=images:1x3x320x320 --optShapes=images:4x3x320x320 --maxShapes=images:8x3x320x320 25 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=u2netp.onnx --saveEngine=u2netp.trt --buildOnly --minShapes=images:1x3x320x320 --optShapes=images:4x3x320x320 --maxShapes=images:8x3x320x320 26 | ``` 27 | ## 4.run 28 | ```bash 29 | git clone https://github.com/FeiYull/tensorrt-alpha 30 | cd tensorrt-alpha/u2net 31 | mkdir build 32 | cd build 33 | cmake .. 34 | make -j10 35 | # note: the dstImage will be saved in tensorrt-alpha/u2net/build by default 36 | 37 | ## 320 38 | # infer image 39 | ./app_u2net --model=../../data/u2net/u2net.trt --size=320 --batch_size=1 --img=../../data/sailboat3.jpg --show --savePath 40 | 41 | # infer video 42 | ./app_u2net --model=../../data/u2net/u2net.trt --size=320 --batch_size=2 --video=../../data/people.mp4 --show 43 | 44 | # infer camera 45 | ./app_u2net --model=../../data/u2net/u2net.trt --size=320 --batch_size=2 --cam_id=0 --show 46 | ``` 47 | ## 5. 
appendix 48 | ignore -------------------------------------------------------------------------------- /u2net/alpha_export.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch.nn 3 | from model import U2NET 4 | from model import U2NETP 5 | 6 | import onnx 7 | import numpy as np 8 | import onnxsim # pip install onnx-simplifier 9 | import onnxruntime as ort 10 | import numpy as np 11 | 12 | class Alpha_U2Net(torch.nn.Module): 13 | def __init__(self, weight_file): 14 | super().__init__() 15 | self.model = U2NET(3, 1) 16 | self.model.load_state_dict(torch.load(model_path, map_location='cpu')) 17 | self.model.eval() 18 | 19 | def forward(self, x): 20 | y = self.model(x) 21 | return y[0] 22 | 23 | class Alpha_U2Netp(torch.nn.Module): 24 | def __init__(self, weight_file): 25 | super().__init__() 26 | self.model = U2NETP(3, 1) 27 | self.model.load_state_dict(torch.load(model_path, map_location='cpu')) 28 | self.model.eval() 29 | 30 | def forward(self, x): 31 | y = self.model(x) 32 | return y[0] 33 | """ 34 | example: 35 | python alpha_export.py --net=u2net --weights=saved_models/u2net/u2net.pth 36 | python alpha_export.py --net=u2netp --weights=saved_models/u2netp/u2netp.pth 37 | """ 38 | if __name__ == '__main__': 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('--net', type=str, default='u2net', help='net type') 41 | parser.add_argument('--weights', type=str, default='saved_models/u2net/u2net.pth', help='net path') 42 | opt = parser.parse_args() 43 | 44 | net = '' 45 | image_input_shape = [1, 3, 320, 320] 46 | image_input = torch.autograd.Variable(torch.randn(image_input_shape)) 47 | input_names = ["images"] 48 | output_names = ["output"] 49 | dynamic_axes = {"images": {0: "batch_size"}, "output": {0: "batch_size"}} 50 | 51 | net = opt.net 52 | if net=='u2net': # for u2net.pt 53 | net_name = "u2net" 54 | onnx_name = net_name + ".onnx" 55 | model_path = opt.weights 56 | u2net = Alpha_U2Net(model_path) 57 | torch.onnx.export(u2net, image_input, "saved_models/onnx/" + onnx_name, 58 | verbose=True, 59 | input_names=input_names, 60 | output_names=output_names, 61 | opset_version=11, # try opset_version=9 62 | training=False, 63 | dynamic_axes=dynamic_axes) 64 | elif net=='u2netp': # for u2netp.pt 65 | model_path = opt.weights 66 | u2netp = Alpha_U2Netp(model_path) 67 | torch.onnx.export(u2netp, image_input, "saved_models/onnx/u2netp.onnx", 68 | verbose=True, 69 | input_names=input_names, 70 | output_names=output_names, 71 | opset_version=11, 72 | training=False, 73 | dynamic_axes=dynamic_axes) 74 | -------------------------------------------------------------------------------- /u2net/app_u2net.cpp: -------------------------------------------------------------------------------- 1 | #include"u2net.h" 2 | 3 | void setParameters(utils::InitParameter& initParameters) 4 | { 5 | initParameters.class_names = utils::dataSets::coco80; 6 | //initParameters.num_class = 80; // for coco 7 | 8 | initParameters.batch_size = 1; 9 | initParameters.dst_h = 320; 10 | initParameters.dst_w = 320; 11 | initParameters.input_output_names = { "images", "output" }; 12 | initParameters.scale = 1.0; // div by max in u2net! 
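    // 0.485/0.456/0.406 and 0.229/0.224/0.225 below are the standard ImageNet RGB
    // mean/std; U-2-Net first scales each image by its own max value (hence
    // scale = 1.0 above) and then applies this mean/std, matching the RescaleT /
    // ToTensorLab preprocessing in the original U-2-Net repo.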
13 | initParameters.means[0] = 0.485; 14 | initParameters.means[1] = 0.456; 15 | initParameters.means[2] = 0.406; 16 | initParameters.stds[0] = 0.229; 17 | initParameters.stds[1] = 0.224; 18 | initParameters.stds[2] = 0.225; 19 | 20 | initParameters.save_path = ""; 21 | } 22 | 23 | void task(u2net::U2NET& u2net, const utils::InitParameter& param, std::vector& imgsBatch, const int& delayTime, const int& batchi, 24 | const bool& isShow, const bool& isSave) 25 | { 26 | u2net.copy(imgsBatch); 27 | utils::DeviceTimer d_t1; u2net.preprocess(imgsBatch); float t1 = d_t1.getUsedTime(); 28 | utils::DeviceTimer d_t2; u2net.infer(); float t2 = d_t2.getUsedTime(); 29 | utils::DeviceTimer d_t3; u2net.postprocess(imgsBatch); float t3 = d_t3.getUsedTime(); 30 | sample::gLogInfo << "preprocess time = " << t1 / param.batch_size << "; " 31 | "infer time = " << t2 / param.batch_size << "; " 32 | "postprocess time = " << t3 / param.batch_size << std::endl; 33 | if(isShow) 34 | u2net.showMask(imgsBatch, delayTime); 35 | if(isSave) 36 | u2net.saveMask(imgsBatch, param.save_path, param.batch_size, batchi); 37 | } 38 | 39 | int main(int argc, char** argv) 40 | { 41 | cv::CommandLineParser parser(argc, argv, 42 | { 43 | "{model || tensorrt model file }" 44 | "{size || image (h, w), eg: 640}" 45 | "{batch_size|| batch size }" 46 | "{video || video's path }" 47 | "{img || image's path }" 48 | "{cam_id || camera's device id }" 49 | "{show || if show the result }" 50 | "{savePath || save path, can be ignore}" 51 | }); 52 | // parameters 53 | utils::InitParameter param; 54 | setParameters(param); 55 | // path 56 | std::string model_path = "../../data/u2net/u2net.trt"; 57 | std::string video_path = "../../data/people.mp4"; 58 | std::string image_path = "../../data/6406403.jpg"; 59 | // camera' id 60 | int camera_id = 0; 61 | 62 | // get input 63 | utils::InputStream source; 64 | //source = utils::InputStream::IMAGE; 65 | //source = utils::InputStream::VIDEO; 66 | source = utils::InputStream::CAMERA; 67 | 68 | // update params from command line parser 69 | int size = -1; // w or h 70 | int batch_size = 8; 71 | bool is_show = false; 72 | bool is_save = false; 73 | if(parser.has("model")) 74 | { 75 | model_path = parser.get("model"); 76 | sample::gLogInfo << "model_path = " << model_path << std::endl; 77 | } 78 | if(parser.has("size")) 79 | { 80 | size = parser.get("size"); 81 | sample::gLogInfo << "size = " << size << std::endl; 82 | param.dst_h = param.dst_w = size; 83 | } 84 | if(parser.has("batch_size")) 85 | { 86 | batch_size = parser.get("batch_size"); 87 | sample::gLogInfo << "batch_size = " << batch_size << std::endl; 88 | param.batch_size = batch_size; 89 | } 90 | if(parser.has("video")) 91 | { 92 | source = utils::InputStream::VIDEO; 93 | video_path = parser.get("video"); 94 | sample::gLogInfo << "video_path = " << video_path << std::endl; 95 | } 96 | if(parser.has("img")) 97 | { 98 | source = utils::InputStream::IMAGE; 99 | image_path = parser.get("img"); 100 | sample::gLogInfo << "image_path = " << image_path << std::endl; 101 | } 102 | if(parser.has("cam_id")) 103 | { 104 | source = utils::InputStream::CAMERA; 105 | camera_id = parser.get("cam_id"); 106 | sample::gLogInfo << "camera_id = " << camera_id << std::endl; 107 | } 108 | if(parser.has("show")) 109 | { 110 | is_show = true; 111 | sample::gLogInfo << "is_show = " << is_show << std::endl; 112 | } 113 | if(parser.has("savePath")) 114 | { 115 | is_save = true; 116 | param.save_path = parser.get("savePath"); 117 | sample::gLogInfo << "save_path = " << 
param.save_path << std::endl; 118 | } 119 | 120 | int total_batches = 0; 121 | int delay_time = 1; 122 | cv::VideoCapture capture; 123 | if (!setInputStream(source, image_path, video_path, camera_id, 124 | capture, total_batches, delay_time, param)) 125 | { 126 | sample::gLogError << "read the input data errors!" << std::endl; 127 | return -1; 128 | } 129 | u2net::U2NET u2net(param); 130 | // read model 131 | std::vector trt_file = utils::loadModel(model_path); 132 | if (trt_file.empty()) 133 | { 134 | sample::gLogError << "trt_file is empty!" << std::endl; 135 | return -1; 136 | } 137 | // init model 138 | if (!u2net.init(trt_file)) 139 | { 140 | sample::gLogError << "initEngine() ocur errors!" << std::endl; 141 | return -1; 142 | } 143 | u2net.check(); 144 | cv::Mat frame; 145 | std::vector imgs_batch; 146 | imgs_batch.reserve(param.batch_size); 147 | sample::gLogInfo << imgs_batch.capacity() << std::endl; 148 | int batchi = 0; 149 | while (capture.isOpened()) 150 | { 151 | if (batchi >= total_batches && source != utils::InputStream::CAMERA) 152 | { 153 | break; 154 | } 155 | if (imgs_batch.size() < param.batch_size) 156 | { 157 | if (source != utils::InputStream::IMAGE) 158 | { 159 | capture.read(frame); 160 | } 161 | else 162 | { 163 | frame = cv::imread(image_path); 164 | } 165 | 166 | if (frame.empty()) 167 | { 168 | sample::gLogWarning << "no more video or camera frame" << std::endl; 169 | task(u2net, param, imgs_batch, delay_time, batchi, is_show, is_save); 170 | imgs_batch.clear(); 171 | batchi++; 172 | break; 173 | } 174 | else 175 | { 176 | imgs_batch.emplace_back(frame.clone()); 177 | } 178 | } 179 | else 180 | { 181 | task(u2net, param, imgs_batch, delay_time, batchi, is_show, is_save); 182 | imgs_batch.clear(); 183 | batchi++; 184 | } 185 | } 186 | return -1; 187 | } 188 | 189 | -------------------------------------------------------------------------------- /u2net/u2net.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/common_include.h" 3 | #include"../utils/utils.h" 4 | #include"../utils/kernel_function.h" 5 | namespace u2net 6 | { 7 | class U2NET 8 | { 9 | public: 10 | U2NET(const utils::InitParameter& param); 11 | ~U2NET(); 12 | 13 | public: 14 | bool init(const std::vector& trtFile); 15 | void check(); 16 | void copy(const std::vector& imgsBatch); 17 | void preprocess(const std::vector& imgsBatch); 18 | bool infer(); 19 | void postprocess(const std::vector& imgsBatch); 20 | void showMask(const std::vector& imgsBatch, const int& cvDelayTime); 21 | void saveMask(const std::vector& imgsBatch, const std::string& savePath, const int& batchSize, const int& batchi); 22 | void reset(); 23 | private: 24 | std::shared_ptr m_engine; 25 | std::unique_ptr m_context; 26 | 27 | //private: 28 | protected: 29 | utils::InitParameter m_param; 30 | nvinfer1::Dims m_output_dims; 31 | int m_output_area; 32 | std::vector> m_objectss; 33 | 34 | 35 | utils::AffineMat m_dst2src; 36 | utils::AffineMat m_src2dst; 37 | 38 | // input 39 | float* m_input_src_device; 40 | float* m_input_resize_device; 41 | float* m_input_rgb_device; 42 | float* m_input_norm_device; 43 | float* m_input_hwc_device; 44 | 45 | float* m_max_val_device; 46 | float* m_min_val_device; 47 | 48 | // output 49 | float* m_output_src_device; 50 | float* m_output_resize_device; 51 | float* m_output_resize_host; 52 | float* m_output_mask_host; 53 | 54 | }; 55 | } 56 | 57 | void u2netDivMaxDevice(const int& batchSize, float* src, int srcWidth, int srcHeight, int 
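// u2netDivMaxDevice scales every image in the batch by its per-image maximum
// (U-2-Net's input scaling); u2netNormPredDevice below evidently implements the
// repo's normPRED step, (pred - min) / (max - min), stretching the saliency map
// to [0, 1]: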
channel, float* maxVals); 58 | 59 | void u2netNormPredDevice(const int& batchSize, float* src, int srcWidth, int srcHeight, float scale, float* minVals, float* maxVals); -------------------------------------------------------------------------------- /utils/common_include.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // tensorrt 3 | #include 4 | #include 5 | #include 6 | // cuda 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | // opencv 16 | #include 17 | // cpp std 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include -------------------------------------------------------------------------------- /utils/kernel_function.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/common_include.h" 3 | #include"../utils/utils.h" 4 | 5 | #define CHECK(op) __check_cuda_runtime((op), #op, __FILE__, __LINE__) 6 | 7 | bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line); 8 | 9 | #define BLOCK_SIZE 8 10 | 11 | //note: resize rgb with padding 12 | void resizeDevice(const int& batch_size, float* src, int src_width, int src_height, 13 | float* dst, int dstWidth, int dstHeight, 14 | float paddingValue, utils::AffineMat matrix); 15 | 16 | //overload:resize rgb with padding, but src's type is uin8 17 | void resizeDevice(const int& batch_size, unsigned char* src, int src_width, int src_height, 18 | float* dst, int dstWidth, int dstHeight, 19 | float paddingValue, utils::AffineMat matrix); 20 | 21 | // overload: resize rgb/gray without padding 22 | void resizeDevice(const int& batchSize, float* src, int srcWidth, int srcHeight, 23 | float* dst, int dstWidth, int dstHeight, 24 | utils::ColorMode mode, utils::AffineMat matrix); 25 | 26 | void bgr2rgbDevice(const int& batch_size, float* src, int srcWidth, int srcHeight, 27 | float* dst, int dstWidth, int dstHeight); 28 | 29 | void normDevice(const int& batch_size, float* src, int srcWidth, int srcHeight, 30 | float* dst, int dstWidth, int dstHeight, 31 | utils::InitParameter norm_param); 32 | 33 | void hwc2chwDevice(const int& batch_size, float* src, int srcWidth, int srcHeight, 34 | float* dst, int dstWidth, int dstHeight); 35 | 36 | void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight); 37 | 38 | // nms fast 39 | void nmsDeviceV1(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea); 40 | 41 | // nms sort 42 | void nmsDeviceV2(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, 43 | int* idx, float* conf); 44 | 45 | void copyWithPaddingDevice(const int& batchSize, float* src, int srcWidth, int srcHeight, 46 | float* dst, int dstWidth, int dstHeight, float paddingValue, int padTop, int padLeft); -------------------------------------------------------------------------------- /utils/tracking/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/utils/tracking/.gitkeep -------------------------------------------------------------------------------- /utils/yolo.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/common_include.h" 3 | 
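// Judging by the buffer names below, preprocessing chains the kernel_function.h
// kernels as resizeDevice -> bgr2rgbDevice -> normDevice -> hwc2chwDevice, and
// postprocessing runs decodeDevice followed by nmsDeviceV1 (fast) or nmsDeviceV2
// (sort-based) on m_output_objects_device.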
#include"../utils/utils.h" 4 | #include"../utils/kernel_function.h" 5 | 6 | namespace yolo 7 | { 8 | class YOLO 9 | { 10 | public: 11 | YOLO(const utils::InitParameter& param); 12 | ~YOLO(); 13 | 14 | public: 15 | virtual bool init(const std::vector& trtFile); 16 | virtual void check(); 17 | virtual void copy(const std::vector& imgsBatch); 18 | virtual void preprocess(const std::vector& imgsBatch); 19 | virtual bool infer(); 20 | virtual void postprocess(const std::vector& imgsBatch); 21 | virtual void reset(); 22 | 23 | public: 24 | std::vector> getObjectss() const; 25 | 26 | protected: 27 | std::shared_ptr m_engine; 28 | std::unique_ptr m_context; 29 | 30 | protected: 31 | utils::InitParameter m_param; 32 | nvinfer1::Dims m_output_dims; 33 | int m_output_area; 34 | int m_total_objects; 35 | std::vector> m_objectss; 36 | utils::AffineMat m_dst2src; 37 | 38 | // input 39 | unsigned char* m_input_src_device; 40 | float* m_input_resize_device; 41 | float* m_input_rgb_device; 42 | float* m_input_norm_device; 43 | float* m_input_hwc_device; 44 | // output 45 | float* m_output_src_device; 46 | float* m_output_objects_device; 47 | float* m_output_objects_host; 48 | int m_output_objects_width; 49 | int* m_output_idx_device; 50 | float* m_output_conf_device; 51 | }; 52 | } 53 | -------------------------------------------------------------------------------- /vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "C++ Launch", 6 | "type": "cppdbg", 7 | "request": "launch", 8 | "program": "${workspaceFolder}/build/app_yolox", 9 | "args": [ 10 | 11 | "--model=../data/yolox/yolox_tiny.trt", 12 | "--size=416", 13 | "--batch_size=8", 14 | 15 | // image 16 | //"--img= ../data/6406403.jpg", 17 | 18 | // video 19 | "--video=../data/people.mp4", 20 | 21 | // camera 22 | // "--cam_id= 0", 23 | 24 | "--show", 25 | "--savePath= build/" 26 | ], 27 | "stopAtEntry": false, 28 | "cwd": "${workspaceFolder}", 29 | //"preLaunchTask": "C/C++: g++ build active file", 30 | //"miDebuggerPath": "/usr/bin/gdb" 31 | } 32 | ] 33 | } -------------------------------------------------------------------------------- /yolonas/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolo_nas VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | message(STATUS ${ALL_LIBS}) 11 | file(GLOB CPPS 12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 16 | ${TensorRT_ROOT}/samples/common/logger.cpp 17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 18 | #${TensorRT_ROOT}/samples/common/sampleUtils.cpp 19 | ) 20 | list(REMOVE_ITEM CPPS app_yolo_nas.cpp) 21 | message(STATUS CPPS = ${CPPS}) 22 | 23 | list (LENGTH CPPS length) 24 | message(STATUS ***length*** = ${length}) 25 | find_package(OpenCV REQUIRED) 26 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 27 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 28 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 29 | add_library(${PROJECT_NAME} 
SHARED ${CPPS}) 30 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 31 | 32 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 33 | target_compile_options(${PROJECT_NAME} PUBLIC 34 | $<$:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 35 | 36 | add_executable(app_yolo_nas app_yolo_nas.cpp) 37 | 38 | # NVCC 39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 40 | target_link_libraries(app_yolo_nas ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 41 | -------------------------------------------------------------------------------- /yolonas/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google driver](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) or export onnx: 3 | ```bash 4 | pip install super-gradients==3.3.1 5 | cd super-gradients 6 | # copy the python script provided in this repository to your workspace 7 | # note:The weight file is downloaded automatically 8 | cp TensorRT-Alpha/yolonas/alpha_export_dynamic.py YOUR_WORKSPACE 9 | 10 | # for YOLO_NAS_S 11 | # Changing lines 9-11 of the code allows you to switch to other models, eg:YOLO_NAS_M 12 | python alpha_export_dynamic.py 13 | ``` 14 | 15 | ## 2.edit and save onnx 16 | ```bash 17 | # note: If you have obtained onnx by downloading, this step can be ignored 18 | ignore 19 | ``` 20 | 21 | ## 3.compile onnx 22 | ```bash 23 | # put your onnx file in this path:tensorrt-alpha/data/yolonas 24 | cd tensorrt-alpha/data/yolonas 25 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 26 | # 640 27 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolonas_s.onnx --saveEngine=yolonas_s.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 28 | ``` 29 | ## 4.run 30 | ```bash 31 | git clone https://github.com/FeiYull/tensorrt-alpha 32 | cd tensorrt-alpha/yolonas 33 | mkdir build 34 | cd build 35 | cmake .. 36 | make -j10 37 | # note: the dstImage will be saved in tensorrt-alpha/yolonas/build by default 38 | 39 | ## 640 40 | # infer image 41 | ./app_yolo_nas --model=../../data/yolo_nas/yolonas_s.trt --size=640 --batch_size=1 --img=../../data/6406407.jpg --show --savePath=../ 42 | 43 | # infer video 44 | ./app_yolo_nas --model=../../data/yolo_nas/yolonas_s.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show 45 | 46 | # infer camera 47 | ./app_yolo_nas --model=../../data/yolo_nas/yolonas_s.trt --size=640 --batch_size=2 --cam_id=0 --show 48 | ``` 49 | ## 5. 
appendix 50 | ignore -------------------------------------------------------------------------------- /yolonas/alpha_export_dynamic.py: -------------------------------------------------------------------------------- 1 | from super_gradients.training import models 2 | from super_gradients.common.object_names import Models 3 | import torch 4 | import numpy as np 5 | 6 | class AlphaYoloNas(torch.nn.Module): 7 | def __init__(self): 8 | super().__init__() 9 | self.model = models.get(Models.YOLO_NAS_S, pretrained_weights="coco") 10 | # self.model = models.get(Models.YOLO_NAS_M, pretrained_weights="coco") 11 | # self.model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco") 12 | self.model.eval() 13 | 14 | def forward(self, x): 15 | y = self.model(x) 16 | return torch.cat((y[0], y[1]), 2) 17 | 18 | input_size = (1, 3, 640, 640) 19 | onnx_input = torch.Tensor(np.zeros(input_size)) 20 | 21 | net = AlphaYoloNas() 22 | input_names = ["images"] 23 | output_names = ["output"] 24 | dynamic_axes = {input_names[0]: {0: "batch_size"}, 25 | output_names[0]: {0: "batch_size"}} 26 | 27 | torch.onnx.export(net, onnx_input, "yolonas_s.onnx", 28 | #verbose=True, 29 | input_names=input_names, 30 | output_names=output_names, 31 | opset_version=12, 32 | dynamic_axes=dynamic_axes) -------------------------------------------------------------------------------- /yolonas/app_yolo_nas.cpp: -------------------------------------------------------------------------------- 1 | #include"../utils/yolo.h" 2 | #include"yolo_nas.h" 3 | 4 | void setParameters(utils::InitParameter& initParameters) 5 | { 6 | initParameters.class_names = utils::dataSets::coco80; 7 | //initParameters.class_names = utils::dataSets::voc20; 8 | initParameters.num_class = 80; // for coco 9 | //initParameters.num_class = 20; // for voc2012 10 | initParameters.batch_size = 8; 11 | initParameters.dst_h = 636; 12 | initParameters.dst_w = 636; 13 | initParameters.input_output_names = { "images", "output" }; 14 | initParameters.conf_thresh = 0.25f; 15 | initParameters.iou_thresh = 0.7f; 16 | initParameters.save_path = ""; 17 | } 18 | 19 | void task(YOLO_NAS& yolo, const utils::InitParameter& param, std::vector& imgsBatch, const int& delayTime, const int& batchi, 20 | const bool& isShow, const bool& isSave) 21 | { 22 | utils::DeviceTimer d_t0; yolo.copy(imgsBatch); float t0 = d_t0.getUsedTime(); 23 | utils::DeviceTimer d_t1; yolo.preprocess(imgsBatch); float t1 = d_t1.getUsedTime(); 24 | utils::DeviceTimer d_t2; yolo.infer(); float t2 = d_t2.getUsedTime(); 25 | utils::DeviceTimer d_t3; yolo.postprocess(imgsBatch); float t3 = d_t3.getUsedTime(); 26 | sample::gLogInfo << 27 | "preprocess time = " << t1 / param.batch_size << "; " 28 | "infer time = " << t2 / param.batch_size << "; " 29 | "postprocess time = " << t3 / param.batch_size << std::endl; 30 | 31 | if(isShow) 32 | utils::show(yolo.getObjectss(), param.class_names, delayTime, imgsBatch); 33 | if(isSave) 34 | utils::save(yolo.getObjectss(), param.class_names, param.save_path, imgsBatch, param.batch_size, batchi); 35 | yolo.reset(); 36 | } 37 | 38 | int main(int argc, char** argv) 39 | { 40 | cv::CommandLineParser parser(argc, argv, 41 | { 42 | "{model || tensorrt model file }" 43 | "{size || image (h, w), eg: 640 }" 44 | "{batch_size|| batch size }" 45 | "{video || video's path }" 46 | "{img || image's path }" 47 | "{cam_id || camera's device id }" 48 | "{show || if show the result }" 49 | "{savePath || save path, can be ignore}" 50 | }); 51 | utils::InitParameter param; 52 | setParameters(param); 
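    // note: dst_h/dst_w default to 636 rather than 640 in setParameters(); this
    // apparently mirrors super-gradients' YOLO-NAS preprocessing, which rescales
    // to 636 and then pads up to the 640x640 network input (see
    // copyWithPaddingDevice in utils/kernel_function.h).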
53 | std::string model_path = "../../data/yolov8/yolonas_s.trt"; 54 | std::string video_path = "../../data/people.mp4"; 55 | std::string image_path = "../../data/bus.jpg"; 56 | int camera_id = 0; 57 | utils::InputStream source; 58 | source = utils::InputStream::IMAGE; 59 | //source = utils::InputStream::VIDEO; 60 | //source = utils::InputStream::CAMERA; 61 | 62 | int size = -1; 63 | int batch_size = 8; 64 | bool is_show = false; 65 | bool is_save = false; 66 | if(parser.has("model")) 67 | { 68 | model_path = parser.get<std::string>("model"); 69 | sample::gLogInfo << "model_path = " << model_path << std::endl; 70 | } 71 | if(parser.has("size")) 72 | { 73 | size = parser.get<int>("size"); 74 | sample::gLogInfo << "size = " << size << std::endl; 75 | param.dst_h = param.dst_w = size; 76 | } 77 | if(parser.has("batch_size")) 78 | { 79 | batch_size = parser.get<int>("batch_size"); 80 | sample::gLogInfo << "batch_size = " << batch_size << std::endl; 81 | param.batch_size = batch_size; 82 | } 83 | if(parser.has("video")) 84 | { 85 | source = utils::InputStream::VIDEO; 86 | video_path = parser.get<std::string>("video"); 87 | sample::gLogInfo << "video_path = " << video_path << std::endl; 88 | } 89 | if(parser.has("img")) 90 | { 91 | source = utils::InputStream::IMAGE; 92 | image_path = parser.get<std::string>("img"); 93 | sample::gLogInfo << "image_path = " << image_path << std::endl; 94 | } 95 | if(parser.has("cam_id")) 96 | { 97 | source = utils::InputStream::CAMERA; 98 | camera_id = parser.get<int>("cam_id"); 99 | sample::gLogInfo << "camera_id = " << camera_id << std::endl; 100 | } 101 | if(parser.has("show")) 102 | { 103 | is_show = true; 104 | sample::gLogInfo << "is_show = " << is_show << std::endl; 105 | } 106 | if(parser.has("savePath")) 107 | { 108 | is_save = true; 109 | param.save_path = parser.get<std::string>("savePath"); 110 | sample::gLogInfo << "save_path = " << param.save_path << std::endl; 111 | } 112 | int total_batches = 0; 113 | int delay_time = 1; 114 | cv::VideoCapture capture; 115 | if (!setInputStream(source, image_path, video_path, camera_id, 116 | capture, total_batches, delay_time, param)) 117 | { 118 | sample::gLogError << "failed to read the input data!" << std::endl; 119 | return -1; 120 | } 121 | YOLO_NAS yolo(param); 122 | std::vector<unsigned char> trt_file = utils::loadModel(model_path); 123 | if (trt_file.empty()) 124 | { 125 | sample::gLogError << "trt_file is empty!" << std::endl; 126 | return -1; 127 | } 128 | if (!yolo.init(trt_file)) 129 | { 130 | sample::gLogError << "init() failed!"
<< std::endl; 131 | return -1; 132 | } 133 | yolo.check(); 134 | cv::Mat frame; 135 | std::vector<cv::Mat> imgs_batch; 136 | imgs_batch.reserve(param.batch_size); 137 | sample::gLogInfo << imgs_batch.capacity() << std::endl; 138 | int batchi = 0; 139 | while (capture.isOpened()) 140 | { 141 | if (batchi >= total_batches && source != utils::InputStream::CAMERA) 142 | { 143 | break; 144 | } 145 | if (imgs_batch.size() < param.batch_size) 146 | { 147 | if (source != utils::InputStream::IMAGE) 148 | { 149 | capture.read(frame); 150 | } 151 | else 152 | { 153 | frame = cv::imread(image_path); 154 | } 155 | 156 | if (frame.empty()) 157 | { 158 | sample::gLogWarning << "no more video or camera frames" << std::endl; 159 | task(yolo, param, imgs_batch, delay_time, batchi, is_show, is_save); 160 | imgs_batch.clear(); 161 | batchi++; 162 | break; 163 | } 164 | else 165 | { 166 | imgs_batch.emplace_back(frame.clone()); 167 | } 168 | } 169 | else 170 | { 171 | task(yolo, param, imgs_batch, delay_time, batchi, is_show, is_save); 172 | imgs_batch.clear(); 173 | batchi++; 174 | } 175 | } 176 | return 0; 177 | } 178 | 179 | -------------------------------------------------------------------------------- /yolonas/decode_yolo_nas.cu: -------------------------------------------------------------------------------- 1 | #include "decode_yolo_nas.h" 2 | 3 | __global__ void decode_yolo_nas_device_kernel(int batch_size, int num_class, int topK, float conf_thresh, 4 | float* src, int srcWidth, int srcHeight, int srcArea, 5 | float* dst, int dstWidth, int dstHeight, int dstArea) 6 | { 7 | int dx = blockDim.x * blockIdx.x + threadIdx.x; 8 | int dy = blockDim.y * blockIdx.y + threadIdx.y; 9 | if (dx >= srcHeight || dy >= batch_size) 10 | { 11 | return; 12 | } 13 | float* pitem = src + dy * srcArea + dx * srcWidth; 14 | float* class_confidence = pitem + 4; 15 | float confidence = *class_confidence++; 16 | int label = 0; 17 | for (int i = 1; i < num_class; ++i, ++class_confidence) 18 | { 19 | if (*class_confidence > confidence) 20 | { 21 | confidence = *class_confidence; 22 | label = i; 23 | } 24 | } 25 | if (confidence < conf_thresh) 26 | { 27 | return; 28 | } 29 | int index = atomicAdd(dst + dy * dstArea, 1); 30 | 31 | if (index >= topK) 32 | { 33 | return; 34 | } 35 | float left = *pitem++; 36 | float top = *pitem++; 37 | float right = *pitem++; 38 | float bottom = *pitem++; 39 | 40 | float* pout_item = dst + dy * dstArea + 1 + index * dstWidth; 41 | *pout_item++ = left; 42 | *pout_item++ = top; 43 | *pout_item++ = right; 44 | *pout_item++ = bottom; 45 | *pout_item++ = confidence; 46 | *pout_item++ = label; 47 | *pout_item++ = 1; 48 | } 49 | 50 | void yolo_nas::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight) 51 | { 52 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE); 53 | dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE, 54 | (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE); 55 | int dstArea = 1 + dstWidth * dstHeight; 56 | decode_yolo_nas_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size, param.num_class, param.topK, param.conf_thresh, 57 | src, srcWidth, srcHeight, srcArea, 58 | dst, dstWidth, dstHeight, dstArea); 59 | } -------------------------------------------------------------------------------- /yolonas/decode_yolo_nas.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/utils.h" 3 | #include"../utils/kernel_function.h" 4 |
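// Layout of the decode output buffer filled by decodeDevice (see decode_yolo_nas.cu): for each image in the batch, dst holds [num_candidates | candidate_0 | candidate_1 | ...], where every candidate is dstWidth floats: left, top, right, bottom, confidence, label, keep_flag. dstWidth is the per-object stride (7) and dstHeight is topK, so the per-image stride is 1 + dstWidth * dstHeight.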
5 | namespace yolo_nas 6 | { 7 | void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight); 8 | } 9 | -------------------------------------------------------------------------------- /yolonas/yolo_nas.cpp: -------------------------------------------------------------------------------- 1 | #include"yolo_nas.h" 2 | #include"decode_yolo_nas.h" 3 | 4 | YOLO_NAS::YOLO_NAS(const utils::InitParameter& param) :yolo::YOLO(param) 5 | { 6 | m_resize_shape = cv::Size(636, 636); 7 | m_input_resize_padding_device = nullptr; 8 | CHECK(cudaMalloc(&m_input_resize_padding_device, param.batch_size * 3 * m_param.dst_h * m_param.dst_w * sizeof(float))); 9 | } 10 | 11 | YOLO_NAS::~YOLO_NAS() 12 | { 13 | CHECK(cudaFree(m_input_resize_padding_device)); 14 | } 15 | 16 | bool YOLO_NAS::init(const std::vector<unsigned char>& trtFile) 17 | { 18 | if (trtFile.empty()) 19 | { 20 | return false; 21 | } 22 | std::unique_ptr<nvinfer1::IRuntime> runtime = 23 | std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger())); 24 | if (runtime == nullptr) 25 | { 26 | return false; 27 | } 28 | this->m_engine = std::unique_ptr<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(trtFile.data(), trtFile.size())); 29 | if (this->m_engine == nullptr) 30 | { 31 | return false; 32 | } 33 | this->m_context = std::unique_ptr<nvinfer1::IExecutionContext>(this->m_engine->createExecutionContext()); 34 | if (this->m_context == nullptr) 35 | { 36 | return false; 37 | } 38 | if (m_param.dynamic_batch) 39 | { 40 | this->m_context->setBindingDimensions(0, nvinfer1::Dims4(m_param.batch_size, 3, m_param.dst_h, m_param.dst_w)); 41 | } 42 | m_output_dims = this->m_context->getBindingDimensions(1); 43 | m_total_objects = m_output_dims.d[1]; 44 | assert(m_param.batch_size <= m_output_dims.d[0]); 45 | m_output_area = 1; 46 | for (int i = 1; i < m_output_dims.nbDims; i++) 47 | { 48 | if (m_output_dims.d[i] != 0) 49 | { 50 | m_output_area *= m_output_dims.d[i]; 51 | } 52 | } 53 | CHECK(cudaMalloc(&m_output_src_device, m_param.batch_size * m_output_area * sizeof(float))); 54 | float a = float(m_resize_shape.height) / m_param.src_h; 55 | float b = float(m_resize_shape.width) / m_param.src_w;
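// The 2x3 affine built below implements the letterbox mapping: src2dst scales the source frame by min(636/src_h, 636/src_w) and centers it on the resize canvas; invertAffineTransform then yields dst2src, which postprocess() uses to map detected boxes back to source-image coordinates.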
56 | float scale = a < b ? a : b; 57 | cv::Mat src2dst = (cv::Mat_<float>(2, 3) << scale, 0.f, (-scale * m_param.src_w + m_resize_shape.width + scale - 1) * 0.5, 58 | 0.f, scale, (-scale * m_param.src_h + m_resize_shape.height + scale - 1) * 0.5); 59 | cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1); 60 | cv::invertAffineTransform(src2dst, dst2src); 61 | int pad_height = m_param.dst_h - m_resize_shape.height; 62 | int pad_width = m_param.dst_w - m_resize_shape.width; 63 | m_pad_top = pad_height / 2; 64 | m_pad_left = pad_width / 2; 65 | 66 | m_dst2src.v0 = dst2src.ptr<float>(0)[0]; 67 | m_dst2src.v1 = dst2src.ptr<float>(0)[1]; 68 | m_dst2src.v2 = dst2src.ptr<float>(0)[2]; 69 | m_dst2src.v3 = dst2src.ptr<float>(1)[0]; 70 | m_dst2src.v4 = dst2src.ptr<float>(1)[1]; 71 | m_dst2src.v5 = dst2src.ptr<float>(1)[2]; 72 | return true; 73 | } 74 | 75 | void YOLO_NAS::preprocess(const std::vector<cv::Mat>& imgsBatch) 76 | { 77 | resizeDevice(m_param.batch_size, m_input_src_device, m_param.src_w, m_param.src_h, 78 | m_input_resize_device, m_resize_shape.width, m_resize_shape.height, 114, m_dst2src); 79 | copyWithPaddingDevice(m_param.batch_size, m_input_resize_device, m_resize_shape.width, m_resize_shape.height, 80 | m_input_resize_padding_device, m_param.dst_w, m_param.dst_h, 114.f, m_pad_top, m_pad_left); 81 | bgr2rgbDevice(m_param.batch_size, m_input_resize_padding_device, m_param.dst_w, m_param.dst_h, 82 | m_input_rgb_device, m_param.dst_w, m_param.dst_h); 83 | normDevice(m_param.batch_size, m_input_rgb_device, m_param.dst_w, m_param.dst_h, 84 | m_input_norm_device, m_param.dst_w, m_param.dst_h, m_param); 85 | hwc2chwDevice(m_param.batch_size, m_input_norm_device, m_param.dst_w, m_param.dst_h, 86 | m_input_hwc_device, m_param.dst_w, m_param.dst_h); 87 | } 88 | void YOLO_NAS::postprocess(const std::vector<cv::Mat>& imgsBatch) 89 | { 90 | yolo_nas::decodeDevice(m_param, m_output_src_device, 4 + m_param.num_class, m_total_objects, m_output_area, 91 | m_output_objects_device, m_output_objects_width, m_param.topK); 92 | nmsDeviceV1(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1); 93 | //nmsDeviceV2(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1, m_output_idx_device, m_output_conf_device); 94 | CHECK(cudaMemcpy(m_output_objects_host, m_output_objects_device, m_param.batch_size * sizeof(float) * (1 + 7 * m_param.topK), cudaMemcpyDeviceToHost)); 95 | for (size_t bi = 0; bi < imgsBatch.size(); bi++) 96 | { 97 | int num_boxes = std::min((int)(m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1))[0], m_param.topK); 98 | for (int i = 0; i < num_boxes; i++) 99 | { 100 | float* ptr = m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1) + m_output_objects_width * i + 1; 101 | int keep_flag = ptr[6]; 102 | if (keep_flag) 103 | { 104 | ptr[0] -= m_pad_left; 105 | ptr[1] -= m_pad_top; 106 | ptr[2] -= m_pad_left; 107 | ptr[3] -= m_pad_top; 108 | float x_lt = m_dst2src.v0 * ptr[0] + m_dst2src.v1 * ptr[1] + m_dst2src.v2; 109 | float y_lt = m_dst2src.v3 * ptr[0] + m_dst2src.v4 * ptr[1] + m_dst2src.v5; 110 | float x_rb = m_dst2src.v0 * ptr[2] + m_dst2src.v1 * ptr[3] + m_dst2src.v2; 111 | float y_rb = m_dst2src.v3 * ptr[2] + m_dst2src.v4 * ptr[3] + m_dst2src.v5; 112 | m_objectss[bi].emplace_back(x_lt, y_lt, x_rb, y_rb, ptr[4], (int)ptr[5]); 113 | } 114 | } 115 | } 116 | } -------------------------------------------------------------------------------- /yolonas/yolo_nas.h:
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/yolo.h" 3 | #include"../utils/utils.h" 4 | class YOLO_NAS : public yolo::YOLO 5 | { 6 | public: 7 | YOLO_NAS(const utils::InitParameter& param); 8 | ~YOLO_NAS(); 9 | virtual bool init(const std::vector<unsigned char>& trtFile); 10 | virtual void preprocess(const std::vector<cv::Mat>& imgsBatch); 11 | virtual void postprocess(const std::vector<cv::Mat>& imgsBatch); 12 | 13 | private: 14 | float* m_input_resize_padding_device; 15 | cv::Size m_resize_shape; 16 | int m_pad_top; 17 | int m_pad_left; 18 | }; -------------------------------------------------------------------------------- /yolor/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolor VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | 8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 11 | message(STATUS ${ALL_LIBS}) 12 | file(GLOB CPPS 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 16 | ${TensorRT_ROOT}/samples/common/logger.cpp 17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 18 | ) 19 | list(REMOVE_ITEM CPPS app_yolor.cpp) 20 | message(STATUS CPPS = ${CPPS}) 21 | list (LENGTH CPPS length) 22 | message(STATUS ***length*** = ${length}) 23 | find_package(OpenCV REQUIRED) 24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 27 | 28 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 29 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 30 | 31 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 32 | target_compile_options(${PROJECT_NAME} PUBLIC 33 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 34 | 35 | add_executable(app_yolor app_yolor.cpp) 36 | 37 | # NVCC 38 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 39 | target_link_libraries(app_yolor ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 40 | -------------------------------------------------------------------------------- /yolor/README.md: -------------------------------------------------------------------------------- 1 | ## Notes 2 | - 0. Please use the export script "alpha_export.py" provided by this repository. 3 | - 1. With torch 1.7 + onnx 1.8.0, exporting onnx fails with: 4 | "RuntimeError: Exporting the operator silu to ONNX opset version 11 is not supported. Please open a bug to request ONNX export support for the missing operator." 5 | - 2. Switching the environment to torch 1.9 + onnx 1.11.0 resolves the unsupported-op problem and the export succeeds. 6 | 7 | 8 | ## 1.
get onnx 9 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 10 | 11 | or export onnx: 12 | ```bash 13 | git clone https://github.com/WongKinYiu/yolor && cd yolor 14 | git checkout 462858e8737f56388f812cfe381a69c4ffca0cc7 15 | # Please use the "alpha_export.py" file provided by TensorRT-Alpha to export onnx 16 | # copy it from tensorrt-alpha/yolor into the current directory first (adjust the path to your clone): 17 | cp ../tensorrt-alpha/yolor/alpha_export.py . 18 | 19 | # 1280 20 | python alpha_export.py --net=yolor_p6 21 | # 640 22 | python alpha_export.py --net=yolor_csp 23 | python alpha_export.py --net=yolor_csp_star 24 | python alpha_export.py --net=yolor_csp_x 25 | python alpha_export.py --net=yolor_csp_x_star 26 | ``` 27 | ## 2.edit and save onnx 28 | ```bash 29 | # note: If you have obtained onnx by downloading, this step can be ignored 30 | ignore 31 | ``` 32 | ## 3.compile onnx 33 | ```bash 34 | # put your onnx file in this path:tensorrt-alpha/data/yolor 35 | cd tensorrt-alpha/data/yolor 36 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 37 | 38 | # 1280 39 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_p6.onnx --saveEngine=yolor_p6.trt --buildOnly --minShapes=images:1x3x1280x1280 --optShapes=images:2x3x1280x1280 --maxShapes=images:4x3x1280x1280 40 | 41 | # 640 42 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_csp.onnx --saveEngine=yolor_csp.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 43 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_csp_star.onnx --saveEngine=yolor_csp_star.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 44 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_csp_x.onnx --saveEngine=yolor_csp_x.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 45 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_csp_x_star.onnx --saveEngine=yolor_csp_x_star.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 46 | ``` 47 | ## 4.run 48 | ```bash 49 | git clone https://github.com/FeiYull/tensorrt-alpha 50 | cd tensorrt-alpha/yolor 51 | mkdir build 52 | cd build 53 | cmake .. 54 | make -j10 55 | # note: the dstImage will be saved in tensorrt-alpha/yolor/build by default 56 | 57 | ## 640 58 | # infer image 59 | ./app_yolor --model=../../data/yolor/yolor_csp.trt --size=640 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../ 60 | 61 | # infer video 62 | ./app_yolor --model=../../data/yolor/yolor_csp.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show 63 | 64 | # infer camera 65 | ./app_yolor --model=../../data/yolor/yolor_csp.trt --size=640 --batch_size=2 --cam_id=0 --show 66 | 67 | 68 | ## 1280 69 | ./app_yolor --model=../../data/yolor/yolor_p6.trt --size=1280 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../ 70 | ``` 71 | ## 5.
appendix 72 | ignore -------------------------------------------------------------------------------- /yolor/alpha_export.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import platform 4 | import shutil 5 | import time 6 | from pathlib import Path 7 | 8 | import cv2 9 | import torch 10 | import torch.backends.cudnn as cudnn 11 | from numpy import random 12 | 13 | from utils.google_utils import attempt_load 14 | from utils.datasets import LoadStreams, LoadImages 15 | from utils.general import ( 16 | check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, strip_optimizer) 17 | from utils.plots import plot_one_box 18 | from utils.torch_utils import select_device, load_classifier, time_synchronized 19 | 20 | from models.models import * 21 | from utils.datasets import * 22 | from utils.general import * 23 | 24 | import argparse 25 | 26 | import torch 27 | from utils.google_utils import attempt_download 28 | 29 | 30 | import onnx 31 | import onnxruntime as ort 32 | import numpy as np 33 | 34 | """ 35 | example: 36 | python alpha_export.py --net=yolor_p6 37 | python alpha_export.py --net=yolor_csp 38 | python alpha_export.py --net=yolor_csp_star 39 | python alpha_export.py --net=yolor_csp_x 40 | python alpha_export.py --net=yolor_csp_x_star 41 | """ 42 | if __name__ == '__main__': 43 | parser = argparse.ArgumentParser() 44 | parser.add_argument('--net', type=str, default='yolor_p6', help='net type') 45 | opt = parser.parse_args() 46 | # init 47 | image_input_shape = '' 48 | img = '' 49 | model = '' 50 | 51 | net = opt.net 52 | if net == "yolor_p6": 53 | # yolor_p6 54 | image_input_shape = (1, 3, 1280, 1280) 55 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection 56 | model = Darknet("cfg/yolor_p6.cfg", 1280).cpu() 57 | opt.weights = 'yolor_p6.pt' 58 | elif net == "yolor_csp": 59 | # yolor_csp 60 | image_input_shape = (1, 3, 640, 640) 61 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection 62 | model = Darknet("cfg/yolor_csp.cfg", 640).cpu() 63 | opt.weights = 'yolor_csp.pt' 64 | elif net == "yolor_csp_star": 65 | # yolor_csp_star 66 | image_input_shape = (1, 3, 640, 640) 67 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection 68 | model = Darknet("cfg/yolor_csp.cfg", 640).cpu() 69 | opt.weights = 'yolor_csp_star.pt' 70 | elif net == "yolor_csp_x": 71 | # yolor_csp_x: 72 | image_input_shape = (1, 3, 640, 640) 73 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection 74 | model = Darknet("cfg/yolor_csp_x.cfg", 640).cpu() 75 | opt.weights = 'yolor_csp_x.pt' 76 | elif net == "yolor_csp_x_star": 77 | # yolor_csp_x_star: 640*640 78 | image_input_shape = (1, 3, 640, 640) 79 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection 80 | model = Darknet("cfg/yolor_csp_x.cfg", 640).cpu() 81 | opt.weights = 'yolor_csp_x_star.pt' 82 | 83 | model.load_state_dict(torch.load(opt.weights, map_location="cpu")['model']) 84 | 85 | model.eval() 86 | y = model(img) # dry run 87 | print(y[0][0][0][0:10]) 88 | 89 | # ONNX export 90 | # try 91 | print('\nStarting ONNX export with onnx %s...' 
% onnx.__version__) 92 | f = opt.weights.replace('.pt', '.onnx') # filename 93 | torch.onnx.export(model, img, f, verbose=False, opset_version=11, input_names=['images'], output_names=['output'], 94 | dynamic_axes={ 95 | 'images': { 96 | 0: 'batch', 97 | 2: 'height', 98 | 3: 'width'}, # shape(1,3,640,640) 99 | 'output': { 100 | 0: 'batch', 101 | 1: 'anchors'} # shape(1,25200,85) 102 | }) 103 | 104 | # Checks 105 | onnx_model = onnx.load(f) # load onnx model 106 | 107 | input_names = ("images") 108 | ort_session = ort.InferenceSession(f) 109 | outputs = ort_session.run( 110 | None, 111 | {input_names: np.ones(shape=image_input_shape).astype(np.float32)}, 112 | ) 113 | print(outputs[0][0][0][0:10]) 114 | onnx.checker.check_model(onnx_model) # check onnx model 115 | print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 116 | print('ONNX export success, saved as %s' % f) 117 | 118 | if net == "yolor_p6": 119 | item1 = onnx_model.graph.output[1] 120 | item2 = onnx_model.graph.output[2] 121 | item3 = onnx_model.graph.output[3] 122 | item4 = onnx_model.graph.output[4] 123 | onnx_model.graph.output.remove(item1) 124 | onnx_model.graph.output.remove(item2) 125 | onnx_model.graph.output.remove(item3) 126 | onnx_model.graph.output.remove(item4) 127 | else: 128 | item1 = onnx_model.graph.output[1] 129 | item2 = onnx_model.graph.output[2] 130 | item3 = onnx_model.graph.output[3] 131 | onnx_model.graph.output.remove(item1) 132 | onnx_model.graph.output.remove(item2) 133 | onnx_model.graph.output.remove(item3) 134 | 135 | # save 136 | onnx.save(onnx_model, f) 137 | # Finish 138 | print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.') 139 | -------------------------------------------------------------------------------- /yolov3/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolov3 VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | 11 | message(STATUS ${ALL_LIBS}) 12 | file(GLOB CPPS 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 16 | ${TensorRT_ROOT}/samples/common/logger.cpp 17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 18 | ) 19 | list(REMOVE_ITEM CPPS app_yolov3.cpp) 20 | message(STATUS CPPS = ${CPPS}) 21 | list (LENGTH CPPS length) 22 | message(STATUS ***length*** = ${length}) 23 | find_package(OpenCV REQUIRED) 24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 27 | 28 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 29 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 30 | 31 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 32 | target_compile_options(${PROJECT_NAME} PUBLIC 33 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 34 | 35 | add_executable(app_yolov3 app_yolov3.cpp) 36 | 37 | # NVCC 38 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 39 | target_link_libraries(app_yolov3
${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 40 | -------------------------------------------------------------------------------- /yolov3/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 3 | or export onnx: 4 | ```bash 5 | git clone https://github.com/ultralytics/yolov3 && cd yolov3 6 | git checkout dd838e25863169d0de4f10631a609350658efb69 7 | ``` 8 | ```bash 9 | # note: When using the official export.py to export onnx, you need to comment the following two lines: 10 | #--------------------------------------------------------------------------------------------------------- 11 | if simplify: 12 | try: 13 | check_requirements(('onnx-simplifier',)) 14 | import onnxsim 15 | 16 | LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...') 17 | model_onnx, check = onnxsim.simplify( 18 | model_onnx, 19 | #------------------------------------------------------------------------------- 20 | #dynamic_input_shape=dynamic, 21 | #input_shapes={'images': list(im.shape)} if dynamic else None 22 | #------------------------------------------------------------------------------- 23 | ) 24 | assert check, 'assert check failed' 25 | onnx.save(model_onnx, f) 26 | except Exception as e: 27 | LOGGER.info(f'{prefix} simplifier failure: {e}') 28 | LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') 29 | LOGGER.info(f"{prefix} run --dynamic ONNX model inference with: 'python detect.py --weights {f}'") 30 | #--------------------------------------------------------------------------------------------------------- 31 | ``` 32 | ```bash 33 | cd yolov3 34 | python export.py --weights yolov3-tiny.pt --dynamic --simplify 35 | python export.py --weights yolov3.pt --dynamic --simplify 36 | python export.py --weights yolov3-spp.pt --dynamic 37 | ``` 38 | ## 2.edit and save onnx 39 | ```bash 40 | # note: If you have obtained onnx by downloading, this step can be ignored 41 | git clone https://github.com/FeiYull/tensorrt-alpha 42 | cd tensorrt-alpha/yolov3 43 | conda activate tensorrt-alpha 44 | # edit alpha_edit.py on line21 & line24 first.
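# (what alpha_edit.py does: it loads the exported onnx, removes the extra per-scale graph outputs so that only the merged "output" head remains, and saves the result as alpha_<name>.onnx; set net_name on line 21 and the onnx directory path on line 24 before running it)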
45 | python alpha_edit.py 46 | ``` 47 | ## 3.compile onnx 48 | ```bash 49 | # put your onnx file in this path:tensorrt-alpha/data/yolov3 50 | cd tensorrt-alpha/data/yolov3 51 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 52 | # 640 53 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yolov3.onnx --saveEngine=alpha_yolov3.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 54 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yolov3-spp.onnx --saveEngine=alpha_yolov3-spp.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 55 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yolov3-tiny.onnx --saveEngine=alpha_yolov3-tiny.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 56 | 57 | # note: When compiling the alpha_yolov3-tiny model, the following error may occur: 58 | # error: Error Code 4: Internal Error (/model.11/Reshape: IShuffleLayer applied to shape tensor must have 0 or 1 #reshape dimensions: dimensions were [-1,2]) 59 | # solution: add the parameter --simplify when exporting onnx (opset defaults to 13, which is high enough) 60 | ``` 61 | ## 4.run 62 | ```bash 63 | git clone https://github.com/FeiYull/tensorrt-alpha 64 | cd tensorrt-alpha/yolov3 65 | mkdir build 66 | cd build 67 | cmake .. 68 | make -j10 69 | # note: the dstImage will be saved in tensorrt-alpha/yolov3/build by default 70 | 71 | ## 640 72 | # infer image 73 | ./app_yolov3 --model=../../data/yolov3/alpha_yolov3-tiny.trt --size=640 --batch_size=1 --img=../../data/6406403.jpg --show --savePath=../ 74 | 75 | # infer video 76 | ./app_yolov3 --model=../../data/yolov3/alpha_yolov3-tiny.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show 77 | 78 | # infer camera 79 | ./app_yolov3 --model=../../data/yolov3/alpha_yolov3-tiny.trt --size=640 --batch_size=2 --cam_id=0 --show 80 | 81 | # note: yolov3-tiny clearly misses some detections on the image 6406401.jpg; don't worry, this behavior is consistent with the official model 82 | ``` 83 | ## 5.
appendix 84 | ignore -------------------------------------------------------------------------------- /yolov3/alpha_edit.py: -------------------------------------------------------------------------------- 1 | import onnx 2 | import onnx.helper as helper 3 | import torch 4 | # import torchvision 5 | import onnxsim # pip install onnx-simplifier 6 | import onnxruntime as ort 7 | import numpy as np 8 | import os 9 | 10 | 11 | def infer_onnx(onnx_file, input_names, image_input_shape): 12 | ort_session = ort.InferenceSession(onnx_file) 13 | outputs = ort_session.run( 14 | None, 15 | # {"data": np.ones(shape=image_input_shape).astype(np.float32)}, 16 | {input_names[0]: np.ones(shape=image_input_shape).astype(np.float32)}, 17 | ) 18 | return outputs 19 | 20 | 21 | net_name = "yolov3-tiny" 22 | # net_name = "yolov3" 23 | # net_name = "yolov3-spp" 24 | path = "../data/yolov3/" 25 | 26 | image_input_shape = [1, 3, 640, 640] 27 | onnx_name = net_name + ".onnx" 28 | input_names = ["images"] 29 | output_names = ["output"] 30 | 31 | model = onnx.load_model(path + onnx_name) 32 | 33 | outputs = infer_onnx(path + onnx_name, input_names, image_input_shape) 34 | for output in outputs: 35 | print(output.shape) 36 | 37 | # delete some nodes 38 | if net_name == "yolov3-tiny": 39 | item1 = model.graph.output[1] 40 | item2 = model.graph.output[2] 41 | model.graph.output.remove(item1) 42 | model.graph.output.remove(item2) 43 | elif net_name == "yolov3" or net_name == "yolov3-spp": 44 | item1 = model.graph.output[1] 45 | item2 = model.graph.output[2] 46 | item3 = model.graph.output[3] 47 | model.graph.output.remove(item1) 48 | model.graph.output.remove(item2) 49 | model.graph.output.remove(item3) 50 | 51 | onnx.save(model, path + "alpha_" + onnx_name) 52 | outputs = infer_onnx(path + "alpha_" + onnx_name, input_names, image_input_shape) 53 | for output in outputs: 54 | print(output.shape) 55 | print("") -------------------------------------------------------------------------------- /yolov4/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolov4 VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | 8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 11 | 12 | message(STATUS ${ALL_LIBS}) 13 | file(GLOB CPPS 14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 15 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 17 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 18 | ${TensorRT_ROOT}/samples/common/logger.cpp 19 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 20 | ) 21 | list(REMOVE_ITEM CPPS app_yolov4.cpp) 22 | message(STATUS CPPS = ${CPPS}) 23 | list (LENGTH CPPS length) 24 | message(STATUS ***length*** = ${length}) 25 | find_package(OpenCV REQUIRED) 26 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 27 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 28 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 29 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 30 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 31 | 32 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 33 | target_compile_options(${PROJECT_NAME} PUBLIC 34 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math
--disable-warnings>) 35 | 36 | add_executable(app_yolov4 app_yolov4.cpp) 37 | 38 | # NVCC 39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 40 | target_link_libraries(app_yolov4 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 41 | -------------------------------------------------------------------------------- /yolov4/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 3 | or export onnx: 4 | ```bash 5 | git clone https://github.com/Tianxiaomo/pytorch-YOLOv4 6 | git clone https://github.com/FeiYull/tensorrt-alpha 7 | # Please use the "alpha_export.py" file provided by TensorRT-Alpha to export onnx 8 | cp tensorrt-alpha/yolov4/alpha_export.py pytorch-YOLOv4/ 9 | cd pytorch-YOLOv4/ 10 | git checkout a65d219f9066bae4e12003bd7cdc04531860c672 11 | 12 | # 608 13 | python alpha_export.py cfg/yolov4.cfg yolov4.weights --batch_size=-1 --onnx_file_path=alpha_yolov4_-1_3_608_608_dynamic.onnx 14 | ``` 15 | ## 2.edit and save onnx 16 | ```bash 17 | # note: If you have obtained onnx by downloading, this step can be ignored 18 | ignore 19 | ``` 20 | ## 3.compile onnx 21 | ```bash 22 | # put your onnx file in this path:tensorrt-alpha/data/yolov4 23 | cd tensorrt-alpha/data/yolov4 24 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 25 | # 608 26 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yolov4_-1_3_608_608_dynamic.onnx --saveEngine=alpha_yolov4_-1_3_608_608_dynamic.trt --buildOnly --minShapes=input:1x3x608x608 --optShapes=input:2x3x608x608 --maxShapes=input:4x3x608x608 27 | ``` 28 | ## 4.run 29 | ```bash 30 | git clone https://github.com/FeiYull/tensorrt-alpha 31 | cd tensorrt-alpha/yolov4 32 | mkdir build 33 | cd build 34 | cmake .. 35 | make -j10 36 | # note: the dstImage will be saved in tensorrt-alpha/yolov4/build by default 37 | 38 | ## 608 39 | # infer image 40 | ./app_yolov4 --model=../../data/yolov4/alpha_yolov4_-1_3_608_608_dynamic.trt --size=608 --batch_size=1 --img=../../data/6406402.jpg --show --savePath=../ 41 | 42 | # infer video 43 | ./app_yolov4 --model=../../data/yolov4/alpha_yolov4_-1_3_608_608_dynamic.trt --size=608 --batch_size=2 --video=../../data/people.mp4 --show 44 | 45 | # infer camera 46 | ./app_yolov4 --model=../../data/yolov4/alpha_yolov4_-1_3_608_608_dynamic.trt --size=608 --batch_size=2 --cam_id=0 --show 47 | ``` 48 | ## 5.
appendix 49 | ignore -------------------------------------------------------------------------------- /yolov4/alpha_export.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | from tool.darknet2pytorch import Darknet 4 | 5 | class AlphaYolov4(torch.nn.Module): 6 | def __init__(self, cfgfile, weightfile): 7 | super().__init__() 8 | self.model = Darknet(cfgfile) 9 | self.model.load_weights(weightfile) 10 | self.model.eval() 11 | self.model.print_network() 12 | 13 | def forward(self, x): 14 | y = self.model(x) 15 | boxes = y[0] 16 | confs = y[1].unsqueeze(dim = 2) 17 | return torch.cat((boxes, confs), 3) 18 | 19 | def transform_to_onnx(cfgfile, weightfile, batch_size=1, onnx_file_name=None): 20 | model = AlphaYolov4(cfgfile, weightfile) 21 | 22 | dynamic = False 23 | if batch_size <= 0: 24 | dynamic = True 25 | 26 | input_names = ["input"] 27 | output_names = ['output'] 28 | 29 | if dynamic: 30 | x = torch.randn((1, 3, model.model.height, model.model.width), requires_grad=True) 31 | if not onnx_file_name: 32 | onnx_file_name = "yolov4_-1_3_{}_{}_dynamic.onnx".format(model.model.height, model.model.width) 33 | dynamic_axes = {"input": {0: "batch_size"}, "output": {0: "batch_size"}} 34 | # Export the model 35 | print('Export the onnx model ...') 36 | torch.onnx.export(model, 37 | x, 38 | onnx_file_name, 39 | export_params=True, 40 | opset_version=11, 41 | do_constant_folding=True, 42 | input_names=input_names, output_names=output_names, 43 | dynamic_axes=dynamic_axes) 44 | 45 | print('Onnx model exporting done') 46 | return onnx_file_name 47 | 48 | else: 49 | x = torch.randn((batch_size, 3, model.model.height, model.model.width), requires_grad=True) 50 | onnx_file_name = "yolov4_{}_3_{}_{}_static.onnx".format(batch_size, model.model.height, model.model.width) 51 | torch.onnx.export(model, 52 | x, 53 | onnx_file_name, 54 | export_params=True, 55 | opset_version=11, 56 | do_constant_folding=True, 57 | input_names=input_names, output_names=output_names, 58 | dynamic_axes=None) 59 | 60 | print('Onnx model exporting done') 61 | return onnx_file_name 62 | 63 | 64 | if __name__ == '__main__': 65 | from argparse import ArgumentParser 66 | parser = ArgumentParser() 67 | parser.add_argument('config') 68 | parser.add_argument('weightfile') 69 | parser.add_argument('--batch_size', type=int, help="Static Batchsize of the model. 
use batch_size<=0 for dynamic batch size") 70 | parser.add_argument('--onnx_file_path', help="Output onnx file path") 71 | args = parser.parse_args() 72 | transform_to_onnx(args.config, args.weightfile, args.batch_size, args.onnx_file_path) 73 | 74 | -------------------------------------------------------------------------------- /yolov4/decode_yolov4.cu: -------------------------------------------------------------------------------- 1 | #include "decode_yolov4.h" 2 | 3 | __global__ void decode_yolov4_device_kernel(int batch_size, int num_class, int topK, float conf_thresh, 4 | float* src, int srcWidth, int srcHeight, int srcArea, 5 | float* dst, int dstWidth, int dstHeight, int dstArea) 6 | { 7 | int dx = blockDim.x * blockIdx.x + threadIdx.x; 8 | int dy = blockDim.y * blockIdx.y + threadIdx.y; 9 | if (dx >= srcHeight || dy >= batch_size) 10 | { 11 | return; 12 | } 13 | float* pitem = src + dy * srcArea + dx * srcWidth; 14 | float* class_confidence = pitem + 4; 15 | float confidence = *class_confidence++; 16 | int label = 0; 17 | for (int i = 1; i < num_class; ++i, ++class_confidence) 18 | { 19 | if (*class_confidence > confidence) 20 | { 21 | confidence = *class_confidence; 22 | label = i; 23 | } 24 | } 25 | if (confidence < conf_thresh) 26 | { 27 | return; 28 | } 29 | int index = atomicAdd(dst + dy * dstArea, 1); 30 | if (index >= topK) 31 | { 32 | return; 33 | } 34 | float cx = *pitem++; 35 | float cy = *pitem++; 36 | float width = *pitem++; 37 | float height = *pitem++; 38 | 39 | // note: the yolov4 export already provides normalized corner coordinates (x1, y1, x2, y2); 40 | // the cx/cy/width/height names are leftovers, the values pass through unchanged 41 | float left = cx; 42 | float top = cy; 43 | float right = width; 44 | float bottom = height; 45 | float* pout_item = dst + dy * dstArea + 1 + index * dstWidth; 46 | *pout_item++ = left; 47 | *pout_item++ = top; 48 | *pout_item++ = right; 49 | *pout_item++ = bottom; 50 | *pout_item++ = confidence; 51 | *pout_item++ = label; 52 | *pout_item++ = 1; 53 | } 54 | 55 | static __device__ float box_iou( 56 | float aleft, float atop, float aright, float abottom, 57 | float bleft, float btop, float bright, float bbottom 58 | ) { 59 | float cleft = max(aleft, bleft); 60 | float ctop = max(atop, btop); 61 | float cright = min(aright, bright); 62 | float cbottom = min(abottom, bbottom); 63 | 64 | float c_area = max(cright - cleft, 0.0f) * max(cbottom - ctop, 0.0f); 65 | if (c_area == 0.0f) 66 | return 0.0f; 67 | 68 | float a_area = max(0.0f, aright - aleft) * max(0.0f, abottom - atop); 69 | float b_area = max(0.0f, bright - bleft) * max(0.0f, bbottom - btop); 70 | return c_area / (a_area + b_area - c_area); 71 | } 72 | 73 | void yolov4::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight) 74 | { 75 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE); 76 | dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE, 77 | (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE); 78 | int dstArea = 1 + dstWidth * dstHeight; 79 | 80 | decode_yolov4_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size, param.num_class, param.topK, param.conf_thresh, 81 | src, srcWidth, srcHeight, srcArea, 82 | dst, dstWidth, dstHeight, dstArea); 83 | } 84 | 85 | -------------------------------------------------------------------------------- /yolov4/decode_yolov4.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/utils.h" 3 | #include"../utils/kernel_function.h" 4 | 5 | namespace yolov4 6 | { 7 | void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int
srcLength, float* dst, int dstWidth, int dstHeight); 8 | } 9 | -------------------------------------------------------------------------------- /yolov4/yolov4.cpp: -------------------------------------------------------------------------------- 1 | #include"yolov4.h" 2 | #include"decode_yolov4.h" 3 | 4 | YOLOV4::YOLOV4(const utils::InitParameter& param) :yolo::YOLO(param) 5 | { 6 | } 7 | 8 | YOLOV4::~YOLOV4() 9 | { 10 | } 11 | 12 | void YOLOV4::postprocess(const std::vector<cv::Mat>& imgsBatch) 13 | { 14 | yolov4::decodeDevice(m_param, m_output_src_device, 4 + m_param.num_class, m_total_objects, m_output_area, 15 | m_output_objects_device, m_output_objects_width, m_param.topK); 16 | nmsDeviceV1(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1); 17 | CHECK(cudaMemcpy(m_output_objects_host, m_output_objects_device, m_param.batch_size * sizeof(float) * (1 + 7 * m_param.topK), cudaMemcpyDeviceToHost)); 18 | for (size_t bi = 0; bi < imgsBatch.size(); bi++) 19 | { 20 | int num_boxes = std::min((int)(m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1))[0], m_param.topK); 21 | for (int i = 0; i < num_boxes; i++) 22 | { 23 | float* ptr = m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1) + m_output_objects_width * i + 1; 24 | int keep_flag = ptr[6]; 25 | if (keep_flag) 26 | { 27 | float x_lt = m_dst2src.v0 * ptr[0] * m_param.dst_w + m_dst2src.v1 * ptr[1] * m_param.dst_h + m_dst2src.v2; 28 | float y_lt = m_dst2src.v3 * ptr[0] * m_param.dst_w + m_dst2src.v4 * ptr[1] * m_param.dst_h + m_dst2src.v5; 29 | float x_rb = m_dst2src.v0 * ptr[2] * m_param.dst_w + m_dst2src.v1 * ptr[3] * m_param.dst_h + m_dst2src.v2; 30 | float y_rb = m_dst2src.v3 * ptr[2] * m_param.dst_w + m_dst2src.v4 * ptr[3] * m_param.dst_h + m_dst2src.v5; 31 | 32 | m_objectss[bi].emplace_back(x_lt, y_lt, x_rb, y_rb, ptr[4], (int)ptr[5]); 33 | } 34 | } 35 | 36 | } 37 | } -------------------------------------------------------------------------------- /yolov4/yolov4.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/yolo.h" 3 | #include"../utils/utils.h" 4 | class YOLOV4 : public yolo::YOLO 5 | { 6 | public: 7 | YOLOV4(const utils::InitParameter& param); 8 | ~YOLOV4(); 9 | virtual void postprocess(const std::vector<cv::Mat>& imgsBatch); 10 | }; -------------------------------------------------------------------------------- /yolov5/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | # cuda 7 | PROJECT(yolov5 VERSION 1.0.0 LANGUAGES C CXX CUDA) 8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 11 | message(STATUS ${ALL_LIBS}) 12 | file(GLOB CPPS 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 16 | ${TensorRT_ROOT}/samples/common/logger.cpp 17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 18 | ) 19 | list(REMOVE_ITEM CPPS app_yolov5.cpp) 20 | message(STATUS CPPS = ${CPPS}) 21 | list (LENGTH CPPS length) 22 | message(STATUS ***length*** = ${length}) 23 | find_package(OpenCV REQUIRED) 24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
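# (note: every module in this repo follows the same build pattern: the module's .cpp/.cu sources plus the shared utils are compiled into a shared library, and the small app_* executable is linked against it)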
25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 27 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 28 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 29 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 30 | target_compile_options(${PROJECT_NAME} PUBLIC 31 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 32 | 33 | add_executable(app_yolov5 app_yolov5.cpp) 34 | 35 | # NVCC 36 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 37 | target_link_libraries(app_yolov5 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 38 | -------------------------------------------------------------------------------- /yolov5/alpha_edit.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import onnx 3 | import onnx.helper as helper 4 | import torch 5 | # import torchvision 6 | import onnxsim # pip install onnx-simplifier 7 | import onnxruntime as ort 8 | import numpy as np 9 | import os 10 | 11 | 12 | def infer_onnx(onnx_file, input_names, image_input_shape): 13 | ort_session = ort.InferenceSession(onnx_file) 14 | outputs = ort_session.run( 15 | None, 16 | {input_names[0]: np.ones(shape=image_input_shape).astype(np.float32)}, 17 | ) 18 | return outputs 19 | 20 | 21 | def run(mode, net_name, model_path): 22 | #mode = "p5" 23 | mode = mode 24 | 25 | if mode == "p5": 26 | #net_name = "yolov5m" 27 | net_name = net_name 28 | image_input_shape = [1, 3, 640, 640] 29 | else: # mode == "p6": 30 | #net_name = "yolov5m6" 31 | net_name = net_name 32 | image_input_shape = [1, 3, 1280, 1280] 33 | 34 | 35 | 36 | path = model_path 37 | onnx_name = net_name + ".onnx" 38 | input_names = ["images"] 39 | output_names = ["output"] 40 | 41 | model = onnx.load_model(path + onnx_name) 42 | 43 | outputs = infer_onnx(path + onnx_name, input_names, image_input_shape) 44 | for output in outputs: 45 | print(output.shape) 46 | 47 | # delete some nodes 48 | if mode == "p5": 49 | item1 = model.graph.output[1] 50 | item2 = model.graph.output[2] 51 | item3 = model.graph.output[3] 52 | model.graph.output.remove(item1) 53 | model.graph.output.remove(item2) 54 | model.graph.output.remove(item3) 55 | else: # mode == "p6": 56 | item1 = model.graph.output[1] 57 | item2 = model.graph.output[2] 58 | item3 = model.graph.output[3] 59 | item4 = model.graph.output[4] 60 | model.graph.output.remove(item1) 61 | model.graph.output.remove(item2) 62 | model.graph.output.remove(item3) 63 | model.graph.output.remove(item4) 64 | 65 | onnx.save(model, path + "alpha_" + onnx_name) 66 | outputs = infer_onnx(path + "alpha_" + onnx_name, input_names, image_input_shape) 67 | for output in outputs: 68 | print(output.shape) 69 | 70 | def parse_opt(): 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument('--mode', type=str, default='p5', help='p5:640*640, p6:1280*1280') 73 | parser.add_argument('--net_name', type=str, default='yolov5s', help='yolov5n yolov5s yolov5m ...
yolov5s6 ...') 74 | parser.add_argument('--model_path', type=str, default='', help='path of the directory containing the onnx file') 75 | opt = parser.parse_args() 76 | #print_args(vars(opt)) 77 | return opt 78 | 79 | def main(opt): 80 | run(**vars(opt)) 81 | 82 | if __name__ == "__main__": 83 | opt = parse_opt() 84 | main(opt) 85 | 86 | -------------------------------------------------------------------------------- /yolov6/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolov6 VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | message(STATUS ${ALL_LIBS}) 11 | file(GLOB CPPS 12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 13 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 15 | ${TensorRT_ROOT}/samples/common/logger.cpp 16 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 17 | ) 18 | list(REMOVE_ITEM CPPS app_yolov6.cpp) 19 | message(STATUS CPPS = ${CPPS}) 20 | list (LENGTH CPPS length) 21 | message(STATUS ***length*** = ${length}) 22 | find_package(OpenCV REQUIRED) 23 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 24 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 25 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 26 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 27 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 28 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 29 | target_compile_options(${PROJECT_NAME} PUBLIC 30 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 31 | 32 | add_executable(app_yolov6 app_yolov6.cpp) 33 | 34 | # NVCC 35 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 36 | target_link_libraries(app_yolov6 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 37 | -------------------------------------------------------------------------------- /yolov7/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolov7 VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | 11 | message(STATUS ${ALL_LIBS}) 12 | file(GLOB CPPS 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 16 | ${TensorRT_ROOT}/samples/common/logger.cpp 17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 18 | ) 19 | list(REMOVE_ITEM CPPS app_yolov7.cpp) 20 | message(STATUS CPPS = ${CPPS}) 21 | list (LENGTH CPPS length) 22 | message(STATUS ***length*** = ${length}) 23 | find_package(OpenCV REQUIRED) 24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 27 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 28 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS}
${OpenCV_LIBRARIES}) 29 | 30 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 31 | target_compile_options(${PROJECT_NAME} PUBLIC 32 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 33 | 34 | add_executable(app_yolov7 app_yolov7.cpp) 35 | 36 | # NVCC 37 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 38 | target_link_libraries(app_yolov7 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 39 | -------------------------------------------------------------------------------- /yolov7/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | 3 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv?usp=sharing) or export onnx: 4 | ```bash 5 | git clone https://github.com/WongKinYiu/yolov7 && cd yolov7 6 | git checkout 072f76c72c641c7a1ee482e39f604f6f8ef7ee92 7 | # 640 8 | python export.py --weights yolov7-tiny.pt --dynamic --grid 9 | python export.py --weights yolov7.pt --dynamic --grid 10 | python export.py --weights yolov7x.pt --dynamic --grid 11 | # 1280 12 | python export.py --weights yolov7-w6.pt --dynamic --grid --img-size 1280 13 | ``` 14 | ## 2.edit and save onnx 15 | ```bash 16 | # note: If you have obtained onnx by downloading, this step can be ignored 17 | ignore 18 | ``` 19 | 20 | ## 3.compile onnx 21 | ```bash 22 | # put your onnx file in this path:tensorrt-alpha/data/yolov7 23 | cd tensorrt-alpha/data/yolov7 24 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 25 | # 640 26 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov7-tiny.onnx --saveEngine=yolov7-tiny.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 27 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov7.onnx --saveEngine=yolov7.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 28 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov7x.onnx --saveEngine=yolov7x.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 29 | # 1280 30 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov7-w6.onnx --saveEngine=yolov7-w6.trt --buildOnly --minShapes=images:1x3x1280x1280 --optShapes=images:2x3x1280x1280 --maxShapes=images:4x3x1280x1280 31 | 32 | # note: if the following error is reported when running the model (yolov7-w6), just lower the batch_size: 33 | # Error Code 1: Cuda Runtime (an illegal memory access was encountered), i.e. "bool context = m_context->executeV2((void**)bindings)" returns false 34 | ``` 35 | ## 4.run 36 | ```bash 37 | git clone https://github.com/FeiYull/tensorrt-alpha 38 | cd tensorrt-alpha/yolov7 39 | mkdir build 40 | cd build 41 | cmake ..
42 | make -j10 43 | # note: the dstImage will be saved in tensorrt-alpha/yolov7/build by default 44 | 45 | ## 640 46 | # infer image 47 | ./app_yolov7 --model=../../data/yolov7/yolov7-tiny.trt --size=640 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../ 48 | ./app_yolov7 --model=../../data/yolov7/yolov7-w6.trt --size=1280 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../ 49 | 50 | # infer video 51 | ./app_yolov7 --model=../../data/yolov7/yolov7-tiny.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show 52 | 53 | # infer camera 54 | ./app_yolov7 --model=../../data/yolov7/yolov7-tiny.trt --size=640 --batch_size=2 --cam_id=0 --show 55 | ``` 56 | ## 5. appendix 57 | ignore -------------------------------------------------------------------------------- /yolov8-pose/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolov8_pose VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | message(STATUS ${ALL_LIBS}) 11 | file(GLOB CPPS 12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 16 | ${TensorRT_ROOT}/samples/common/logger.cpp 17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 18 | #${TensorRT_ROOT}/samples/common/sampleUtils.cpp 19 | ) 20 | list(REMOVE_ITEM CPPS app_yolov8_pose.cpp) 21 | message(STATUS CPPS = ${CPPS}) 22 | list (LENGTH CPPS length) 23 | message(STATUS ***length*** = ${length}) 24 | find_package(OpenCV REQUIRED) 25 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 26 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 27 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 28 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 29 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 30 | 31 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 32 | target_compile_options(${PROJECT_NAME} PUBLIC 33 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 34 | 35 | add_executable(app_yolov8_pose app_yolov8_pose.cpp) 36 | 37 | # NVCC 38 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 39 | target_link_libraries(app_yolov8_pose ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 40 | -------------------------------------------------------------------------------- /yolov8-pose/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) or export onnx: 3 | ```bash 4 | # 🔥 yolov8 official repo: https://github.com/ultralytics/ultralytics 5 | # 🔥 yolov8 quickstart: https://docs.ultralytics.com/quickstart/ 6 | # 🚀TensorRT-Alpha will be updated synchronously as soon as possible!
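# (the pinned versions below are the combination this guide assumes; newer ultralytics releases may change the export CLI)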
7 |
8 | # install yolov8
9 | conda create -n yolov8 python==3.8 -y # for Linux
10 | # conda create -n yolov8 python=3.9 -y # for Windows10
11 | conda activate yolov8
12 | pip install ultralytics==8.0.200
13 | pip install onnx==1.12.0
14 |
15 | # download official weights (".pt" files)
16 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt
17 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt
18 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt
19 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt
20 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt
21 | ```
22 |
23 | export onnx:
24 | ```bash
25 | yolo mode=export model=yolov8n-pose.pt format=onnx dynamic=True opset=12
26 | yolo mode=export model=yolov8s-pose.pt format=onnx dynamic=True opset=12
27 | yolo mode=export model=yolov8m-pose.pt format=onnx dynamic=True opset=12
28 | yolo mode=export model=yolov8l-pose.pt format=onnx dynamic=True opset=12
29 | yolo mode=export model=yolov8x-pose.pt format=onnx dynamic=True opset=12
30 | ```
31 |
32 | ## 2. edit and save onnx
33 | ```bash
34 | # note: if you obtained the onnx by downloading, this step can be ignored
35 | ignore
36 | ```
37 |
38 | ## 3. compile onnx
39 | ```bash
40 | # put your onnx file in this path: tensorrt-alpha/data/yolov8-pose
41 | cd tensorrt-alpha/data/yolov8-pose
42 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
43 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8n-pose.onnx --saveEngine=yolov8n-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
44 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8s-pose.onnx --saveEngine=yolov8s-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
45 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8m-pose.onnx --saveEngine=yolov8m-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
46 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8l-pose.onnx --saveEngine=yolov8l-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
47 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8x-pose.onnx --saveEngine=yolov8x-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
48 | ```
49 | ## 4. run
50 | ```bash
51 | git clone https://github.com/FeiYull/tensorrt-alpha
52 | cd tensorrt-alpha/yolov8-pose
53 | mkdir build
54 | cd build
55 | cmake ..
56 | make -j10
57 | # note: the dstImage will be saved in tensorrt-alpha/yolov8-pose/build by default
58 |
59 | ## 640
60 | # infer image
61 | ./app_yolov8_pose --model=../../data/yolov8-pose/yolov8n-pose.trt --size=640 --batch_size=1 --img=../../data/6406407.jpg --show --savePath=../
62 |
63 | # infer video
64 | ./app_yolov8_pose --model=../../data/yolov8-pose/yolov8n-pose.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show
65 |
66 | # infer camera
67 | ./app_yolov8_pose --model=../../data/yolov8-pose/yolov8n-pose.trt --size=640 --batch_size=2 --cam_id=0 --show
68 |
69 | ```
70 | ## 5. appendix
71 | ignore
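For orientation before the sources below: after the device-side transpose, the decode kernel in decode_yolov8_pose.cu writes one candidate per row as 7 box fields (left, top, right, bottom, confidence, label, keepflag) followed by 51 keypoint floats. A minimal host-side view of that row, assuming the standard 17-keypoint COCO layout; the struct names are illustrative, not repository code:

```cpp
// Illustrative host-side view of one decoded pose row (not part of the repo):
// 7 box fields written by decode_yolov8_pose_device_kernel, then 51 floats
// for 17 COCO keypoints stored as (x, y, visibility) triplets.
#include <array>

struct PoseKeypoint { float x, y, vis; };

struct PoseRow
{
    float left, top, right, bottom;
    float confidence;
    float label;     // always 0: pose models detect only the "person" class
    float keepflag;  // cleared by NMS when the box is suppressed
    std::array<PoseKeypoint, 17> kpts;
};
static_assert(sizeof(PoseRow) == 58 * sizeof(float), "row width is 58 floats");
```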
-------------------------------------------------------------------------------- /yolov8-pose/app_yolov8_pose.cpp:
--------------------------------------------------------------------------------
1 | #include"../utils/yolo.h"
2 | #include"yolov8_pose.h"
3 |
4 | void setParameters(utils::InitParameter& initParameters)
5 | {
6 |     initParameters.class_names = utils::dataSets::coco80;
7 |     //initParameters.class_names = utils::dataSets::voc20;
8 |     initParameters.num_class = 80; // for coco
9 |     //initParameters.num_class = 20; // for voc2012
10 |     initParameters.batch_size = 8;
11 |     initParameters.dst_h = 640;
12 |     initParameters.dst_w = 640;
13 |     initParameters.input_output_names = { "images", "output0" };
14 |     initParameters.conf_thresh = 0.25f;
15 |     initParameters.iou_thresh = 0.7f;
16 |     initParameters.save_path = "D:/Data/1/";
17 | }
18 |
19 | void task(YOLOv8Pose& yolo, const utils::InitParameter& param, std::vector<cv::Mat>& imgsBatch, const int& delayTime, const int& batchi)
20 | {
21 |     yolo.copy(imgsBatch);
22 |     utils::DeviceTimer d_t1; yolo.preprocess(imgsBatch); float t1 = d_t1.getUsedTime();
23 |     utils::DeviceTimer d_t2; yolo.infer(); float t2 = d_t2.getUsedTime();
24 |     utils::DeviceTimer d_t3; yolo.postprocess(imgsBatch); float t3 = d_t3.getUsedTime();
25 |     float avg_times[3] = { t1 / param.batch_size, t2 / param.batch_size, t3 / param.batch_size };
26 |     sample::gLogInfo << "preprocess time = " << avg_times[0] << "; "
27 |         "infer time = " << avg_times[1] << "; "
28 |         "postprocess time = " << avg_times[2] << std::endl;
29 |     yolo.showAndSave(param.class_names, delayTime, imgsBatch, avg_times);
30 |     yolo.reset();
31 | }
32 |
33 | int main(int argc, char** argv)
34 | {
35 |     cv::CommandLineParser parser(argc, argv,
36 |     {
37 |         "{model || tensorrt model file }"
38 |         "{size || image (h, w), eg: 640 }"
39 |         "{batch_size|| batch size }"
40 |         "{video || video's path }"
41 |         "{img || image's path }"
42 |         "{cam_id || camera's device id }"
43 |         "{show || if show the result }"
44 |         "{savePath || save path, can be ignored}"
45 |     });
46 |     utils::InitParameter param;
47 |     setParameters(param);
48 |     std::string model_path = "../../data/yolov8-pose/yolov8n-pose.trt";
49 |     std::string video_path = "../../data/people.mp4";
50 |     std::string image_path = "../../data/bus.jpg";
51 |     int camera_id = 0;
52 |     utils::InputStream source;
53 |     source = utils::InputStream::IMAGE;
54 |     //source = utils::InputStream::VIDEO;
55 |     //source = utils::InputStream::CAMERA;
56 |     int size = -1; // w or h
57 |     int batch_size = 8;
58 |     bool is_show = false;
59 |     bool is_save = false;
60 |     if (parser.has("model"))
61 |     {
62 |         model_path = parser.get<std::string>("model");
63 |         sample::gLogInfo << "model_path = " << model_path << std::endl;
64 |     }
65 |     if (parser.has("size"))
66 |     {
67 |         size = parser.get<int>("size");
68 |         sample::gLogInfo << "size = " << size << std::endl;
69 |         param.dst_h = param.dst_w = size;
70 |     }
71 |     if (parser.has("batch_size"))
72 |     {
73 |         batch_size = parser.get<int>("batch_size");
74 |         sample::gLogInfo << "batch_size = " << batch_size << std::endl;
75 |         param.batch_size = batch_size;
76 |     }
77 |     if (parser.has("video"))
78 |     {
79 |         source = utils::InputStream::VIDEO;
80 |         video_path = parser.get<std::string>("video");
81 |         sample::gLogInfo << "video_path = " << video_path << std::endl;
82 |     }
83 |     if (parser.has("img"))
84 |     {
85 |         source = utils::InputStream::IMAGE;
86 |         image_path = parser.get<std::string>("img");
87 |         sample::gLogInfo << "image_path = " << image_path << std::endl;
88 |     }
89 |     if (parser.has("cam_id"))
90 |     {
91 |         source = utils::InputStream::CAMERA;
92 |         camera_id = parser.get<int>("cam_id");
93 |         sample::gLogInfo << "camera_id = " << camera_id << std::endl;
94 |     }
95 |     if (parser.has("show"))
96 |     {
97 |         param.is_show = true;
98 |         sample::gLogInfo << "is_show = " << param.is_show << std::endl;
99 |     }
100 |     if (parser.has("savePath"))
101 |     {
102 |         param.is_save = true;
103 |         param.save_path = parser.get<std::string>("savePath");
104 |         sample::gLogInfo << "save_path = " << param.save_path << std::endl;
105 |     }
106 |     int total_batches = 0;
107 |     int delay_time = 1;
108 |     cv::VideoCapture capture;
109 |     if (!setInputStream(source, image_path, video_path, camera_id,
110 |         capture, total_batches, delay_time, param))
111 |     {
112 |         sample::gLogError << "read the input data errors!" << std::endl;
113 |         return -1;
114 |     }
115 |     setRenderWindow(param);
116 |     YOLOv8Pose yolo(param);
117 |     std::vector<unsigned char> trt_file = utils::loadModel(model_path);
118 |     if (trt_file.empty())
119 |     {
120 |         sample::gLogError << "trt_file is empty!" << std::endl;
121 |         return -1;
122 |     }
123 |     if (!yolo.init(trt_file))
124 |     {
125 |         sample::gLogError << "yolo.init() failed!" << std::endl;
126 |         return -1;
127 |     }
128 |     yolo.check();
129 |     cv::Mat frame;
130 |     std::vector<cv::Mat> imgs_batch;
131 |     imgs_batch.reserve(param.batch_size);
132 |     sample::gLogInfo << imgs_batch.capacity() << std::endl;
133 |     int batchi = 0;
134 |     while (capture.isOpened())
135 |     {
136 |         if (batchi >= total_batches && source != utils::InputStream::CAMERA)
137 |         {
138 |             break;
139 |         }
140 |         if (imgs_batch.size() < param.batch_size)
141 |         {
142 |             if (source != utils::InputStream::IMAGE)
143 |             {
144 |                 capture.read(frame);
145 |             }
146 |             else
147 |             {
148 |                 frame = cv::imread(image_path);
149 |             }
150 |             if (frame.empty())
151 |             {
152 |                 sample::gLogWarning << "no more video or camera frames" << std::endl;
153 |                 task(yolo, param, imgs_batch, delay_time, batchi);
154 |                 imgs_batch.clear();
155 |                 batchi++;
156 |                 break;
157 |             }
158 |             else
159 |             {
160 |                 imgs_batch.emplace_back(frame.clone());
161 |             }
162 |         }
163 |         else
164 |         {
165 |             task(yolo, param, imgs_batch, delay_time, batchi);
166 |             imgs_batch.clear();
167 |             batchi++;
168 |         }
169 |     }
170 |     return 0;
171 | }
-------------------------------------------------------------------------------- /yolov8-pose/decode_yolov8_pose.cu:
--------------------------------------------------------------------------------
1 | #include "decode_yolov8_pose.h"
2 |
3 | __global__ void decode_yolov8_pose_device_kernel(int batch_size, int num_class, int topK, float conf_thresh,
4 |     float* src, int srcWidth, int srcHeight, int srcArea,
5 |     float* dst, int dstWidth, int dstArea)
6 | {
7 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
8 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
9 |     if (dx >= srcHeight || dy >= batch_size)
10 |     {
11 |         return;
12 |     }
13 |     float* pitem = src + dy * srcArea + dx * srcWidth;
14 |     float confidence = pitem[4]; // pose model: a single "person" confidence
15 |     if (confidence < conf_thresh)
16 |     {
17 |         return;
18 |     }
19 |     int index = atomicAdd(dst + dy * dstArea, 1);
20 |
21 |     if (index >= topK)
22 |     {
23 |         return;
24 |     }
25 |     float cx = *pitem++;
26 |     float cy = *pitem++;
27 |     float width = *pitem++;
28 |     float height = *pitem++;
29 |
30 |     float left = cx - width * 0.5f;
31 |     float top = cy - height * 0.5f;
32 |     float right = cx + width * 0.5f;
33 |     float bottom = cy + height * 0.5f;
34 |     float* pout_item = dst + dy * dstArea + 1 + index * dstWidth;
35 |     *pout_item++ = left;
36 |     *pout_item++ = top;
37 |     *pout_item++ = right;
38 |     *pout_item++ = bottom;
39 |     *pout_item++ = confidence;
40 |     *pout_item++ = 0; // label: person
41 |     *pout_item++ = 1; // keepflag for NMS
42 |     memcpy(pout_item, pitem + 1, (dstWidth - 7) * sizeof(float)); // 17 keypoints: (x, y, visibility)
43 | }
44 |
45 | void yolov8pose::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
46 | {
47 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
48 |     dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
49 |         (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
50 |     int dstArea = 1 + dstWidth * dstHeight;
51 |
52 |     decode_yolov8_pose_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size, param.num_class, param.topK, param.conf_thresh,
53 |         src, srcWidth, srcHeight, srcArea,
54 |         dst, dstWidth, dstArea);
55 | }
56 |
57 | __global__ void transpose_device_kernel(int batch_size,
58 |     float* src, int srcWidth, int srcHeight, int srcArea,
59 |     float* dst, int dstWidth, int dstHeight, int dstArea)
60 | {
61 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
62 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
63 |     if (dx >= dstHeight || dy >= batch_size)
64 |     {
65 |         return;
66 |     }
67 |     float* p_dst_row = dst + dy * dstArea + dx * dstWidth;
68 |     float* p_src_col = src + dy * srcArea + dx;
69 |
70 |     for (int i = 0; i < dstWidth; i++)
71 |     {
72 |         p_dst_row[i] = p_src_col[i * srcWidth];
73 |     }
74 | }
75 |
76 | void yolov8pose::transposeDevice(utils::InitParameter param,
77 |     float* src, int srcWidth, int srcHeight, int srcArea,
78 |     float* dst, int dstWidth, int dstHeight)
79 | {
80 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
81 |     dim3 grid_size((dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
82 |         (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
83 |     int dstArea = dstWidth * dstHeight;
84 |
85 |     transpose_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size,
86 |         src, srcWidth, srcHeight, srcArea,
87 |         dst, dstWidth, dstHeight, dstArea);
88 | }
89 |
90 |
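Both kernels above share one output convention: for each image in the batch, dst begins with a float counter (bumped via atomicAdd) followed by up to topK rows of dstWidth floats each. A minimal sketch of reading that buffer back on the host, mirroring how the postprocess code later in this repository consumes it; names are illustrative:

```cpp
// Illustrative host readback for the decoded buffer (not repo code):
// per image, float[0] = number of candidates written, then topK rows.
#include <cuda_runtime.h>
#include <algorithm>
#include <vector>

void readDecoded(const float* d_dst, int batch, int topK, int dstWidth)
{
    const int perImage = 1 + topK * dstWidth;
    std::vector<float> h_dst((size_t)batch * perImage);
    cudaMemcpy(h_dst.data(), d_dst, h_dst.size() * sizeof(float),
               cudaMemcpyDeviceToHost);
    for (int bi = 0; bi < batch; bi++)
    {
        const float* p = h_dst.data() + (size_t)bi * perImage;
        int num = std::min((int)p[0], topK); // the counter may exceed topK
        for (int i = 0; i < num; i++)
        {
            const float* row = p + 1 + i * dstWidth; // left, top, right, bottom, conf, ...
            (void)row; // consume the fields here
        }
    }
}
```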
-------------------------------------------------------------------------------- /yolov8-pose/decode_yolov8_pose.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/utils.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | namespace yolov8pose
6 | {
7 |     void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
8 |     void transposeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight);
9 | }
10 |
-------------------------------------------------------------------------------- /yolov8-pose/yolov8_pose.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/yolo.h"
3 | #include"../utils/utils.h"
4 | class YOLOv8Pose : public yolo::YOLO
5 | {
6 | public:
7 |     YOLOv8Pose(const utils::InitParameter& param);
8 |     ~YOLOv8Pose();
9 |     virtual bool init(const std::vector<unsigned char>& trtFile);
10 |     virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
11 |     virtual void postprocess(const std::vector<cv::Mat>& imgsBatch);
12 |     virtual void reset();
13 |
14 | public:
15 |     void showAndSave(const std::vector<std::string>& classNames,
16 |         const int& cvDelayTime, std::vector<cv::Mat>& imgsBatch, float* avg_times);
17 |
18 | private:
19 |     float* m_output_src_transpose_device;
20 |     float* m_output_objects_device;
21 |     float* m_output_objects_host;
22 |     int m_output_objects_width;
23 |
24 |     const size_t m_nkpts;
25 |     std::vector<std::vector<int>> m_skeleton; // keypoint index pairs for the drawn limbs
26 |     std::vector<cv::Scalar> m_kpt_color;
27 |     std::vector<cv::Scalar> m_limb_color;
28 | };
-------------------------------------------------------------------------------- /yolov8-seg/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolov8_seg VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | set( CMAKE_CXX_FLAGS "-O3" )
9 | include_directories( "/usr/include/eigen3" )
10 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
11 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
12 | message(STATUS ${ALL_LIBS})
13 | file(GLOB CPPS
14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
15 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
17 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
18 | ${TensorRT_ROOT}/samples/common/logger.cpp
19 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
20 | #${TensorRT_ROOT}/samples/common/sampleUtils.cpp
21 | )
22 | list(REMOVE_ITEM CPPS app_yolov8_seg.cpp)
23 | message(STATUS CPPS = ${CPPS})
24 | list (LENGTH CPPS length)
25 | message(STATUS ***length*** = ${length})
26 | find_package(OpenCV REQUIRED)
27 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
28 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
29 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
30 | add_library(${PROJECT_NAME} SHARED ${CPPS})
31 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
32 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
33 | target_compile_options(${PROJECT_NAME} PUBLIC
34 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
35 |
36 | add_executable(app_yolov8_seg app_yolov8_seg.cpp)
37 |
38 | # NVCC
39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
40 | target_link_libraries(app_yolov8_seg ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
41 |
-------------------------------------------------------------------------------- /yolov8-seg/README.md:
--------------------------------------------------------------------------------
1 | ## 0. install eigen
2 | Eigen 3.4.0 has been tested and passes!
3 | ```bash
4 | # for linux
5 | sudo apt-get install libeigen3-dev
6 |
7 | # for windows
8 | # download from https://eigen.tuxfamily.org/index.php?title=Main_Page
9 | # decompress the package
10 | # then manually add the include directory to the VS project
11 | ```
12 |
13 | ## 1. get onnx
14 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) or export onnx:
15 | ```bash
16 | # 🔥 yolov8 official repo: https://github.com/ultralytics/ultralytics
17 | # 🔥 yolov8 quickstart: https://docs.ultralytics.com/quickstart/
18 | # 🚀 TensorRT-Alpha will be updated synchronously as soon as possible!
19 |
20 | # install yolov8
21 | conda create -n yolov8 python==3.8 -y # for Linux
22 | # conda create -n yolov8 python=3.9 -y # for Windows10
23 | conda activate yolov8
24 | pip install ultralytics==8.0.200
25 | pip install onnx==1.12.0
26 |
27 | # download official weights (".pt" files)
28 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt
29 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt
30 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt
31 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt
32 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt
33 | ```
34 |
35 | export onnx:
36 | ```bash
37 | yolo mode=export model=yolov8n-seg.pt format=onnx dynamic=True opset=12
38 | yolo mode=export model=yolov8s-seg.pt format=onnx dynamic=True opset=12
39 | yolo mode=export model=yolov8m-seg.pt format=onnx dynamic=True opset=12
40 | yolo mode=export model=yolov8l-seg.pt format=onnx dynamic=True opset=12
41 | yolo mode=export model=yolov8x-seg.pt format=onnx dynamic=True opset=12
42 | ```
43 |
44 | ## 2. edit and save onnx
45 | ```bash
46 | # note: if you obtained the onnx by downloading, this step can be ignored
47 | ignore
48 | ```
49 |
50 | ## 3. compile onnx
51 | ```bash
52 | # put your onnx file in this path: tensorrt-alpha/data/yolov8-seg
53 | cd tensorrt-alpha/data/yolov8-seg
54 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
55 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8n-seg.onnx --saveEngine=yolov8n-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
56 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8s-seg.onnx --saveEngine=yolov8s-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
57 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8m-seg.onnx --saveEngine=yolov8m-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
58 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8l-seg.onnx --saveEngine=yolov8l-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
59 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8x-seg.onnx --saveEngine=yolov8x-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
60 | ```
61 | ## 4. run
62 | ```bash
63 | git clone https://github.com/FeiYull/tensorrt-alpha
64 | cd tensorrt-alpha/yolov8-seg
65 | mkdir build
66 | cd build
67 | cmake ..
68 | make -j10
69 | # note: the dstImage will be saved in tensorrt-alpha/yolov8-seg/build by default
70 |
71 | ## 640
72 | # infer image
73 | ./app_yolov8_seg --model=../../data/yolov8-seg/yolov8n-seg.trt --size=640 --batch_size=1 --img=../../data/6406407.jpg --show --savePath=../
74 |
75 | # infer video
76 | ./app_yolov8_seg --model=../../data/yolov8-seg/yolov8n-seg.trt --size=640 --batch_size=1 --video=../../data/people.mp4 --show
77 |
78 | # infer camera
79 | ./app_yolov8_seg --model=../../data/yolov8-seg/yolov8n-seg.trt --size=640 --batch_size=1 --cam_id=0 --show
80 |
81 | ```
82 | ## 5. appendix
83 | ignore
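Section 0 exists because the mask branch is assembled with Eigen: each detection carries 32 mask coefficients that are multiplied against the 32 x 160 x 160 prototype tensor. A minimal sketch of that step for one detection, assuming the standard YOLOv8-seg formulation (sigmoid of coefficients times prototypes); the matrix names are illustrative, not repository code:

```cpp
// Illustrative YOLOv8-seg mask assembly for a single detection (not repo code):
// proto: 32 x (160*160) prototype matrix from the mask head
// coeff: 1 x 32 coefficients trailing the box fields of one decoded row
#include <Eigen/Dense>
#include <cmath>

Eigen::MatrixXf assembleMask(const Eigen::MatrixXf& proto,    // 32 x 25600
                             const Eigen::RowVectorXf& coeff) // 1 x 32
{
    Eigen::RowVectorXf m = coeff * proto; // 1 x 25600
    Eigen::RowVectorXf s = m.unaryExpr([](float v) { return 1.f / (1.f + std::exp(-v)); });
    // note: Eigen reshapes in column-major order by default
    return s.reshaped(160, 160); // low-res instance mask; threshold (e.g. 0.5) and upsample afterwards
}
```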
-------------------------------------------------------------------------------- /yolov8-seg/app_yolov8_seg.cpp:
--------------------------------------------------------------------------------
1 | #include"../utils/yolo.h"
2 | #include"yolov8_seg.h"
3 |
4 | void setParameters(utils::InitParameter& initParameters)
5 | {
6 |     initParameters.class_names = utils::dataSets::coco80;
7 |     //initParameters.class_names = utils::dataSets::voc20;
8 |     initParameters.num_class = 80; // for coco
9 |     //initParameters.num_class = 20; // for voc2012
10 |     initParameters.batch_size = 8;
11 |     initParameters.dst_h = 640;
12 |     initParameters.dst_w = 640;
13 |     initParameters.input_output_names = { "images", "output0" };
14 |     initParameters.conf_thresh = 0.25f;
15 |     initParameters.iou_thresh = 0.7f;
16 |     initParameters.save_path = "D:/Data/1/";
17 | }
18 |
19 | void task(YOLOv8Seg& yolo, const utils::InitParameter& param, std::vector<cv::Mat>& imgsBatch, const int& delayTime, const int& batchi)
20 | {
21 |     yolo.copy(imgsBatch);
22 |     utils::DeviceTimer d_t1; yolo.preprocess(imgsBatch); float t1 = d_t1.getUsedTime();
23 |     utils::DeviceTimer d_t2; yolo.infer(); float t2 = d_t2.getUsedTime();
24 |     utils::DeviceTimer d_t3; yolo.postprocess(imgsBatch); float t3 = d_t3.getUsedTime();
25 |     float avg_times[3] = { t1 / param.batch_size, t2 / param.batch_size, t3 / param.batch_size };
26 |     sample::gLogInfo << "preprocess time = " << avg_times[0] << "; "
27 |         "infer time = " << avg_times[1] << "; "
28 |         "postprocess time = " << avg_times[2] << std::endl;
29 |     yolo.showAndSave(param.class_names, delayTime, imgsBatch);
30 |     yolo.reset();
31 | }
32 |
33 | int main(int argc, char** argv)
34 | {
35 |     cv::CommandLineParser parser(argc, argv,
36 |     {
37 |         "{model || tensorrt model file }"
38 |         "{size || image (h, w), eg: 640 }"
39 |         "{batch_size|| batch size }"
40 |         "{video || video's path }"
41 |         "{img || image's path }"
42 |         "{cam_id || camera's device id }"
43 |         "{show || if show the result }"
44 |         "{savePath || save path, can be ignored}"
45 |     });
46 |     utils::InitParameter param;
47 |     setParameters(param);
48 |     std::string model_path = "../../data/yolov8-seg/yolov8n-seg.trt";
49 |     std::string video_path = "../../data/people.mp4";
50 |     std::string image_path = "../../data/bus.jpg";
51 |     int camera_id = 0;
52 |     utils::InputStream source;
53 |     source = utils::InputStream::IMAGE;
54 |     //source = utils::InputStream::VIDEO;
55 |     //source = utils::InputStream::CAMERA;
56 |     // update params from command line parser
57 |     int size = -1;
58 |     int batch_size = 8;
59 |     bool is_show = false;
60 |     bool is_save = false;
61 |     if (parser.has("model"))
62 |     {
63 |         model_path = parser.get<std::string>("model");
64 |         sample::gLogInfo << "model_path = " << model_path << std::endl;
65 |     }
66 |     if (parser.has("size"))
67 |     {
68 |         size = parser.get<int>("size");
69 |         sample::gLogInfo << "size = " << size << std::endl;
70 |         param.dst_h = param.dst_w = size;
71 |     }
72 |     if (parser.has("batch_size"))
73 |     {
74 |         batch_size = parser.get<int>("batch_size");
75 |         sample::gLogInfo << "batch_size = " << batch_size << std::endl;
76 |         param.batch_size = batch_size;
77 |     }
78 |     if (parser.has("video"))
79 |     {
80 |         source = utils::InputStream::VIDEO;
81 |         video_path = parser.get<std::string>("video");
82 |         sample::gLogInfo << "video_path = " << video_path << std::endl;
83 |     }
84 |     if (parser.has("img"))
85 |     {
86 |         source = utils::InputStream::IMAGE;
87 |         image_path = parser.get<std::string>("img");
88 |         sample::gLogInfo << "image_path = " << image_path << std::endl;
89 |     }
90 |     if (parser.has("cam_id"))
91 |     {
92 |         source = utils::InputStream::CAMERA;
93 |         camera_id = parser.get<int>("cam_id");
94 |         sample::gLogInfo << "camera_id = " << camera_id << std::endl;
95 |     }
96 |
97 |     if (parser.has("show"))
98 |     {
99 |         param.is_show = true;
100 |         sample::gLogInfo << "is_show = " << param.is_show << std::endl;
101 |     }
102 |     if (parser.has("savePath"))
103 |     {
104 |         param.is_save = true;
105 |         param.save_path = parser.get<std::string>("savePath");
106 |         sample::gLogInfo << "save_path = " << param.save_path << std::endl;
107 |     }
108 |     int total_batches = 0;
109 |     int delay_time = 1;
110 |     cv::VideoCapture capture;
111 |     if (!setInputStream(source, image_path, video_path, camera_id,
112 |         capture, total_batches, delay_time, param))
113 |     {
114 |         sample::gLogError << "read the input data errors!" << std::endl;
115 |         return -1;
116 |     }
117 |     setRenderWindow(param);
118 |     YOLOv8Seg yolo(param);
119 |     std::vector<unsigned char> trt_file = utils::loadModel(model_path);
120 |     if (trt_file.empty())
121 |     {
122 |         sample::gLogError << "trt_file is empty!" << std::endl;
123 |         return -1;
124 |     }
125 |     if (!yolo.init(trt_file))
126 |     {
127 |         sample::gLogError << "yolo.init() failed!" << std::endl;
128 |         return -1;
129 |     }
130 |     yolo.check();
131 |     cv::Mat frame;
132 |     std::vector<cv::Mat> imgs_batch;
133 |     imgs_batch.reserve(param.batch_size);
134 |     sample::gLogInfo << imgs_batch.capacity() << std::endl;
135 |     int batchi = 0;
136 |     while (capture.isOpened())
137 |     {
138 |         if (batchi >= total_batches && source != utils::InputStream::CAMERA)
139 |         {
140 |             break;
141 |         }
142 |         if (imgs_batch.size() < param.batch_size)
143 |         {
144 |             if (source != utils::InputStream::IMAGE)
145 |             {
146 |                 capture.read(frame);
147 |             }
148 |             else
149 |             {
150 |                 frame = cv::imread(image_path);
151 |             }
152 |             if (frame.empty())
153 |             {
154 |                 sample::gLogWarning << "no more video or camera frames" << std::endl;
155 |                 task(yolo, param, imgs_batch, delay_time, batchi);
156 |                 imgs_batch.clear();
157 |                 batchi++;
158 |                 break;
159 |             }
160 |             else
161 |             {
162 |                 imgs_batch.emplace_back(frame.clone());
163 |             }
164 |         }
165 |         else
166 |         {
167 |             task(yolo, param, imgs_batch, delay_time, batchi);
168 |             imgs_batch.clear();
169 |             batchi++;
170 |         }
171 |     }
172 |     return 0;
173 | }
-------------------------------------------------------------------------------- /yolov8-seg/decode_yolov8_seg.cu:
--------------------------------------------------------------------------------
1 | #include "decode_yolov8_seg.h"
2 |
3 | __global__ void decode_yolov8_seg_device_kernel(int batch_size, int num_class, int topK, float conf_thresh,
4 |     float* src, int srcWidth, int srcHeight, int srcArea,
5 |     float* dst, int dstWidth, int dstArea)
6 | {
7 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
8 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
9 |     if (dx >= srcHeight || dy >= batch_size)
10 |     {
11 |         return;
12 |     }
13 |     float* pitem = src + dy * srcArea + dx * srcWidth;
14 |     float* class_confidence = pitem + 4;
15 |     float confidence = *class_confidence++;
16 |     int label = 0;
17 |     for (int i = 1; i < num_class; ++i, ++class_confidence) // argmax over class scores
18 |     {
19 |         if (*class_confidence > confidence)
20 |         {
21 |             confidence = *class_confidence;
22 |             label = i;
23 |         }
24 |     }
25 |     if (confidence < conf_thresh)
26 |     {
27 |         return;
28 |     }
29 |     int index = atomicAdd(dst + dy * dstArea, 1);
30 |
31 |     if (index >= topK)
32 |     {
33 |         return;
34 |     }
35 |     float cx = *pitem++;
36 |     float cy = *pitem++;
37 |     float width = *pitem++;
38 |     float height = *pitem++;
39 |
40 |     float left = cx - width * 0.5f;
41 |     float top = cy - height * 0.5f;
42 |     float right = cx + width * 0.5f;
43 |     float bottom = cy + height * 0.5f;
44 |     float* pout_item = dst + dy * dstArea + 1 + index * dstWidth;
45 |     *pout_item++ = left;
46 |     *pout_item++ = top;
47 |     *pout_item++ = right;
48 |     *pout_item++ = bottom;
49 |     *pout_item++ = confidence;
50 |     *pout_item++ = label;
51 |     *pout_item++ = 1; // keepflag for NMS
52 |     memcpy(pout_item, pitem + num_class, 32 * sizeof(float)); // 32 mask coefficients
53 | }
54 |
55 | void yolov8seg::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
56 | {
57 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
58 |     dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
59 |         (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
60 |     int dstArea = 1 + dstWidth * dstHeight;
61 |     decode_yolov8_seg_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size, param.num_class, param.topK, param.conf_thresh,
62 |         src, srcWidth, srcHeight, srcArea,
63 |         dst, dstWidth, dstArea);
64 | }
65 |
66 | __global__ void transpose_device_kernel(int batch_size,
67 |     float* src, int srcWidth, int srcHeight, int srcArea,
68 |     float* dst, int dstWidth, int dstHeight, int dstArea)
69 | {
70 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
71 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
72 |     if (dx >= dstHeight || dy >= batch_size)
73 |     {
74 |         return;
75 |     }
76 |     float* p_dst_row = dst + dy * dstArea + dx * dstWidth;
77 |     float* p_src_col = src + dy * srcArea + dx;
78 |
79 |     for (int i = 0; i < dstWidth; i++)
80 |     {
81 |         p_dst_row[i] = p_src_col[i * srcWidth];
82 |     }
83 | }
84 |
85 | void yolov8seg::transposeDevice(utils::InitParameter param,
86 |     float* src, int srcWidth, int srcHeight, int srcArea,
87 |     float* dst, int dstWidth, int dstHeight)
88 | {
89 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
90 |     dim3 grid_size((dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
91 |         (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
92 |     int dstArea = dstWidth * dstHeight;
93 |     transpose_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size,
94 |         src, srcWidth, srcHeight, srcArea,
95 |         dst, dstWidth, dstHeight, dstArea);
96 | }
97 |
98 |
-------------------------------------------------------------------------------- /yolov8-seg/decode_yolov8_seg.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/utils.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | namespace yolov8seg
6 | {
7 |     void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
8 |     void transposeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight);
9 | }
10 |
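Each model directory ships the same transposeDevice helper because the raw YOLOv8 head emits [batch, channels, anchors] (e.g. 116 x 8400 for seg), while the decode kernels expect one candidate per row, i.e. [batch, anchors, channels]. A single-threaded host reference of the index mapping the kernel implements, with illustrative names:

```cpp
// Illustrative single-threaded reference for transposeDevice (not repo code):
// src is [channels x anchors] for one image; dst becomes [anchors x channels].
void transposeHostRef(const float* src, float* dst, int channels, int anchors)
{
    for (int a = 0; a < anchors; a++)      // "dx" in the kernel
        for (int c = 0; c < channels; c++) // "i" in the kernel's inner loop
            dst[a * channels + c] = src[c * anchors + a];
}
```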
-------------------------------------------------------------------------------- /yolov8-seg/yolov8_seg.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <Eigen/Dense>
3 | #include <opencv2/opencv.hpp>
4 | #include"../utils/yolo.h"
5 | #include"../utils/utils.h"
6 | class YOLOv8Seg : public yolo::YOLO
7 | {
8 | public:
9 |     YOLOv8Seg(const utils::InitParameter& param);
10 |     ~YOLOv8Seg();
11 |     virtual bool init(const std::vector<unsigned char>& trtFile);
12 |     virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
13 |     virtual bool infer();
14 |     virtual void postprocess(const std::vector<cv::Mat>& imgsBatch);
15 |     virtual void reset();
16 |
17 | public:
18 |     void showAndSave(const std::vector<std::string>& classNames,
19 |         const int& cvDelayTime, std::vector<cv::Mat>& imgsBatch);
20 |
21 | private:
22 |     float* m_output_src_transpose_device;
23 |     float* m_output_seg_device; // eg: 116 * 8400, 116 = 4 + 80 + 32
24 |     float* m_output_objects_device;
25 |
26 |     float* m_output_seg_host;
27 |     float* m_output_objects_host;
28 |
29 |     int m_output_objects_width; // 39 = 32 + 7, 7: left, top, right, bottom, confidence, class, keepflag
30 |     int m_output_src_width; // 116 = 4 + 80 + 32, 4: xyxy; 80: coco labels; 32: seg coefficients
31 |     nvinfer1::Dims m_output_seg_dims;
32 |     int m_output_obj_area;
33 |     int m_output_seg_area;
34 |     int m_output_seg_w;
35 |     int m_output_seg_h;
36 |
37 |     cv::Mat m_mask160;
38 |     Eigen::MatrixXf m_mask_eigen160;
39 |     cv::Rect m_thresh_roi160;
40 |     cv::Rect m_thresh_roisrc;
41 |     float m_downsample_scale;
42 |     cv::Mat m_mask_src;
43 |     cv::Mat m_img_canvas;
44 | };
-------------------------------------------------------------------------------- /yolov8/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolov8 VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 | message(STATUS ${ALL_LIBS})
11 | file(GLOB CPPS
12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
16 | ${TensorRT_ROOT}/samples/common/logger.cpp
17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
18 | #${TensorRT_ROOT}/samples/common/sampleUtils.cpp
19 | )
20 | list(REMOVE_ITEM CPPS app_yolov8.cpp)
21 | message(STATUS CPPS = ${CPPS})
22 | list (LENGTH CPPS length)
23 | message(STATUS ***length*** = ${length})
24 | find_package(OpenCV REQUIRED)
25 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
26 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
27 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
28 | add_library(${PROJECT_NAME} SHARED ${CPPS})
29 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
30 |
31 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
32 | target_compile_options(${PROJECT_NAME} PUBLIC
33 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
34 |
35 | add_executable(app_yolov8 app_yolov8.cpp)
36 |
37 | # NVCC
38 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
39 | target_link_libraries(app_yolov8 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
40 |
-------------------------------------------------------------------------------- /yolov8/README.md:
--------------------------------------------------------------------------------
1 | ## 1. get onnx
2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) or export onnx:
3 | ```bash
4 | # 🔥 yolov8 official repo: https://github.com/ultralytics/ultralytics
5 | # 🔥 yolov8 quickstart: https://docs.ultralytics.com/quickstart/
6 | # 🚀 TensorRT-Alpha will be updated synchronously as soon as possible!
7 |
8 | # install yolov8
9 | conda create -n yolov8 python==3.8 -y # for Linux
10 | # conda create -n yolov8 python=3.9 -y # for Windows10
11 | conda activate yolov8
12 | pip install ultralytics==8.0.5
13 | pip install onnx==1.12.0
14 |
15 | # download official weights (".pt" files)
16 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt
17 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt
18 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt
19 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt
20 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt
21 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x6.pt
22 | ```
23 |
24 | export onnx:
25 | ```bash
26 | # 640
27 | yolo mode=export model=yolov8n.pt format=onnx dynamic=True opset=12 #simplify=True
28 | yolo mode=export model=yolov8s.pt format=onnx dynamic=True opset=12 #simplify=True
29 | yolo mode=export model=yolov8m.pt format=onnx dynamic=True opset=12 #simplify=True
30 | yolo mode=export model=yolov8l.pt format=onnx dynamic=True opset=12 #simplify=True
31 | yolo mode=export model=yolov8x.pt format=onnx dynamic=True opset=12 #simplify=True
32 | # 1280
33 | yolo mode=export model=yolov8x6.pt format=onnx dynamic=True opset=12 #simplify=True
34 | ```
35 |
36 | ## 2. edit and save onnx
37 | ```bash
38 | # note: if you obtained the onnx by downloading, this step can be ignored
39 | ignore
40 | ```
41 |
42 | ## 3. compile onnx
43 | ```bash
44 | # put your onnx file in this path: tensorrt-alpha/data/yolov8
45 | cd tensorrt-alpha/data/yolov8
46 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
47 | # 640
48 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
49 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8s.onnx --saveEngine=yolov8s.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
50 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8m.onnx --saveEngine=yolov8m.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
51 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8l.onnx --saveEngine=yolov8l.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
52 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8x.onnx --saveEngine=yolov8x.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
53 | # 1280
54 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8x6.onnx --saveEngine=yolov8x6.trt --buildOnly --minShapes=images:1x3x1280x1280 --optShapes=images:2x3x1280x1280 --maxShapes=images:4x3x1280x1280
55 | ```
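Before wiring a freshly built engine into the C++ side, it helps to confirm what trtexec produced; for the 640 exports here, the input "images" should be dynamic [-1, 3, 640, 640] and "output0" [-1, 84, 8400] (84 = 4 box values + 80 classes). A minimal binding inspector, assuming TensorRT 8.x; it is a sketch, not repository code:

```cpp
// Illustrative binding inspector for a deserialized engine (not repo code).
#include <NvInfer.h>
#include <iostream>

void printBindings(const nvinfer1::ICudaEngine& engine)
{
    for (int i = 0; i < engine.getNbBindings(); i++)
    {
        nvinfer1::Dims d = engine.getBindingDimensions(i);
        std::cout << (engine.bindingIsInput(i) ? "input  " : "output ")
                  << engine.getBindingName(i) << ": [";
        for (int k = 0; k < d.nbDims; k++)
            std::cout << d.d[k] << (k + 1 < d.nbDims ? ", " : "");
        std::cout << "]" << std::endl; // -1 marks a dynamic dimension
    }
}
```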
56 | ## 4. run
57 | ```bash
58 | git clone https://github.com/FeiYull/tensorrt-alpha
59 | cd tensorrt-alpha/yolov8
60 | mkdir build
61 | cd build
62 | cmake ..
63 | make -j10
64 | # note: the dstImage will be saved in tensorrt-alpha/yolov8/build by default
65 |
66 | ## 640
67 | # infer image
68 | ./app_yolov8 --model=../../data/yolov8/yolov8n.trt --size=640 --batch_size=1 --img=../../data/6406407.jpg --show --savePath=../
69 |
70 | # infer video
71 | ./app_yolov8 --model=../../data/yolov8/yolov8n.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show
72 |
73 | # infer camera
74 | ./app_yolov8 --model=../../data/yolov8/yolov8n.trt --size=640 --batch_size=2 --cam_id=0 --show
75 |
76 | ## 1280
77 | # infer camera
78 | ./app_yolov8 --model=../../data/yolov8/yolov8x6.trt --size=1280 --batch_size=2 --cam_id=0 --show
79 | ```
80 | ## 5. appendix
81 | ignore
-------------------------------------------------------------------------------- /yolov8/decode_yolov8.cu:
--------------------------------------------------------------------------------
1 | #include "decode_yolov8.h"
2 |
3 | __global__ void decode_yolov8_device_kernel(int batch_size, int num_class, int topK, float conf_thresh,
4 |     float* src, int srcWidth, int srcHeight, int srcArea,
5 |     float* dst, int dstWidth, int dstHeight, int dstArea)
6 | {
7 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
8 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
9 |     if (dx >= srcHeight || dy >= batch_size)
10 |     {
11 |         return;
12 |     }
13 |     float* pitem = src + dy * srcArea + dx * srcWidth;
14 |     float* class_confidence = pitem + 4;
15 |     float confidence = *class_confidence++;
16 |     int label = 0;
17 |     for (int i = 1; i < num_class; ++i, ++class_confidence) // argmax over class scores
18 |     {
19 |         if (*class_confidence > confidence)
20 |         {
21 |             confidence = *class_confidence;
22 |             label = i;
23 |         }
24 |     }
25 |     if (confidence < conf_thresh)
26 |     {
27 |         return;
28 |     }
29 |     int index = atomicAdd(dst + dy * dstArea, 1);
30 |
31 |     if (index >= topK)
32 |     {
33 |         return;
34 |     }
35 |     float cx = *pitem++;
36 |     float cy = *pitem++;
37 |     float width = *pitem++;
38 |     float height = *pitem++;
39 |
40 |     float left = cx - width * 0.5f;
41 |     float top = cy - height * 0.5f;
42 |     float right = cx + width * 0.5f;
43 |     float bottom = cy + height * 0.5f;
44 |     float* pout_item = dst + dy * dstArea + 1 + index * dstWidth;
45 |     *pout_item++ = left;
46 |     *pout_item++ = top;
47 |     *pout_item++ = right;
48 |     *pout_item++ = bottom;
49 |     *pout_item++ = confidence;
50 |     *pout_item++ = label;
51 |     *pout_item++ = 1; // keepflag for NMS
52 | }
53 |
54 | void yolov8::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
55 | {
56 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
57 |     dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
58 |         (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
59 |     int dstArea = 1 + dstWidth * dstHeight;
60 |
61 |     decode_yolov8_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size, param.num_class, param.topK, param.conf_thresh,
62 |         src, srcWidth, srcHeight, srcArea,
63 |         dst, dstWidth, dstHeight, dstArea);
64 | }
65 |
66 |
67 | __global__ void transpose_device_kernel(int batch_size,
68 |     float* src, int srcWidth, int srcHeight, int srcArea,
69 |     float* dst, int dstWidth, int dstHeight, int dstArea)
70 | {
71 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
72 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
73 |     if (dx >= dstHeight || dy >= batch_size)
74 |     {
75 |         return;
76 |     }
77 |     float* p_dst_row = dst + dy * dstArea + dx * dstWidth;
78 |     float* p_src_col = src + dy * srcArea + dx;
79 |
80 |     for (int i = 0; i < dstWidth; i++)
81 |     {
82 |         p_dst_row[i] = p_src_col[i * srcWidth];
83 |     }
84 | }
85 |
86 | void yolov8::transposeDevice(utils::InitParameter param,
87 |     float* src, int srcWidth, int srcHeight, int srcArea,
88 |     float* dst, int dstWidth, int dstHeight)
89 | {
90 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
91 |     dim3 grid_size((dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
92 |         (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
93 |     int dstArea = dstWidth * dstHeight;
94 |
95 |     transpose_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size,
96 |         src, srcWidth, srcHeight, srcArea,
97 |         dst, dstWidth, dstHeight, dstArea);
98 | }
99 |
100 |
101 |
-------------------------------------------------------------------------------- /yolov8/decode_yolov8.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/utils.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | namespace yolov8
6 | {
7 |     void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
8 |     void transposeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight);
9 | }
10 |
-------------------------------------------------------------------------------- /yolov8/yolov8.cpp:
--------------------------------------------------------------------------------
1 | #include"yolov8.h"
2 | #include"decode_yolov8.h"
3 |
4 | YOLOV8::YOLOV8(const utils::InitParameter& param) :yolo::YOLO(param)
5 | {
6 | }
7 |
8 | YOLOV8::~YOLOV8()
9 | {
10 |     CHECK(cudaFree(m_output_src_transpose_device));
11 | }
12 |
13 | bool YOLOV8::init(const std::vector<unsigned char>& trtFile)
14 | {
15 |     if (trtFile.empty())
16 |     {
17 |         return false;
18 |     }
19 |     std::unique_ptr<nvinfer1::IRuntime> runtime =
20 |         std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
21 |     if (runtime == nullptr)
22 |     {
23 |         return false;
24 |     }
25 |     this->m_engine = std::unique_ptr<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(trtFile.data(), trtFile.size()));
26 |
27 |     if (this->m_engine == nullptr)
28 |     {
29 |         return false;
30 |     }
31 |     this->m_context = std::unique_ptr<nvinfer1::IExecutionContext>(this->m_engine->createExecutionContext());
32 |     if (this->m_context == nullptr)
33 |     {
34 |         return false;
35 |     }
36 |     if (m_param.dynamic_batch)
37 |     {
38 |         this->m_context->setBindingDimensions(0, nvinfer1::Dims4(m_param.batch_size, 3, m_param.dst_h, m_param.dst_w));
39 |     }
40 |     m_output_dims = this->m_context->getBindingDimensions(1);
41 |     m_total_objects = m_output_dims.d[2]; // eg: 8400 candidate anchors
42 |     assert(m_param.batch_size <= m_output_dims.d[0]);
43 |     m_output_area = 1;
44 |     for (int i = 1; i < m_output_dims.nbDims; i++)
45 |     {
46 |         if (m_output_dims.d[i] != 0)
47 |         {
48 |             m_output_area *= m_output_dims.d[i];
49 |         }
50 |     }
51 |     CHECK(cudaMalloc(&m_output_src_device, m_param.batch_size * m_output_area * sizeof(float)));
52 |     CHECK(cudaMalloc(&m_output_src_transpose_device, m_param.batch_size * m_output_area * sizeof(float)));
53 |     float a = float(m_param.dst_h) / m_param.src_h;
54 |     float b = float(m_param.dst_w) / m_param.src_w;
55 |     float scale = a < b ? a : b;
56 |     cv::Mat src2dst = (cv::Mat_<float>(2, 3) << scale, 0.f, (-scale * m_param.src_w + m_param.dst_w + scale - 1) * 0.5,
57 |         0.f, scale, (-scale * m_param.src_h + m_param.dst_h + scale - 1) * 0.5);
58 |     cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1);
59 |     cv::invertAffineTransform(src2dst, dst2src);
60 |
61 |     m_dst2src.v0 = dst2src.ptr<float>(0)[0];
62 |     m_dst2src.v1 = dst2src.ptr<float>(0)[1];
63 |     m_dst2src.v2 = dst2src.ptr<float>(0)[2];
64 |     m_dst2src.v3 = dst2src.ptr<float>(1)[0];
65 |     m_dst2src.v4 = dst2src.ptr<float>(1)[1];
66 |     m_dst2src.v5 = dst2src.ptr<float>(1)[2];
67 |
68 |     return true;
69 | }
70 |
71 | void YOLOV8::preprocess(const std::vector<cv::Mat>& imgsBatch)
72 | {
73 |     resizeDevice(m_param.batch_size, m_input_src_device, m_param.src_w, m_param.src_h,
74 |         m_input_resize_device, m_param.dst_w, m_param.dst_h, 114, m_dst2src);
75 |     bgr2rgbDevice(m_param.batch_size, m_input_resize_device, m_param.dst_w, m_param.dst_h,
76 |         m_input_rgb_device, m_param.dst_w, m_param.dst_h);
77 |     normDevice(m_param.batch_size, m_input_rgb_device, m_param.dst_w, m_param.dst_h,
78 |         m_input_norm_device, m_param.dst_w, m_param.dst_h, m_param);
79 |     hwc2chwDevice(m_param.batch_size, m_input_norm_device, m_param.dst_w, m_param.dst_h,
80 |         m_input_hwc_device, m_param.dst_w, m_param.dst_h);
81 | }
82 |
83 |
84 | void YOLOV8::postprocess(const std::vector<cv::Mat>& imgsBatch)
85 | {
86 |     yolov8::transposeDevice(m_param, m_output_src_device, m_total_objects, 4 + m_param.num_class, m_total_objects * (4 + m_param.num_class),
87 |         m_output_src_transpose_device, 4 + m_param.num_class, m_total_objects);
88 |     yolov8::decodeDevice(m_param, m_output_src_transpose_device, 4 + m_param.num_class, m_total_objects, m_output_area,
89 |         m_output_objects_device, m_output_objects_width, m_param.topK);
90 |     // nms
91 |     //nmsDeviceV1(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1);
92 |     nmsDeviceV2(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1, m_output_idx_device, m_output_conf_device);
93 |     CHECK(cudaMemcpy(m_output_objects_host, m_output_objects_device, m_param.batch_size * sizeof(float) * (1 + 7 * m_param.topK), cudaMemcpyDeviceToHost));
94 |     for (size_t bi = 0; bi < imgsBatch.size(); bi++)
95 |     {
96 |         int num_boxes = std::min((int)(m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1))[0], m_param.topK);
97 |         for (int i = 0; i < num_boxes; i++)
98 |         {
99 |             float* ptr = m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1) + m_output_objects_width * i + 1;
100 |             int keep_flag = ptr[6];
101 |             if (keep_flag)
102 |             {
103 |                 float x_lt = m_dst2src.v0 * ptr[0] + m_dst2src.v1 * ptr[1] + m_dst2src.v2;
104 |                 float y_lt = m_dst2src.v3 * ptr[0] + m_dst2src.v4 * ptr[1] + m_dst2src.v5;
105 |                 float x_rb = m_dst2src.v0 * ptr[2] + m_dst2src.v1 * ptr[3] + m_dst2src.v2;
106 |                 float y_rb = m_dst2src.v3 * ptr[2] + m_dst2src.v4 * ptr[3] + m_dst2src.v5;
107 |                 m_objectss[bi].emplace_back(x_lt, y_lt, x_rb, y_rb, ptr[4], (int)ptr[5]);
108 |             }
109 |         }
110 |
111 |     }
112 | }
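The affine pair built in init() above is the heart of coordinate recovery: src2dst letterboxes the source frame onto the network canvas, and its inverse dst2src maps decoded boxes back, exactly as postprocess() does with v0..v5. A small self-contained check of the same math, assuming a 1920x1080 source and a 640x640 canvas; all names are illustrative:

```cpp
// Illustrative letterbox mapping check (not repo code).
#include <opencv2/opencv.hpp>
#include <algorithm>
#include <iostream>

int main()
{
    const float src_w = 1920.f, src_h = 1080.f, dst_w = 640.f, dst_h = 640.f;
    const float scale = std::min(dst_h / src_h, dst_w / src_w); // 1/3 here
    cv::Mat src2dst = (cv::Mat_<float>(2, 3) <<
        scale, 0.f, (-scale * src_w + dst_w + scale - 1) * 0.5f,
        0.f, scale, (-scale * src_h + dst_h + scale - 1) * 0.5f);
    cv::Mat dst2src;
    cv::invertAffineTransform(src2dst, dst2src);
    // a box corner predicted at (320, 320) on the canvas...
    float x = dst2src.at<float>(0, 0) * 320 + dst2src.at<float>(0, 1) * 320 + dst2src.at<float>(0, 2);
    float y = dst2src.at<float>(1, 0) * 320 + dst2src.at<float>(1, 1) * 320 + dst2src.at<float>(1, 2);
    std::cout << "(320,320) on the 640x640 canvas -> (" << x << ", " << y
              << ") in the source" << std::endl; // about (961, 541), near the source centre
    return 0;
}
```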
-------------------------------------------------------------------------------- /yolov8/yolov8.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/yolo.h"
3 | #include"../utils/utils.h"
4 | class YOLOV8 : public yolo::YOLO
5 | {
6 | public:
7 |     YOLOV8(const utils::InitParameter& param);
8 |     ~YOLOV8();
9 |     virtual bool init(const std::vector<unsigned char>& trtFile);
10 |     virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
11 |     virtual void postprocess(const std::vector<cv::Mat>& imgsBatch);
12 |
13 | private:
14 |     float* m_output_src_transpose_device;
15 | };
-------------------------------------------------------------------------------- /yolox/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolox VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 | message(STATUS ${ALL_LIBS})
11 | file(GLOB CPPS
12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
16 | ${TensorRT_ROOT}/samples/common/logger.cpp
17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
18 | )
19 | list(REMOVE_ITEM CPPS app_yolox.cpp)
20 | message(STATUS CPPS = ${CPPS})
21 | list (LENGTH CPPS length)
22 | message(STATUS ***length*** = ${length})
23 | find_package(OpenCV REQUIRED)
24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
27 | add_library(${PROJECT_NAME} SHARED ${CPPS})
28 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
29 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
30 | target_compile_options(${PROJECT_NAME} PUBLIC
31 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
32 |
33 | add_executable(app_yolox app_yolox.cpp)
34 |
35 | # NVCC
36 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
37 | target_link_libraries(app_yolox ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
38 |
-------------------------------------------------------------------------------- /yolox/README.md:
--------------------------------------------------------------------------------
1 | ## Special note
2 | In the official YOLOX image preprocessing, the resize includes padding, but the padding is applied only at the right and bottom borders of the image.
3 | The kernel approach in this repository is: resize the image with its aspect ratio preserved (interpolation), storing the result in m_input_resize_without_padding_device,
4 | then copy that result into m_input_resize_device (allocated as
5 | 416 * 416 * 3 * batch_size or 640 * 640 * 3 * batch_size, initialized to {114, 114, 114}).
6 | In addition, since the interpolation does not exactly match OpenCV's, there are slight differences, but the final detection results are almost identical: the box positions are the same, and the confidences differ only from the second decimal place.
7 | Finally, the model supports a fixed batch size only.
8 |
9 | ## 1. get onnx
10 | download onnx (default: batch_size=2) directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv?usp=sharing)
11 | or export onnx:
12 | ```bash
13 | git clone https://github.com/Megvii-BaseDetection/YOLOX
14 | cd YOLOX && git checkout 0.3.0
15 |
16 | ## batch_size=1
17 | # 640 for image
18 | python tools/export_onnx.py --output-name=yolox_s.onnx --exp_file=exps/default/yolox_s.py --ckpt=yolox_s.pth --decode_in_inference --batch-size=1
19 | python tools/export_onnx.py --output-name=yolox_m.onnx --exp_file=exps/default/yolox_m.py --ckpt=yolox_m.pth --decode_in_inference --batch-size=1
20 | python tools/export_onnx.py --output-name=yolox_x.onnx --exp_file=exps/default/yolox_x.py --ckpt=yolox_x.pth --decode_in_inference --batch-size=1
21 |
22 | # 416 for image
23 | python tools/export_onnx.py --output-name=yolox_nano.onnx --exp_file=exps/default/yolox_nano.py --ckpt=yolox_nano.pth --decode_in_inference --batch-size=1
24 | python tools/export_onnx.py --output-name=yolox_tiny.onnx --exp_file=exps/default/yolox_tiny.py --ckpt=yolox_tiny.pth --decode_in_inference --batch-size=1
25 |
26 | ## batch_size > 1
27 | # For example, setting --batch-size=2 in the export commands above also works; note that when running the app later, you must pass the same value, e.g. --batch_size=2.
28 | ```
29 |
30 | ## 2. edit and save onnx
31 | ```bash
32 | # note: if you obtained the onnx by downloading, this step can be ignored
33 | ignore
34 | ```
35 |
36 | ## 3. compile onnx
37 | ```bash
38 | # put your onnx file in this path: tensorrt-alpha/data/yolox
39 | cd tensorrt-alpha/data/yolox
40 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
41 |
42 | # 640
43 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_s.onnx --saveEngine=yolox_s.trt --buildOnly
44 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_m.onnx --saveEngine=yolox_m.trt --buildOnly
45 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_x.onnx --saveEngine=yolox_x.trt --buildOnly
46 |
47 | # 416
48 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_nano.onnx --saveEngine=yolox_nano.trt --buildOnly
49 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_tiny.onnx --saveEngine=yolox_tiny.trt --buildOnly
50 | ```
51 |
52 | ## 4. run
53 | ```bash
54 | git clone https://github.com/FeiYull/tensorrt-alpha
55 | cd tensorrt-alpha/yolox
56 | mkdir build
57 | cd build
58 | cmake ..
59 | make -j10
60 | # note: the dstImage will be saved in tensorrt-alpha/yolox/build by default
61 | # only static multi-batch inference is supported!
62 | # to use a different batch_size, the onnx needs to be exported and compiled again
63 |
64 | ## 640
65 | # infer image
66 | ./app_yolox --model=../../data/yolox/yolox_s.trt --size=640 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../
67 |
68 | # infer video
69 | ./app_yolox --model=../../data/yolox/yolox_s.trt --size=640 --batch_size=1 --video=../../data/people.mp4 --show
70 |
71 | # infer camera
72 | ./app_yolox --model=../../data/yolox/yolox_s.trt --size=640 --batch_size=1 --cam_id=0 --show
73 |
74 | # 416
75 | ./app_yolox --model=../../data/yolox/yolox_nano.trt --size=416 --batch_size=1 --img=../../data/6406401.jpg --show --savePath
76 | ```
77 | ## 5. appendix
78 | ignore
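As the special note above explains, YOLOX pads only the right and bottom borders, which is why yolox.cu below builds src2dst with a bare (scale - 1) * 0.5 half-pixel term instead of the centring term used for YOLOv8. A tiny standalone comparison of the two offsets, assuming a 1920x1080 source and a 640x640 canvas; it is a sketch, not repository code:

```cpp
// Illustrative comparison of the two letterbox affines (not repo code).
#include <cstdio>

int main()
{
    const float src_h = 1080.f, dst_h = 640.f;
    const float s = 1.f / 3.f; // min(640/1920, 640/1080)
    // YOLOX: top-left anchored, pad right/bottom -> only a half-pixel offset
    float ty_yolox = (s - 1.f) * 0.5f;
    // YOLOv8: centred letterbox -> shifts the resized image to the middle
    float ty_yolov8 = (-s * src_h + dst_h + s - 1.f) * 0.5f;
    std::printf("yolox  y-offset: %.3f (image stays at the top edge)\n", ty_yolox);
    std::printf("yolov8 y-offset: %.3f (about 140 px of top padding)\n", ty_yolov8);
    return 0;
}
```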
-------------------------------------------------------------------------------- /yolox/yolox.cu:
--------------------------------------------------------------------------------
1 | #include"yolox.h"
2 |
3 | YOLOX::YOLOX(const utils::InitParameter& param) :yolo::YOLO(param)
4 | {
5 | }
6 | YOLOX::~YOLOX()
7 | {
8 |     CHECK(cudaFree(m_input_resize_without_padding_device));
9 | }
10 | bool YOLOX::init(const std::vector<unsigned char>& trtFile)
11 | {
12 |     if (trtFile.empty())
13 |     {
14 |         return false;
15 |     }
16 |     std::unique_ptr<nvinfer1::IRuntime> runtime =
17 |         std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
18 |     if (runtime == nullptr)
19 |     {
20 |         return false;
21 |     }
22 |     this->m_engine = std::unique_ptr<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(trtFile.data(), trtFile.size()));
23 |
24 |     if (this->m_engine == nullptr)
25 |     {
26 |         return false;
27 |     }
28 |     this->m_context = std::unique_ptr<nvinfer1::IExecutionContext>(this->m_engine->createExecutionContext());
29 |     if (this->m_context == nullptr)
30 |     {
31 |         return false;
32 |     }
33 |     // binding dim
34 |     // ...
35 |     //nvinfer1::Dims input_dims = this->m_context->getBindingDimensions(0);
36 |     m_output_dims = this->m_context->getBindingDimensions(1);
37 |     m_total_objects = m_output_dims.d[1];
38 |     assert(m_param.batch_size == m_output_dims.d[0] ||
39 |         m_param.batch_size == 1 // batch_size = 1, but it will infer with "batch_size = m_output_dims.d[0]"; only a static batch is supported
40 |     );
41 |     m_output_area = 1;
42 |     for (int i = 1; i < m_output_dims.nbDims; i++)
43 |     {
44 |         if (m_output_dims.d[i] != 0)
45 |         {
46 |             m_output_area *= m_output_dims.d[i];
47 |         }
48 |     }
49 |     CHECK(cudaMalloc(&m_output_src_device, m_param.batch_size * m_output_area * sizeof(float)));
50 |     float a = float(m_param.dst_h) / m_param.src_h;
51 |     float b = float(m_param.dst_w) / m_param.src_w;
52 |     float scale = a < b ? a : b;
53 |     m_resized_h = roundf((float)m_param.src_h * scale);
54 |     m_resized_w = roundf((float)m_param.src_w * scale);
55 |
56 |     CHECK(cudaMalloc(&m_input_resize_without_padding_device,
57 |         m_param.batch_size * 3 * m_resized_h * m_resized_w * sizeof(float)));
58 |     cv::Mat src2dst = (cv::Mat_<float>(2, 3) << scale, 0.f, (scale - 1) * 0.5,
59 |         0.f, scale, (scale - 1) * 0.5);
60 |     cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1);
61 |     cv::invertAffineTransform(src2dst, dst2src);
62 |     m_dst2src.v0 = dst2src.ptr<float>(0)[0];
63 |     m_dst2src.v1 = dst2src.ptr<float>(0)[1];
64 |     m_dst2src.v2 = dst2src.ptr<float>(0)[2];
65 |     m_dst2src.v3 = dst2src.ptr<float>(1)[0];
66 |     m_dst2src.v4 = dst2src.ptr<float>(1)[1];
67 |     m_dst2src.v5 = dst2src.ptr<float>(1)[2];
68 |     return true;
69 | }
70 | void YOLOX::preprocess(const std::vector<cv::Mat>& imgsBatch)
71 | {
72 |     resizeDevice(m_param.batch_size, m_input_src_device, m_param.src_w, m_param.src_h,
73 |         m_input_resize_without_padding_device, m_resized_w, m_resized_h, 114, m_dst2src);
74 |     copyWithPaddingDevice(m_param.batch_size, m_input_resize_without_padding_device, m_resized_w, m_resized_h,
75 |         m_input_resize_device, m_param.dst_w, m_param.dst_h, 114.f);
76 |     hwc2chwDevice(m_param.batch_size, m_input_resize_device, m_param.dst_w, m_param.dst_h,
77 |         m_input_hwc_device, m_param.dst_w, m_param.dst_h);
78 | }
79 | __global__
80 | void copy_with_padding_kernel_function(int batchSize, float* src, int srcWidth, int srcHeight, int srcArea, int srcVolume,
81 |     float* dst, int dstWidth, int dstHeight, int dstArea, int dstVolume, float paddingValue)
82 | {
83 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
84 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
85 |     if (dx < dstArea && dy < batchSize)
86 |     {
87 |         int dst_y = dx / dstWidth;
88 |         int dst_x = dx % dstWidth;
89 |         float* pdst = dst + dy * dstVolume + dst_y * dstWidth * 3 + dst_x * 3;
90 |
91 |         if (dst_y < srcHeight && dst_x < srcWidth)
92 |         {
93 |             float* psrc = src + dy * srcVolume + dst_y * srcWidth * 3 + dst_x * 3;
94 |             pdst[0] = psrc[0];
95 |             pdst[1] = psrc[1];
96 |             pdst[2] = psrc[2];
97 |         }
98 |         else // right/bottom border: fill with the padding value
99 |         {
100 |             pdst[0] = paddingValue;
101 |             pdst[1] = paddingValue;
102 |             pdst[2] = paddingValue;
103 |         }
104 |     }
105 | }
106 | void copyWithPaddingDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
107 |     float* dst, int dstWidth, int dstHeight, float paddingValue)
108 | {
109 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
110 |     dim3 grid_size((dstWidth * dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
111 |         (batchSize + BLOCK_SIZE - 1) / BLOCK_SIZE);
112 |     int src_area = srcHeight * srcWidth;
113 |     int dst_area = dstHeight * dstWidth;
114 |
115 |     int src_volume = 3 * srcHeight * srcWidth;
116 |     int dst_volume = 3 * dstHeight * dstWidth;
117 |     assert(srcWidth <= dstWidth);
118 |     assert(srcHeight <= dstHeight);
119 |     copy_with_padding_kernel_function <<< grid_size, block_size, 0, nullptr >>> (batchSize, src, srcWidth, srcHeight, src_area, src_volume,
120 |         dst, dstWidth, dstHeight, dst_area, dst_volume, paddingValue);
121 | }
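copyWithPaddingDevice above launches one x-thread per destination pixel per image: grid.x tiles dstWidth * dstHeight in steps of BLOCK_SIZE, and grid.y tiles the batch. A quick arithmetic check of that geometry, assuming BLOCK_SIZE is 16 (its real value lives in utils/kernel_function.h, not shown here):

```cpp
// Illustrative launch-geometry check for copyWithPaddingDevice (not repo code).
// Assumes BLOCK_SIZE == 16; the real constant lives in utils/kernel_function.h.
#include <cstdio>

int main()
{
    const int BLOCK_SIZE = 16, batch = 2, dst_w = 640, dst_h = 640;
    const int dst_area = dst_w * dst_h;                          // one x-thread per pixel
    const int grid_x = (dst_area + BLOCK_SIZE - 1) / BLOCK_SIZE; // 25600 blocks along x
    const int grid_y = (batch + BLOCK_SIZE - 1) / BLOCK_SIZE;    // 1 block along y
    std::printf("grid = (%d, %d), block = (%d, %d): %d x-threads cover %d pixels\n",
                grid_x, grid_y, BLOCK_SIZE, BLOCK_SIZE, grid_x * BLOCK_SIZE, dst_area);
    return 0;
}
```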
-------------------------------------------------------------------------------- /yolox/yolox.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/yolo.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | class YOLOX : public yolo::YOLO
6 | {
7 | public:
8 |     YOLOX(const utils::InitParameter& param);
9 |     ~YOLOX();
10 |     virtual bool init(const std::vector<unsigned char>& trtFile);
11 |     virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
12 | private:
13 |     float* m_input_resize_without_padding_device;
14 |     int m_resized_w;
15 |     int m_resized_h;
16 | };
17 | void copyWithPaddingDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
18 |     float* dst, int dstWidth, int dstHeight, float paddingValue);
--------------------------------------------------------------------------------
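Putting the yolox pieces together: app_yolox.cpp (not shown above) follows the same flow as the other app_*.cpp files. A compressed single-image sketch of that flow under the same utils:: helpers; the shipped app additionally configures binding names, display and save flags through setParameters() and setInputStream(), so treat this as an outline rather than the actual program:

```cpp
// Outline of a single-image run, mirroring the app_*.cpp files above
// (a sketch, not the shipped app_yolox.cpp).
#include "../utils/yolo.h"
#include "yolox.h"

int main()
{
    cv::Mat img = cv::imread("../../data/6406401.jpg");
    if (img.empty())
    {
        return -1;
    }
    utils::InitParameter param;
    param.class_names = utils::dataSets::coco80;
    param.num_class = 80;
    param.batch_size = 1;
    param.dst_h = param.dst_w = 640;
    param.src_h = img.rows; // normally filled in by setInputStream()
    param.src_w = img.cols;
    param.conf_thresh = 0.25f;
    param.iou_thresh = 0.7f;
    // the shipped app also sets param.input_output_names and show/save flags here

    YOLOX yolo(param);
    std::vector<unsigned char> trt_file = utils::loadModel("../../data/yolox/yolox_s.trt");
    if (trt_file.empty() || !yolo.init(trt_file))
    {
        return -1;
    }
    yolo.check();
    std::vector<cv::Mat> imgs_batch{ img };
    yolo.copy(imgs_batch);
    yolo.preprocess(imgs_batch);
    yolo.infer();
    yolo.postprocess(imgs_batch);
    yolo.reset();
    return 0;
}
```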