├── .github ├── examples │ └── python_with_dll │ │ ├── image-20230302203606848.png │ │ ├── image-20230302203807549.png │ │ ├── image-20230302205149660.png │ │ ├── image-20230302211219640.png │ │ ├── image-20230302211258968.png │ │ ├── image-20230302211446110.png │ │ ├── image-20230302212805461.png │ │ ├── image-20230302213219151.png │ │ ├── image-20230302213246167.png │ │ ├── image-20230302213433177.png │ │ ├── image-20230302214103308.png │ │ ├── image-20230302214127422.png │ │ ├── image-20230302220950777.png │ │ ├── image-20230302221408389.png │ │ ├── image-20230302221617892.png │ │ └── images-20230304121452.png ├── facemesh.jpg ├── libfacedet-Offical(left)vsOurs(right-topk-2000).jpg ├── libfacedet.gif ├── people.gif ├── u2net.gif ├── yolov5s-v5.7-Offical(left)vsOurs(right)-img1.jpg ├── yolov5s-v5.7-Offical(left)vsOurs(right)-img2.jpg ├── yolov6s-v6.3-Offical(left)vsOurs(right).jpg ├── yolov7-tiny-Offical(left)vsOurs(right).jpg ├── yolov8-snow.gif ├── yolov8-stree.gif ├── yolov8n-Offical(left)vsOurs(right).jpg └── yolov8n-b8-1080p-to-640.jpg ├── .gitignore ├── Install_For_Ubuntu18.04 └── Install_For_Ubuntu18.04.md ├── LICENSE ├── README.md ├── README_en.md ├── cmake └── common.cmake ├── data ├── 12801.jpg ├── 12802.jpg ├── 12803.jpg ├── 12804.jpg ├── 2.png ├── 51204.jpg ├── 6.jpg ├── 6086083.jpg ├── 6406401.jpg ├── 6406402.jpg ├── 6406403.jpg ├── 6406404.jpg ├── 6406406.jpg ├── 6406407.jpg ├── 7.jpg ├── bus.jpg ├── dog.jpg ├── efficientdet │ └── .gitkeep ├── im_01.png ├── image1.jpg ├── image2.jpg ├── image3.jpg ├── libfacedetction │ └── .gitkeep ├── long.jpg ├── mobilenetv3 │ └── .gitkeep ├── people.mp4 ├── pphumanseg │ └── .gitkeep ├── resnet18 │ └── .gitkeep ├── retinanet │ └── .gitkeep ├── rifle2.jpeg ├── road0.png ├── road1.jpg ├── sailboat3.jpg ├── ssd │ └── .gitkeep ├── swin │ └── .gitkeep ├── u2net │ └── .gitkeep ├── yolor │ ├── .gitkeep │ └── coco.names ├── yolov3 │ └── .gitkeep ├── yolov4 │ └── .gitkeep ├── yolov5 │ └── .gitkeep ├── yolov6 │ └── .gitkeep ├── yolov7 │ └── .gitkeep ├── yolov8-pose │ └── .gitkeep ├── yolov8 │ └── .gitkeep ├── yolox │ └── .gitkeep └── zidane.jpg ├── docker ├── README.md └── ubuntu18.04-cu113.Dockerfile ├── efficientdet ├── CMakeLists.txt ├── README.md ├── app_efficientdet.cpp ├── efficientdet.cpp └── efficientdet.h ├── examples └── python_with_dll │ ├── README.md │ ├── c_files │ ├── pch.cpp │ └── pch.h │ ├── config │ └── screen_inf.py │ └── python_trt.py ├── libfacedetection ├── CMakeLists.txt ├── README.md ├── alpha_edit.py ├── app_libfacedetction.cpp ├── libfacedetection.cu └── libfacedetection.h ├── pphumanseg ├── CMakeLists.txt ├── README.md ├── alpha_edit.py ├── app_pphunmanseg.cpp ├── decode_pphunmanseg.cu ├── decode_pphunmanseg.h ├── pphunmanseg.cpp └── pphunmanseg.h ├── requirements.txt ├── tools └── onnx2trt.cpp ├── u2net ├── CMakeLists.txt ├── README.md ├── alpha_export.py ├── app_u2net.cpp ├── u2net.cu └── u2net.h ├── utils ├── common_include.h ├── kernel_function.cu ├── kernel_function.h ├── tracking │ └── .gitkeep ├── utils.cpp ├── utils.h ├── yolo.cpp └── yolo.h ├── vscode └── launch.json ├── yolonas ├── CMakeLists.txt ├── README.md ├── alpha_export_dynamic.py ├── app_yolo_nas.cpp ├── decode_yolo_nas.cu ├── decode_yolo_nas.h ├── yolo_nas.cpp └── yolo_nas.h ├── yolor ├── CMakeLists.txt ├── README.md ├── alpha_export.py └── app_yolor.cpp ├── yolov3 ├── CMakeLists.txt ├── README.md ├── alpha_edit.py └── app_yolov3.cpp ├── yolov4 ├── CMakeLists.txt ├── README.md ├── alpha_export.py ├── app_yolov4.cpp ├── decode_yolov4.cu ├── 
decode_yolov4.h ├── yolov4.cpp └── yolov4.h ├── yolov5 ├── CMakeLists.txt ├── README.md ├── alpha_edit.py └── app_yolov5.cpp ├── yolov6 ├── CMakeLists.txt ├── README.md └── app_yolov6.cpp ├── yolov7 ├── CMakeLists.txt ├── README.md └── app_yolov7.cpp ├── yolov8-pose ├── CMakeLists.txt ├── README.md ├── app_yolov8_pose.cpp ├── decode_yolov8_pose.cu ├── decode_yolov8_pose.h ├── yolov8_pose.cpp └── yolov8_pose.h ├── yolov8-seg ├── CMakeLists.txt ├── README.md ├── app_yolov8_seg.cpp ├── decode_yolov8_seg.cu ├── decode_yolov8_seg.h ├── yolov8_seg.cpp └── yolov8_seg.h ├── yolov8 ├── CMakeLists.txt ├── README.md ├── app_yolov8.cpp ├── decode_yolov8.cu ├── decode_yolov8.h ├── yolov8.cpp └── yolov8.h └── yolox ├── CMakeLists.txt ├── README.md ├── app_yolox.cpp ├── yolox.cu └── yolox.h /.github/examples/python_with_dll/image-20230302203606848.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302203606848.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302203807549.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302203807549.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302205149660.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302205149660.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302211219640.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302211219640.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302211258968.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302211258968.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302211446110.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302211446110.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302212805461.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302212805461.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302213219151.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302213219151.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302213246167.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302213246167.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302213433177.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302213433177.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302214103308.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302214103308.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302214127422.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302214127422.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302220950777.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302220950777.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302221408389.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302221408389.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/image-20230302221617892.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/image-20230302221617892.png -------------------------------------------------------------------------------- /.github/examples/python_with_dll/images-20230304121452.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/examples/python_with_dll/images-20230304121452.png -------------------------------------------------------------------------------- /.github/facemesh.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/facemesh.jpg -------------------------------------------------------------------------------- /.github/libfacedet-Offical(left)vsOurs(right-topk-2000).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/libfacedet-Offical(left)vsOurs(right-topk-2000).jpg -------------------------------------------------------------------------------- /.github/libfacedet.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/libfacedet.gif -------------------------------------------------------------------------------- /.github/people.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/people.gif -------------------------------------------------------------------------------- /.github/u2net.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/u2net.gif -------------------------------------------------------------------------------- /.github/yolov5s-v5.7-Offical(left)vsOurs(right)-img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov5s-v5.7-Offical(left)vsOurs(right)-img1.jpg -------------------------------------------------------------------------------- /.github/yolov5s-v5.7-Offical(left)vsOurs(right)-img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov5s-v5.7-Offical(left)vsOurs(right)-img2.jpg -------------------------------------------------------------------------------- /.github/yolov6s-v6.3-Offical(left)vsOurs(right).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov6s-v6.3-Offical(left)vsOurs(right).jpg -------------------------------------------------------------------------------- /.github/yolov7-tiny-Offical(left)vsOurs(right).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov7-tiny-Offical(left)vsOurs(right).jpg -------------------------------------------------------------------------------- /.github/yolov8-snow.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov8-snow.gif -------------------------------------------------------------------------------- /.github/yolov8-stree.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov8-stree.gif 
-------------------------------------------------------------------------------- /.github/yolov8n-Offical(left)vsOurs(right).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov8n-Offical(left)vsOurs(right).jpg -------------------------------------------------------------------------------- /.github/yolov8n-b8-1080p-to-640.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/.github/yolov8n-b8-1080p-to-640.jpg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | 35 | # VS 2019 36 | *x64/ 37 | *.idea 38 | *CUDA_Demo.sln 39 | *vs 40 | *.vcxproj 41 | *.vcxproj.user 42 | *.user 43 | *.onnx 44 | *.trt 45 | *.vcxproj.filters 46 | *.sln 47 | 48 | #vscode 49 | *.vscode 50 | 51 | # linux 52 | *build 53 | 54 | *.ppm 55 | *.tgz 56 | *.prototxt 57 | *.caffemodel 58 | *.code-workspace 59 | 60 | *__pycache__ 61 | 62 | # deep learning's file 63 | 64 | *onnx 65 | *trt 66 | *pt 67 | *pth -------------------------------------------------------------------------------- /Install_For_Ubuntu18.04/Install_For_Ubuntu18.04.md: -------------------------------------------------------------------------------- 1 | ## 1. Install Tool Chains 2 | ```bash 3 | sudo apt-get update 4 | sudo apt-get install build-essential 5 | sudo apt-get install git 6 | sudo apt-get install gdb 7 | sudo apt-get install cmake 8 | ``` 9 | ```bash 10 | sudo apt-get install pkg-config libgtk-3-dev libavcodec-dev libavformat-dev libswscale-dev libv4l-dev libxvidcore-dev libx264-dev 11 | sudo apt-get install libopencv-dev 12 | # pkg-config --modversion opencv 13 | ``` 14 | ## 2. Install Nvidia Libs 15 | ### 2.1 install nvidia driver470 16 | ```bash 17 | ubuntu-drivers devices 18 | sudo add-apt-repository ppa:graphics-drivers/ppa 19 | sudo apt update 20 | sudo apt install nvidia-driver-470-server # for ubuntu18.04 21 | nvidia-smi 22 | ``` 23 | ### 2.2 install cuda11.3 24 | - enter: https://developer.nvidia.com/cuda-toolkit-archive 25 | - select:CUDA Toolkit 11.3.0(April 2021) 26 | - select:[Linux] -> [x86_64] -> [Ubuntu] -> [18.04] -> [runfile(local)]
27 | You will see installation instructions on the web page like this: 28 | ```bash 29 | wget https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda_11.3.0_465.19.01_linux.run 30 | sudo sh cuda_11.3.0_465.19.01_linux.run 31 | ``` 32 | The CUDA installer will display a selection window. 33 | - select: [continue] -> [accept] -> press Enter to deselect the first and second options, as shown below (**it is important!**) -> [Install]
34 | 35 | ```bash 36 | CUDA Installer 37 | [ ] Driver # cancel the first 38 | [ ] 465.19.01 # cancel the second 39 | [X] CUDA Toolkit 11.3 40 | [X] CUDA Samples 11.3 41 | [X] CUDA Demo Suite 11.3 42 | [X] CUDA Documentation 11.3 43 | ``` 44 | 45 | The bash window prints the following, which means the installation is OK. 46 | ```bash 47 | #=========== 48 | #= Summary = 49 | #=========== 50 | 51 | #Driver: Not Selected 52 | #Toolkit: Installed in /usr/local/cuda-11.3/ 53 | #...... 54 | ``` 55 | Add environment variables: 56 | ```bash 57 | vim ~/.bashrc 58 | ``` 59 | Copy and paste the following into .bashrc: 60 | ```bash 61 | # cuda v11.3 62 | export PATH=/usr/local/cuda-11.3/bin${PATH:+:${PATH}} 63 | export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} 64 | export CUDA_HOME=/usr/local/cuda-11.3 65 | ``` 66 | ```bash 67 | source ~/.bashrc 68 | nvcc -V 69 | ``` 70 | The bash window prints the following content:
71 |
72 | nvcc: NVIDIA (R) Cuda compiler driver
73 | Copyright (c) 2005-2021 NVIDIA Corporation
74 | Built on Sun_Mar_21_19:15:46_PDT_2021
75 | Cuda compilation tools, release 11.3, V11.3.58
76 | Build cuda_11.3.r11.3/compiler.29745058_0
77 |
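If `nvcc` is not found after `source ~/.bashrc`, a quick sanity check of the environment (a suggested addition; the paths below assume the default install prefix used above):
```bash
which nvcc             # expected: /usr/local/cuda-11.3/bin/nvcc
echo $LD_LIBRARY_PATH  # should contain /usr/local/cuda-11.3/lib64
nvidia-smi             # the driver from section 2.1 should still be visible
```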
78 | 79 | ### 2.3 install cudnn8.2 80 | - enter: https://developer.nvidia.com/rdp/cudnn-archive 81 | - select: Download cuDNN v8.2.0 (April 23rd, 2021), for CUDA 11.x 82 | - select: cuDNN Library for Linux (x86_64) 83 | - you will download the file: "cudnn-11.3-linux-x64-v8.2.0.53.tgz" 84 | ```bash 85 | tar -zxvf cudnn-11.3-linux-x64-v8.2.0.53.tgz 86 | ``` 87 | Copy cuDNN into CUDA 11.3's install directory: 88 | ```bash 89 | sudo cp cuda/include/cudnn.h /usr/local/cuda/include/ 90 | sudo cp cuda/lib64/libcudnn* /usr/local/cuda/lib64/ 91 | sudo chmod a+r /usr/local/cuda/include/cudnn.h 92 | sudo chmod a+r /usr/local/cuda/lib64/libcudnn* 93 | ``` 94 | ### 2.4 download tensorrt8.4.2.4 95 | - enter: https://developer.nvidia.cn/nvidia-tensorrt-8x-download 96 | - select: I Agree To the Terms of the NVIDIA TensorRT License Agreement 97 | - select: TensorRT 8.4 GA Update 1 98 | - select: TensorRT 8.4 GA Update 1 for Linux x86_64 and CUDA 11.0, 11.1, 11.2, 11.3, 11.4, 11.5, 11.6 and 11.7 TAR Package 99 | - you will download the file: "TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz" 100 | ```bash 101 | tar -zxvf TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz 102 | # test 103 | cd TensorRT-8.4.2.4/samples/sampleMNIST 104 | make 105 | cd ../../bin/ 106 | ``` 107 | Change the following path to your own path (**it is important!**): 108 | ```bash 109 | 110 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/xxx/temp/TensorRT-8.4.2.4/lib 111 | ./sample_mnist 112 | ``` 113 | The bash window prints digit recognition task information, which indicates that TensorRT 8.4.2.4 is installed correctly. 114 | -------------------------------------------------------------------------------- /cmake/common.cmake: -------------------------------------------------------------------------------- 1 | # set 2 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations") 3 | # find thirdparty 4 | find_package(CUDA REQUIRED) 5 | list(APPEND ALL_LIBS 6 | ${CUDA_LIBRARIES} 7 | ${CUDA_cublas_LIBRARY} 8 | ${CUDA_nppc_LIBRARY} ${CUDA_nppig_LIBRARY} ${CUDA_nppidei_LIBRARY} ${CUDA_nppial_LIBRARY}) 9 | 10 | # include cuda's header 11 | list(APPEND INCLUDE_DRIS ${CUDA_INCLUDE_DIRS}) 12 | # message(FATAL_ERROR "CUDA_npp_LIBRARY: ${CUDA_npp_LIBRARY}") 13 | 14 | # gather TensorRT lib 15 | #set(TensorRT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../TensorRT) 16 | #set(TensorRT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../TensorRT-8.2.1.8) 17 | #set(TensorRT_ROOT /home/feiyull/TensorRT-Plugin) 18 | set(TensorRT_ROOT /home/feiyull/TensorRT-8.4.2.4) 19 | #set(TensorRT_ROOT /home/feiyull/TensorRT-8.6.1.6) 20 | 21 | find_library(TRT_NVINFER NAMES nvinfer HINTS ${TensorRT_ROOT} PATH_SUFFIXES lib lib64 lib/x64) 22 | find_library(TRT_NVINFER_PLUGIN NAMES nvinfer_plugin HINTS ${TensorRT_ROOT} PATH_SUFFIXES lib lib64 lib/x64) 23 | find_library(TRT_NVONNX_PARSER NAMES nvonnxparser HINTS ${TensorRT_ROOT} PATH_SUFFIXES lib lib64 lib/x64) 24 | find_library(TRT_NVCAFFE_PARSER NAMES nvcaffe_parser HINTS ${TensorRT_ROOT} PATH_SUFFIXES lib lib64 lib/x64) 25 | find_path(TENSORRT_INCLUDE_DIR NAMES NvInfer.h HINTS ${TensorRT_ROOT} PATH_SUFFIXES include) 26 | list(APPEND ALL_LIBS ${TRT_NVINFER} ${TRT_NVINFER_PLUGIN} ${TRT_NVONNX_PARSER} ${TRT_NVCAFFE_PARSER}) 27 | 28 | # include tensorrt's headers 29 | list(APPEND INCLUDE_DRIS ${TENSORRT_INCLUDE_DIR}) 30 | 31 | # include tensorrt's sample/common headers 32 | #set(SAMPLES_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../common) 33 | #set(SAMPLES_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/common) 34 | set(SAMPLES_COMMON_DIR
${TensorRT_ROOT}/samples/common) 35 | list(APPEND INCLUDE_DRIS ${SAMPLES_COMMON_DIR}) 36 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 37 | message(STATUS "ALL_LIBS: ${ALL_LIBS}") 38 | -------------------------------------------------------------------------------- /data/12801.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/12801.jpg -------------------------------------------------------------------------------- /data/12802.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/12802.jpg -------------------------------------------------------------------------------- /data/12803.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/12803.jpg -------------------------------------------------------------------------------- /data/12804.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/12804.jpg -------------------------------------------------------------------------------- /data/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/2.png -------------------------------------------------------------------------------- /data/51204.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/51204.jpg -------------------------------------------------------------------------------- /data/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6.jpg -------------------------------------------------------------------------------- /data/6086083.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6086083.jpg -------------------------------------------------------------------------------- /data/6406401.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406401.jpg -------------------------------------------------------------------------------- /data/6406402.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406402.jpg -------------------------------------------------------------------------------- /data/6406403.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406403.jpg -------------------------------------------------------------------------------- /data/6406404.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406404.jpg -------------------------------------------------------------------------------- /data/6406406.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406406.jpg -------------------------------------------------------------------------------- /data/6406407.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/6406407.jpg -------------------------------------------------------------------------------- /data/7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/7.jpg -------------------------------------------------------------------------------- /data/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/bus.jpg -------------------------------------------------------------------------------- /data/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/dog.jpg -------------------------------------------------------------------------------- /data/efficientdet/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/efficientdet/.gitkeep -------------------------------------------------------------------------------- /data/im_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/im_01.png -------------------------------------------------------------------------------- /data/image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/image1.jpg -------------------------------------------------------------------------------- /data/image2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/image2.jpg -------------------------------------------------------------------------------- /data/image3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/image3.jpg -------------------------------------------------------------------------------- /data/libfacedetction/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/libfacedetction/.gitkeep -------------------------------------------------------------------------------- 
/data/long.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/long.jpg -------------------------------------------------------------------------------- /data/mobilenetv3/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/mobilenetv3/.gitkeep -------------------------------------------------------------------------------- /data/people.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/people.mp4 -------------------------------------------------------------------------------- /data/pphumanseg/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/pphumanseg/.gitkeep -------------------------------------------------------------------------------- /data/resnet18/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/resnet18/.gitkeep -------------------------------------------------------------------------------- /data/retinanet/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/retinanet/.gitkeep -------------------------------------------------------------------------------- /data/rifle2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/rifle2.jpeg -------------------------------------------------------------------------------- /data/road0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/road0.png -------------------------------------------------------------------------------- /data/road1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/road1.jpg -------------------------------------------------------------------------------- /data/sailboat3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/sailboat3.jpg -------------------------------------------------------------------------------- /data/ssd/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/ssd/.gitkeep -------------------------------------------------------------------------------- /data/swin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/swin/.gitkeep 
-------------------------------------------------------------------------------- /data/u2net/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/u2net/.gitkeep -------------------------------------------------------------------------------- /data/yolor/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolor/.gitkeep -------------------------------------------------------------------------------- /data/yolor/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /data/yolov3/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov3/.gitkeep -------------------------------------------------------------------------------- /data/yolov4/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov4/.gitkeep -------------------------------------------------------------------------------- /data/yolov5/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov5/.gitkeep -------------------------------------------------------------------------------- /data/yolov6/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov6/.gitkeep -------------------------------------------------------------------------------- /data/yolov7/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov7/.gitkeep -------------------------------------------------------------------------------- /data/yolov8-pose/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov8-pose/.gitkeep -------------------------------------------------------------------------------- /data/yolov8/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolov8/.gitkeep -------------------------------------------------------------------------------- /data/yolox/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/yolox/.gitkeep -------------------------------------------------------------------------------- /data/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/data/zidane.jpg -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | ## 1. download tensorrt8.4.2.4 2 | - enter: https://developer.nvidia.cn/nvidia-tensorrt-8x-download 3 | - select: I Agree To the Terms of the NVIDIA TensorRT License Agreement 4 | - select: TensorRT 8.4 GA Update 1 5 | - select: TensorRT 8.4 GA Update 1 for Linux x86_64 and CUDA 11.0, 11.1, 11.2, 11.3, 11.4, 11.5, 11.6 and 11.7 TAR Package 6 | - download file: "TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz" 7 | 8 | ```bash 9 | cd TensorRT-Alpha/docker 10 | cp TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz . 11 | ``` 12 | 13 | ## 2. build docker images 14 | ```bash 15 | docker build -f ubuntu18.04-cu113.Dockerfile --network=host -t trta . 
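# (added note) after the build, a hedged example of running the image — the workspace
# directory is the one created by the Dockerfile below; everything else is standard docker:
# docker run --gpus all -it --network=host -v $(pwd)/..:/home/feiyull/workspace trta /bin/bash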
16 | ``` -------------------------------------------------------------------------------- /docker/ubuntu18.04-cu113.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04 2 | RUN sed -i 's#http://archive.ubuntu.com/#http://mirrors.tuna.tsinghua.edu.cn/#' /etc/apt/sources.list && \ 3 | apt-get update 4 | 5 | RUN apt-get install -y software-properties-common && \ 6 | add-apt-repository ppa:deadsnakes/ppa && \ 7 | apt-get update && \ 8 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 9 | build-essential \ 10 | git \ 11 | gdb \ 12 | cmake \ 13 | python3.8 \ 14 | python3.8-dev \ 15 | python3-pip \ 16 | && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6 1 \ 17 | && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 2 \ 18 | && update-alternatives --config python3 19 | 20 | #copy and unzip tensorrt8.4.2.4 21 | RUN mkdir -p /home/feiyull/ 22 | COPY TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz /home/feiyull/ 23 | RUN cd /home/feiyull/ && \ 24 | tar -zxvf TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz && \ 25 | rm TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz && \ 26 | mkdir workspace 27 | 28 | RUN \ 29 | DEBIAN_FRONTEND=noninteractive apt-get install libgl1-mesa-glx -y \ 30 | pkg-config \ 31 | libgtk-3-dev \ 32 | libavcodec-dev \ 33 | libavformat-dev \ 34 | libswscale-dev \ 35 | libv4l-dev \ 36 | libxvidcore-dev \ 37 | libx264-dev \ 38 | libopencv-dev \ 39 | && apt-get clean 40 | 41 | # RUN pip3 install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple 42 | # RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple 43 | # RUN pip install opencv-python-headless==4.8.0.74 && \ 44 | # pip install opencv-python==4.8.0.74 \ 45 | # pip install onnx==1.9.0 \ 46 | # pip install torch==1.9.0 \ 47 | # pip install torchvision==0.10.0 \ 48 | # pip install onnx-simplifier==0.4.8 49 | 50 | #RUN cd /root/.cache/pip && \ 51 | # rm -r * -------------------------------------------------------------------------------- /efficientdet/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | set(CMAKE_BUILD_TYPE "Debug") 4 | #set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(efficientdet VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | 11 | message(STATUS ${ALL_LIBS}) 12 | file(GLOB CPPS 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 17 | ${TensorRT_ROOT}/samples/common/logger.cpp 18 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 19 | ) 20 | list(REMOVE_ITEM CPPS app_efficientdet.cpp) 21 | 22 | message(STATUS CPPS = ${CPPS}) 23 | list (LENGTH CPPS length) 24 | message(STATUS ***length*** = ${length}) 25 | find_package(OpenCV REQUIRED) 26 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 27 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 28 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 29 | 30 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 31 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 32 | 33 | 
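# (added note) the CUDA_ARCHITECTURES list below covers SM 5.0/6.1/7.2/7.5 (Maxwell/Pascal/Xavier/Turing);
# if your GPU is newer, e.g. Ampere (SM 8.6), append its compute capability to the list.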
set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 34 | target_compile_options(${PROJECT_NAME} PUBLIC 35 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 36 | 37 | add_executable(app_efficientdet app_efficientdet.cpp) 38 | # NVCC 39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 40 | target_link_libraries(app_efficientdet ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 41 | -------------------------------------------------------------------------------- /efficientdet/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 3 | 4 | or export onnx: 5 | ```bash 6 | # Please refer to the following site. It is TensorRT's official doc, and it leads you through exporting onnx from efficientdet's official weights. 7 | # TensorRT-Alpha converts python to cuda c. 8 | https://github.com/NVIDIA/TensorRT/blob/release/8.4/samples/python/efficientdet/README.md 9 | ``` 10 | ## 2. edit and save onnx 11 | ```bash 12 | # note: If you have obtained onnx by downloading, this step can be ignored 13 | ignored 14 | ``` 15 | ## 3. compile onnx 16 | ```bash 17 | # put your onnx file in this path: tensorrt-alpha/data/efficientdet 18 | cd tensorrt-alpha/data/efficientdet 19 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 20 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=efficientdet0.onnx --saveEngine=efficientdet0.trt --buildOnly --minShapes=input:1x512x512x3 --optShapes=input:2x512x512x3 --maxShapes=input:4x512x512x3 21 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=efficientdet1.onnx --saveEngine=efficientdet1.trt --buildOnly --minShapes=input:1x640x640x3 --optShapes=input:2x640x640x3 --maxShapes=input:4x640x640x3 22 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=efficientdet2.onnx --saveEngine=efficientdet2.trt --buildOnly --minShapes=input:1x768x768x3 --optShapes=input:2x768x768x3 --maxShapes=input:4x768x768x3 23 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=efficientdet3.onnx --saveEngine=efficientdet3.trt --buildOnly --minShapes=input:1x896x896x3 --optShapes=input:2x896x896x3 --maxShapes=input:4x896x896x3 24 | 25 | ``` 26 | ## 4. run 27 | ```bash 28 | git clone https://github.com/FeiYull/tensorrt-alpha 29 | cd tensorrt-alpha/efficientdet 30 | mkdir build 31 | cd build 32 | cmake .. 
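# (added note) the build type is hardcoded at the top of efficientdet/CMakeLists.txt
# (Debug by default for this sample); switch it to Release there for a faster binary.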
33 | make -j10 34 | # note: the dstImage will be saved in tensorrt-alpha/efficientdet/build by default 35 | 36 | # infer image 37 | ./app_efficientdet --model=../../data/efficientdet/efficientdet0.trt --img=../../data/road0.png --size=512 --batch_size=1 --show --savePath 38 | ./app_efficientdet --model=../../data/efficientdet/efficientdet1.trt --img=../../data/road0.png --size=640 --batch_size=1 --show --savePath 39 | ./app_efficientdet --model=../../data/efficientdet/efficientdet2.trt --img=../../data/road0.png --size=768 --batch_size=1 --show --savePath 40 | ./app_efficientdet --model=../../data/efficientdet/efficientdet3.trt --img=../../data/road0.png --size=896 --batch_size=1 --show --savePath 41 | 42 | 43 | # infer video 44 | ./app_efficientdet --model=../../data/efficientdet/efficientdet0.trt --size=512 --batch_size=2 --video=../../data/people.mp4 --show 45 | 46 | # infer camera 47 | ./app_efficientdet --model=../../data/efficientdet/efficientdet0.trt --size=512 --batch_size=2 --cam_id=0 --show 48 | ``` 49 | ## 5. appendix 50 | ignore -------------------------------------------------------------------------------- /efficientdet/efficientdet.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/common_include.h" 3 | #include"../utils/utils.h" 4 | #include"../utils/kernel_function.h" 5 | 6 | class EfficientDet 7 | { 8 | public: 9 | EfficientDet(const utils::InitParameter& param); 10 | ~EfficientDet(); 11 | 12 | public: 13 | bool init(const std::vector<unsigned char>& trtFile); 14 | void check(); 15 | void copy(const std::vector<cv::Mat>& imgsBatch); 16 | void preprocess(const std::vector<cv::Mat>& imgsBatch); 17 | bool infer(); 18 | void postprocess(const std::vector<cv::Mat>& imgsBatch); 19 | void reset(); 20 | 21 | public: 22 | std::vector<std::vector<utils::Box>> getObjectss() const; 23 | 24 | protected: 25 | std::shared_ptr<nvinfer1::ICudaEngine> m_engine; 26 | std::unique_ptr<nvinfer1::IExecutionContext> m_context; 27 | 28 | protected: 29 | utils::InitParameter m_param; 30 | std::vector<std::vector<utils::Box>> m_objectss; 31 | utils::AffineMat m_dst2src; 32 | // input 33 | float* m_input_src_device; 34 | float* m_input_resize_device; 35 | float* m_input_rgb_device; 36 | // output 37 | int* m_output_num_device; 38 | int* m_output_boxes_device; 39 | int* m_output_scores_device; 40 | int* m_output_classes_device; 41 | int* m_output_num_host; 42 | int* m_output_boxes_host; 43 | int* m_output_scores_host; 44 | int* m_output_classes_host; 45 | }; -------------------------------------------------------------------------------- /examples/python_with_dll/c_files/pch.cpp: -------------------------------------------------------------------------------- 1 | // pch.cpp: source file corresponding to the precompiled header 2 | #include"./utils/yolo.h" 3 | #include "pch.h" 4 | #include"./yolov8/yolov8.h" 5 | // When using precompiled headers, this source file is required for the build to succeed. 6 | 7 | void getAimsInfo(const std::vector<std::vector<utils::Box>>& objectss, float(*res_array)[6]) 8 | { 9 | for (const auto& objects : objectss) 10 | { 11 | for (const auto& box : objects) 12 | { 13 | res_array[0][0] = box.left; 14 | res_array[0][1] = box.top; 15 | res_array[0][2] = box.right; 16 | res_array[0][3] = box.bottom; 17 | res_array[0][4] = box.label; 18 | res_array[0][5] = box.confidence; 19 | 20 | ++res_array; 21 | } 22 | } 23 | } 24 | 25 | 26 | // c++ code 27 | 28 | void* Init( 29 | const char* trt_file_path, 30 | int src_w, 31 | int src_h, 32 | float conf_thresh, 33 | float iou_thresh, 34 | int num_class 35 | ) 36 | 37 | { 38 | // parameters 39 | utils::InitParameter param; 40 | 41 | param.input_output_names = { "images", "output0" }; 42 | param.batch_size = 1; 43 | param.src_h
= src_h; 44 | param.src_w = src_w; 45 | param.dst_h = 640; 46 | param.dst_w = 640; 47 | param.iou_thresh = iou_thresh; 48 | param.conf_thresh = conf_thresh; 49 | param.num_class = num_class; 50 | 51 | YOLOV8* yolov8 = new YOLOV8(param); 52 | 53 | std::vector<unsigned char> trt_file = utils::loadModel(trt_file_path); 54 | if (trt_file.empty()) 55 | { 56 | sample::gLogError << "trt_file is empty!" << std::endl; 57 | return nullptr; 58 | } 59 | 60 | if (!yolov8->init(trt_file)) 61 | { 62 | sample::gLogError << "initEngine() encountered errors!" << std::endl; 63 | return nullptr; 64 | } 65 | yolov8->check(); 66 | return yolov8; 67 | } 68 | 69 | 70 | // 2. img inference 71 | void Detect(void* yolo, int rows, int cols, unsigned char* src_data, float(*res_array)[6]) 72 | 73 | { 74 | YOLOV8* yolov8 = (YOLOV8*)yolo; 75 | 76 | cv::Mat frame = cv::Mat(rows, cols, CV_8UC3, src_data); 77 | 78 | std::vector<cv::Mat> imgs_batch(1, frame.clone()); 79 | 80 | yolov8->reset(); 81 | 82 | yolov8->copy(imgs_batch); 83 | 84 | utils::DeviceTimer d_t1; yolov8->preprocess(imgs_batch); float t1 = d_t1.getUsedTime(); 85 | utils::DeviceTimer d_t2; yolov8->infer(); float t2 = d_t2.getUsedTime(); 86 | utils::DeviceTimer d_t3; yolov8->postprocess(imgs_batch); float t3 = d_t3.getUsedTime(); 87 | 88 | sample::gLogInfo << 89 | "preprocess time = " << t1 << "; " 90 | "infer time = " << t2 << "; " 91 | "postprocess time = " << t3 << std::endl; 92 | 93 | getAimsInfo(yolov8->getObjectss(), res_array); 94 | } 95 | -------------------------------------------------------------------------------- /examples/python_with_dll/c_files/pch.h: -------------------------------------------------------------------------------- 1 | // pch.h: this is the precompiled header file. 2 | // Files listed below are compiled only once, which improves build performance for subsequent builds. 3 | // This also affects IntelliSense performance, including code completion and many code browsing features. 4 | // However, if any file listed here is updated between builds, all of them will be recompiled. 5 | // Do not add frequently-updated files here, or the performance advantage is lost. 6 | 7 | #ifndef PCH_H 8 | #define PCH_H 9 | 10 | // add headers to be precompiled here 11 | #include "framework.h" 12 | #endif //PCH_H 13 | 14 | // define the import/export macro 15 | #ifdef IMPORT_DLL 16 | #else 17 | #define IMPORT_DLL extern "C" _declspec(dllimport) 18 | #endif 19 | 20 | 21 | IMPORT_DLL void* Init( 22 | const char* trt_file_path, 23 | int src_w, 24 | int src_h, 25 | float conf_thresh, 26 | float iou_thresh, 27 | int num_class 28 | ); 29 | IMPORT_DLL void Detect(void* yolo, int rows, int cols, unsigned char* src_data, float(*res_array)[6]); -------------------------------------------------------------------------------- /examples/python_with_dll/config/screen_inf.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import mss 4 | import win32api 5 | 6 | cap = mss.mss() 7 | def grab_screen_mss(monitor): 8 | return cv2.cvtColor(np.array(cap.grab(monitor)), cv2.COLOR_BGRA2BGR) 9 | 10 | def get_parameters(): 11 | x, y = get_screen_size().values() 12 | return 0, 0, x, y 13 | 14 | def get_screen_size(): 15 | wide = win32api.GetSystemMetrics(0) 16 | high = win32api.GetSystemMetrics(1) 17 | return {"wide": wide, "high": high} -------------------------------------------------------------------------------- /examples/python_with_dll/python_trt.py: -------------------------------------------------------------------------------- 1 | from ctypes import * 2 | from threading import Thread 3 | import cv2 4 | import numpy as np 5 | import numpy.ctypeslib as npct 6 | from pygame.time import Clock 7 | from config.screen_inf import get_parameters, grab_screen_mss 8 | 9 | 10 | class Detector: 11 | def __init__( 12 | self, dll_path, trt_path, 
window_width=640, window_height=640, conf_thresh=0.25, iou_thresh=0.45, 13 | num_class=80): 14 | self.yolo = CDLL(dll_path) 15 | self.max_bbox = 50 16 | 17 | self.yolo.Detect.argtypes = [c_void_p, c_int, c_int, POINTER(c_ubyte), 18 | npct.ndpointer(dtype=np.float32, ndim=2, shape=(self.max_bbox, 6), 19 | flags="C_CONTIGUOUS")] 20 | 21 | self.yolo.Init.argtypes = [c_char_p, c_int, c_int, c_float, c_float, c_int] 22 | self.yolo.Init.restype = c_void_p 23 | 24 | self.c_point = self.yolo.Init(trt_path.encode('utf-8'), window_width, window_height, conf_thresh, iou_thresh, 25 | num_class) 26 | 27 | def predict(self, img): 28 | rows, cols = img.shape[0], img.shape[1] 29 | res_arr = np.zeros((self.max_bbox, 6), dtype=np.float32) 30 | self.yolo.Detect(self.c_point, c_int(rows), c_int(cols), img.ctypes.data_as(POINTER(c_ubyte)), res_arr) 31 | self.bbox_array = res_arr[~(res_arr == 0).all(1)] 32 | return self.bbox_array 33 | 34 | 35 | class_names = [ 36 | "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", 37 | "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", 38 | "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", 39 | "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", 40 | "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", 41 | "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", 42 | "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", 43 | "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", 44 | "hair drier", "toothbrush" 45 | ] 46 | 47 | # Capture the specified screen region and run inference on it 48 | if __name__ == '__main__': 49 | 50 | def img_grab_thread(): 51 | 52 | global frame 53 | global monitor 54 | clock = Clock() 55 | 56 | while True: 57 | frame = grab_screen_mss(monitor) 58 | clock.tick(200) 59 | 60 | 61 | def img_pred_thread(): 62 | 63 | global frame 64 | global source_w 65 | global source_h 66 | det = Detector(dll_path="./python_dll.dll", trt_path="./yolov8n.trt", window_width=source_w, 67 | window_height=source_h) 68 | clock = Clock() 69 | 70 | windows_title = "cvwindow" 71 | cv2.namedWindow(windows_title, cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) 72 | 73 | max_w = 576 74 | max_h = 324 75 | if source_h > max_h or source_w > max_w: 76 | cv2.resizeWindow(windows_title, max_w, source_h * max_w // source_w) 77 | 78 | while True: 79 | aims = det.predict(frame) 80 | for aim in aims: 81 | cv2.rectangle(frame, (int(aim[0]), int(aim[1])), (int(aim[2]), int(aim[3])), (0, 255, 0), 2) 82 | det_info = class_names[int(aim[4])] + " " + str(aim[5]) 83 | cv2.putText(frame, det_info, (int(aim[0]), int(aim[1])), cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 0, 255), 1, 84 | cv2.LINE_AA) 85 | 86 | cv2.putText(frame, "FPS:{:.1f}".format(clock.get_fps()), (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 87 | 2, (0, 0, 235), 4) 88 | cv2.imshow('cvwindow', frame) 89 | cv2.waitKey(1) 90 | 91 | clock.tick(200) 92 | 93 | 94 | # 4:3 800x600 center region detect 95 | source_w = int(800) 96 | source_h = int(600) 97 | 98 | _, _, x, y = get_parameters() 99 | top_x = (x // 2) - (source_w // 2) 100 | top_y = (y // 2) - (source_h // 2) 101 | 102 | monitor = {'left': top_x, 'top': top_y, 'width': source_w, 'height': source_h} 103 | 104 | 
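    # (added note) frame is shared with img_grab_thread without a lock and starts as None below,
    # so img_pred_thread can race the first capture; a None-check before det.predict(frame)
    # would avoid a crash on the very first iteration.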
frame = None 105 | 106 | # To demonstrate the inference speed more intuitively, 107 | # two threads are used here: 108 | # img_grab_thread for image fetching 109 | # img_pred_thread for inference 110 | # Lock is not used here, so the display effect may be poor if the image fetching speed is too high 111 | Thread(target=img_grab_thread).start() 112 | Thread(target=img_pred_thread).start() 113 | 114 | # VideoCapture predict demo 115 | if __name__ == '__main__OFF': 116 | cap = cv2.VideoCapture('./people.mp4') 117 | 118 | source_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 119 | source_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 120 | 121 | det = Detector(dll_path="./yoloDemo.dll", trt_path="./yolov8n.trt", window_width=source_w, window_height=source_h) 122 | 123 | clock = Clock() 124 | while True: 125 | ret, frame = cap.read() 126 | if not ret: 127 | break 128 | 129 | aims = det.predict(frame) 130 | 131 | # do something here 132 | for aim in aims: 133 | cv2.rectangle(frame, (int(aim[0]), int(aim[1])), (int(aim[2]), int(aim[3])), (0, 255, 0), 2) 134 | det_info = class_names[int(aim[4])] + " " + str(aim[5]) 135 | cv2.putText(frame, det_info, (int(aim[0]), int(aim[1])), cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 0, 255), 1, 136 | cv2.LINE_AA) 137 | 138 | cv2.imshow('cvwindow', frame) 139 | cv2.waitKey(1) 140 | 141 | print('pred fps: ', clock.get_fps()) 142 | clock.tick(5) 143 | 144 | cap.release() 145 | cv2.destroyAllWindows() 146 | -------------------------------------------------------------------------------- /libfacedetection/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | # cuda 7 | PROJECT(facedet VERSION 1.0.0 LANGUAGES C CXX CUDA) 8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 11 | 12 | message(STATUS ${ALL_LIBS}) 13 | file(GLOB CPPS 14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 15 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 17 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 18 | ${TensorRT_ROOT}/samples/common/logger.cpp 19 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 20 | ) 21 | list(REMOVE_ITEM CPPS app_libfacedetction.cpp) 22 | 23 | message(STATUS CPPS = ${CPPS}) 24 | list (LENGTH CPPS length) 25 | message(STATUS ***length*** = ${length}) 26 | find_package(OpenCV REQUIRED) 27 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 28 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 29 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 30 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 31 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 32 | 33 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 34 | target_compile_options(${PROJECT_NAME} PUBLIC 35 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 36 | 37 | add_executable(app_libfacedetction app_libfacedetction.cpp) 38 | 39 | # NVCC 40 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 41 | target_link_libraries(app_libfacedetction ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 42 | -------------------------------------------------------------------------------- /libfacedetection/README.md: 
-------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 3 | 4 | or export onnx: 5 | ```bash 6 | git clone https://github.com/ShiqiYu/libfacedetection.train 7 | cd libfacedetection.train && git checkout a3bc97c7e85bb206c9feca97fbd541ce82cfa3a9 8 | 9 | # note: the official repository provides the following three models: 10 | # yunet_yunet_final_320_320_simplify.onnx 11 | # yunet_yunet_final_640_640_simplify.onnx 12 | # yunet_yunet_final_dynamic_simplify.onnx 13 | # choose the third (dynamic) model here. 14 | ``` 15 | ## 2. edit and save onnx 16 | ```bash 17 | # note: if you have obtained the onnx by downloading, this step can be ignored 18 | conda activate tensorrt-alpha 19 | # put your onnx file in this path: tensorrt-alpha/data/libfacedetction 20 | cd tensorrt-alpha/data/libfacedetction 21 | python alpha_edit.py --onnx=yunet_yunet_final_dynamic_simplify.onnx 22 | ``` 23 | ## 3. compile onnx 24 | ```bash 25 | # put your onnx file in this path: tensorrt-alpha/data/libfacedetction 26 | cd tensorrt-alpha/data/libfacedetction 27 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 28 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yunet_yunet_final_dynamic_simplify.onnx --saveEngine=alpha_yunet_yunet_final_dynamic_simplify.trt --buildOnly --minShapes=input:1x3x120x120 --optShapes=input:4x3x320x320 --maxShapes=input:8x3x2000x2000 29 | ``` 30 | ## 4. run 31 | ```bash 32 | git clone https://github.com/FeiYull/tensorrt-alpha 33 | cd tensorrt-alpha/libfacedetection 34 | mkdir build 35 | cd build 36 | cmake .. 37 | make -j10 38 | # note: the dstImage will be saved in tensorrt-alpha/libfacedetection/build by default 39 | 40 | # dynamic [b w h] 41 | # infer image 42 | ./app_libfacedetction --model=../../data/libfacedetction/alpha_yunet_yunet_final_dynamic_simplify.trt --batch_size=1 --img=../../data/6406401.jpg --show --savePath 43 | 44 | # infer video 45 | ./app_libfacedetction --model=../../data/libfacedetction/alpha_yunet_yunet_final_dynamic_simplify.trt --batch_size=8 --video=../../data/people.mp4 --show 46 | 47 | # infer camera 48 | ./app_libfacedetction --model=../../data/libfacedetction/alpha_yunet_yunet_final_dynamic_simplify.trt --batch_size=2 --cam_id=0 --show 49 | ``` 50 | ## 5.
appendix 51 | ignore -------------------------------------------------------------------------------- /libfacedetection/alpha_edit.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pyexpat import model 3 | from turtle import width 4 | import onnx 5 | import onnx.checker 6 | import onnx.utils 7 | from onnx.tools import update_model_dims 8 | import onnx.helper as helper 9 | import torch 10 | 11 | if __name__ == '__main__': 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--onnx', type=str, default='yunet_yunet_final_dynamic_simplify.onnx', help='onnx path') 14 | opt = parser.parse_args() 15 | 16 | model = onnx.load(opt.onnx) 17 | in_b = model.graph.input[0].type.tensor_type.shape.dim[0] 18 | in_c = model.graph.input[0].type.tensor_type.shape.dim[1] 19 | in_h = model.graph.input[0].type.tensor_type.shape.dim[2] 20 | in_w = model.graph.input[0].type.tensor_type.shape.dim[3] 21 | # loc 22 | out_loc_b = model.graph.output[0].type.tensor_type.shape.dim[0] 23 | out_loc_num_candidates = model.graph.output[0].type.tensor_type.shape.dim[1] 24 | out_loc_dim2 = model.graph.output[0].type.tensor_type.shape.dim[2] 25 | # conf 26 | out_conf_b = model.graph.output[1].type.tensor_type.shape.dim[0] 27 | out_conf_num_candidates = model.graph.output[1].type.tensor_type.shape.dim[1] 28 | out_conf_dim2 = model.graph.output[1].type.tensor_type.shape.dim[2] 29 | # iou 30 | out_iou_b = model.graph.output[2].type.tensor_type.shape.dim[0] 31 | out_iou_num_candidates = model.graph.output[2].type.tensor_type.shape.dim[1] 32 | out_iou_dim2 = model.graph.output[2].type.tensor_type.shape.dim[2] 33 | in_b.dim_param= "batch_size" 34 | in_h.dim_param= "height" 35 | in_w.dim_param= "width" 36 | out_loc_b.dim_param = "batch_size" 37 | out_conf_b.dim_param= "batch_size" 38 | out_iou_b.dim_param = "batch_size" 39 | out_loc_num_candidates.dim_param = "num_condidates" 40 | out_conf_num_candidates.dim_param = "num_condidates" 41 | out_iou_num_candidates.dim_param = "num_condidates" 42 | 43 | onnx.save(model, 'alpha_yunet_yunet_final_dynamic_simplify.onnx') 44 | print("ok") 45 | 46 | -------------------------------------------------------------------------------- /libfacedetection/app_libfacedetction.cpp: -------------------------------------------------------------------------------- 1 | #include"../utils/common_include.h" 2 | #include"../utils/utils.h" 3 | #include"libfacedetection.h" 4 | 5 | void setParameters(utils::InitParameter& initParameters) 6 | { 7 | initParameters.class_names = utils::dataSets::face2; 8 | 9 | initParameters.num_class = 2; 10 | initParameters.batch_size = 8; 11 | // dynamic: HWC 12 | /*initParameters.dst_h = 640; 13 | initParameters.dst_w = 640;*/ 14 | 15 | initParameters.topK = 1000; 16 | 17 | initParameters.input_output_names = { "input", "loc", "conf", "iou"}; 18 | initParameters.conf_thresh = 0.3f; 19 | initParameters.iou_thresh = 0.45f; 20 | initParameters.save_path = ""; 21 | } 22 | 23 | void task(LibFaceDet& face_det, const utils::InitParameter& param, std::vector& imgsBatch, const int& delayTime, const int& batchi, 24 | const bool& isShow, const bool& isSave) 25 | { 26 | face_det.copy(imgsBatch); 27 | utils::DeviceTimer d_t1; face_det.preprocess(imgsBatch); float t1 = d_t1.getUsedTime(); 28 | utils::DeviceTimer d_t2; face_det.infer(); float t2 = d_t2.getUsedTime(); 29 | utils::DeviceTimer d_t3; face_det.postprocess(imgsBatch); float t3 = d_t3.getUsedTime(); 30 | sample::gLogInfo << "preprocess time = " << t1 / 
param.batch_size << "; " 31 | "infer time = " << t2 / param.batch_size << "; " 32 | "postprocess time = " << t3 / param.batch_size << std::endl; 33 | if(isShow) 34 | utils::show(face_det.getObjectss(), param.class_names, delayTime, imgsBatch); 35 | if(isSave) 36 | utils::save(face_det.getObjectss(), param.class_names, param.save_path, imgsBatch, param.batch_size, batchi); 37 | face_det.reset(); 38 | } 39 | 40 | int main(int argc, char** argv) 41 | { 42 | cv::CommandLineParser parser(argc, argv, 43 | { 44 | "{model || tensorrt model file }" 45 | "{batch_size|| batch size }" 46 | "{video || video's path }" 47 | "{img || image's path }" 48 | "{cam_id || camera's device id }" 49 | "{show || if show the result }" 50 | "{savePath || save path, can be ignore}" 51 | }); 52 | // parameters 53 | utils::InitParameter param; 54 | setParameters(param); 55 | // path 56 | std::string model_path = "../../data/libfacedetction/alpha_yunet_yunet_final_dynamic_simplify.trt"; 57 | std::string video_path = "../../data/people.mp4"; 58 | std::string image_path = "../../data/6406403.jpg"; 59 | // camera' id 60 | int camera_id = 0; 61 | 62 | // get input 63 | utils::InputStream source; 64 | source = utils::InputStream::IMAGE; 65 | //source = utils::InputStream::VIDEO; 66 | //source = utils::InputStream::CAMERA; 67 | 68 | // update params from command line parser 69 | //int size = -1; // w or h 70 | int batch_size = 8; 71 | bool is_show = false; 72 | bool is_save = false; 73 | if(parser.has("model")) 74 | { 75 | model_path = parser.get("model"); 76 | sample::gLogInfo << "model_path = " << model_path << std::endl; 77 | } 78 | 79 | if(parser.has("batch_size")) 80 | { 81 | batch_size = parser.get("batch_size"); 82 | sample::gLogInfo << "batch_size = " << batch_size << std::endl; 83 | param.batch_size = batch_size; 84 | } 85 | if(parser.has("video")) 86 | { 87 | source = utils::InputStream::VIDEO; 88 | video_path = parser.get("video"); 89 | sample::gLogInfo << "video_path = " << video_path << std::endl; 90 | } 91 | if(parser.has("img")) 92 | { 93 | source = utils::InputStream::IMAGE; 94 | image_path = parser.get("img"); 95 | sample::gLogInfo << "image_path = " << image_path << std::endl; 96 | } 97 | if(parser.has("cam_id")) 98 | { 99 | source = utils::InputStream::CAMERA; 100 | camera_id = parser.get("cam_id"); 101 | sample::gLogInfo << "camera_id = " << camera_id << std::endl; 102 | } 103 | if(parser.has("show")) 104 | { 105 | is_show = true; 106 | sample::gLogInfo << "is_show = " << is_show << std::endl; 107 | } 108 | if(parser.has("savePath")) 109 | { 110 | is_save = true; 111 | param.save_path = parser.get("savePath"); 112 | sample::gLogInfo << "save_path = " << param.save_path << std::endl; 113 | } 114 | 115 | int total_batches = 0; 116 | int delay_time = 1; 117 | cv::VideoCapture capture; 118 | if (!setInputStream(source, image_path, video_path, camera_id, 119 | capture, total_batches, delay_time, param)) 120 | { 121 | sample::gLogError << "read the input data errors!" << std::endl; 122 | return -1; 123 | } 124 | 125 | LibFaceDet face_det(param); 126 | 127 | // read model 128 | std::vector trt_file = utils::loadModel(model_path); 129 | if (trt_file.empty()) 130 | { 131 | sample::gLogError << "trt_file is empty!" << std::endl; 132 | return -1; 133 | } 134 | // init model 135 | if (!face_det.init(trt_file)) 136 | { 137 | sample::gLogError << "initEngine() ocur errors!" 
<< std::endl; 138 | return -1; 139 | } 140 | face_det.check(); 141 | cv::Mat frame; 142 | std::vector imgs_batch; 143 | imgs_batch.reserve(param.batch_size); 144 | sample::gLogInfo << imgs_batch.capacity() << std::endl; 145 | int batchi = 0; 146 | while (capture.isOpened()) 147 | { 148 | if (batchi >= total_batches && source != utils::InputStream::CAMERA) 149 | { 150 | break; 151 | } 152 | if (imgs_batch.size() < param.batch_size) 153 | { 154 | if (source != utils::InputStream::IMAGE) 155 | { 156 | capture.read(frame); 157 | } 158 | else 159 | { 160 | frame = cv::imread(image_path); 161 | } 162 | 163 | if (frame.empty()) 164 | { 165 | sample::gLogWarning << "no more video or camera frame" << std::endl; 166 | task(face_det, param, imgs_batch, delay_time, batchi, is_show, is_save); 167 | imgs_batch.clear(); 168 | batchi++; 169 | break; 170 | } 171 | else 172 | { 173 | imgs_batch.emplace_back(frame.clone()); 174 | } 175 | 176 | } 177 | else 178 | { 179 | task(face_det, param, imgs_batch, delay_time, batchi, is_show, is_save); 180 | imgs_batch.clear(); 181 | batchi++; 182 | } 183 | } 184 | return -1; 185 | } -------------------------------------------------------------------------------- /libfacedetection/libfacedetection.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/common_include.h" 3 | #include"../utils/utils.h" 4 | #include"../utils/kernel_function.h" 5 | 6 | class LibFaceDet 7 | { 8 | public: 9 | LibFaceDet(const utils::InitParameter& param); 10 | ~LibFaceDet(); 11 | 12 | public: 13 | bool init(const std::vector& trtFile); 14 | void check(); 15 | void copy(const std::vector& imgsBatch); 16 | void preprocess(const std::vector& imgsBatch); 17 | bool infer(); 18 | void postprocess(const std::vector& imgsBatch); 19 | void reset(); 20 | 21 | public: 22 | std::vector> getObjectss() const; 23 | 24 | private: 25 | std::shared_ptr m_engine; 26 | std::unique_ptr m_context; 27 | 28 | protected: 29 | utils::InitParameter m_param; 30 | nvinfer1::Dims m_output_loc_dims; 31 | nvinfer1::Dims m_output_conf_dims; 32 | nvinfer1::Dims m_output_iou_dims; 33 | int m_total_objects; 34 | 35 | // const params on host 36 | const float m_min_sizes_host[4 * 3] = 37 | { 10, 16, 24, 32, 48, FLT_MAX, 64, 96, FLT_MAX, 128, 192, 256 }; 38 | const int m_min_sizes_host_dim[4] = 39 | { 3, 2, 2, 3 }; 40 | float* m_feat_hw_host; 41 | float* m_prior_boxes_host; 42 | const float m_variances_host[2] = { 0.1f, 0.2f }; 43 | // const params on device 44 | float* m_min_sizes_device; 45 | float* m_feat_hw_host_device; 46 | float* m_prior_boxes_device; 47 | float* m_variances_device; 48 | std::vector> m_objectss; 49 | // input 50 | float* m_input_src_device; 51 | float* m_input_hwc_device; 52 | // output 53 | float* m_output_loc_device; 54 | float* m_output_conf_device; 55 | float* m_output_iou_device; 56 | float* m_output_objects_device; 57 | float* m_output_objects_host; 58 | int m_output_objects_width; 59 | 60 | }; 61 | 62 | void decodeLibFaceDetDevice(float* minSizes, float* feat_hw, float* priorBoxes, float* variances, 63 | int srcImgWidth, int srcImgHeight, 64 | float confThreshold, int batchSize, int srcHeight, 65 | float* srcLoc, int srcLocWidth, 66 | float* srcConf, int srcConfWidth, 67 | float* srcIou, int srcIouWidth, 68 | float* dst, int dstWidth, int dstHeight); -------------------------------------------------------------------------------- /pphumanseg/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(pphunmanseg VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | 8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 11 | 12 | message(STATUS ${ALL_LIBS}) 13 | file(GLOB CPPS 14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 15 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 17 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 18 | ${TensorRT_ROOT}/samples/common/logger.cpp 19 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 20 | ) 21 | list(REMOVE_ITEM CPPS app_pphunmanseg.cpp) 22 | 23 | message(STATUS CPPS = ${CPPS}) 24 | list (LENGTH CPPS length) 25 | message(STATUS ***length*** = ${length}) 26 | find_package(OpenCV REQUIRED) 27 | 28 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 29 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 30 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 31 | 32 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 33 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 34 | 35 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 36 | target_compile_options(${PROJECT_NAME} PUBLIC 37 | $<$:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 38 | 39 | add_executable(app_pphunmanseg app_pphunmanseg.cpp) 40 | 41 | # NVCC 42 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 43 | target_link_libraries(app_pphunmanseg ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 44 | -------------------------------------------------------------------------------- /pphumanseg/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google driver](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 3 | 4 | or export onnx: 5 | ```bash 6 | # Install git-lfs from https://git-lfs.github.com/ 7 | git clone https://github.com/opencv/opencv_zoo && cd opencv_zoo 8 | git checkout ae1d754a3ea14e4244fbea7d781cca2e18584035 9 | git lfs install 10 | git lfs pull 11 | # note:The official onnx is in this path:opencv_zoo/models/human_segmentation_pphumanseg. 
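# the downloaded file is human_segmentation_pphumanseg_2021oct.onnx; copy it into
# tensorrt-alpha/data/pphumanseg for the steps below
# caution: alpha_edit.py resolves its input/output relative to ../data/pphumanseg/,
# i.e. it assumes it is run from a top-level folder such as tensorrt-alpha/pphumanseg;
# adjust --onnx (or where you run it from) if you follow the cd in step 2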
12 | ``` 13 | ## 2.edit and save onnx 14 | ```bash 15 | # note: If you have obtained onnx by downloading, this step can be ignored 16 | conda activate tensorrt-alpha 17 | # put your onnx file in this path:tensorrt-alpha/data/pphumanseg 18 | cd tensorrt-alpha/data/pphumanseg 19 | python alpha_edit.py --onnx=../data/pphumanseg/human_segmentation_pphumanseg_2021oct.onnx 20 | ``` 21 | ## 3.compile onnx 22 | ```bash 23 | # put your onnx file in this path:tensorrt-alpha/data/pphumanseg 24 | cd tensorrt-alpha/data/pphumanseg 25 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 26 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=human_segmentation_pphumanseg_2021oct_dynamic.onnx --saveEngine=human_segmentation_pphumanseg_2021oct_dynamic.trt --buildOnly --minShapes=x:1x3x192x192 --optShapes=x:2x3x192x192 --maxShapes=x:4x3x192x192 27 | ``` 28 | ## 4.run 29 | ```bash 30 | git clone https://github.com/FeiYull/tensorrt-alpha 31 | cd tensorrt-alpha/pphumanseg 32 | mkdir build 33 | cd build 34 | cmake .. 35 | make -j10 36 | # note: the dstImage will be saved in tensorrt-alpha/pphumanseg/build by default 37 | 38 | # infer image 39 | ./app_pphunmanseg --model=../../data/pphumanseg/human_segmentation_pphumanseg_2021oct_dynamic.trt --img=../../data/6.jpg --size=192 --batch_size=1 --show -savePath 40 | 41 | # infer video 42 | ./app_pphunmanseg --model=../../data/pphumanseg/human_segmentation_pphumanseg_2021oct_dynamic.trt --batch_size=2 --video=../../data/people.mp4 --show 43 | 44 | # infer camera 45 | ./app_pphunmanseg --model=../../data/pphumanseg/human_segmentation_pphumanseg_2021oct_dynamic.trt --batch_size=2 --cam_id=0 --show 46 | ``` 47 | ## 5. appendix 48 | ignore -------------------------------------------------------------------------------- /pphumanseg/alpha_edit.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import onnx 3 | import onnx.checker 4 | import onnx.utils 5 | from onnx.tools import update_model_dims 6 | import onnx.helper as helper 7 | 8 | if __name__ == '__main__': 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--onnx', type=str, default='../data/pphumanseg/human_segmentation_pphumanseg_2021oct.onnx', help='onnx path') 11 | opt = parser.parse_args() 12 | 13 | model = onnx.load(opt.onnx) 14 | 15 | in_b = model.graph.input[0].type.tensor_type.shape.dim[0] 16 | in_c = model.graph.input[0].type.tensor_type.shape.dim[1] 17 | in_h = model.graph.input[0].type.tensor_type.shape.dim[2] 18 | in_w = model.graph.input[0].type.tensor_type.shape.dim[3] 19 | 20 | out_loc_b = model.graph.output[0].type.tensor_type.shape.dim[0] 21 | out_loc_num_candidates = model.graph.output[0].type.tensor_type.shape.dim[1] 22 | out_loc_dim2 = model.graph.output[0].type.tensor_type.shape.dim[2] # 这个维度不修改 23 | 24 | in_b.dim_param= "batch_size" 25 | 26 | out_loc_b.dim_param = "batch_size" 27 | 28 | onnx.save(model, '../data/pphumanseg//human_segmentation_pphumanseg_2021oct_dynamic.onnx') 29 | print("ok") -------------------------------------------------------------------------------- /pphumanseg/app_pphunmanseg.cpp: -------------------------------------------------------------------------------- 1 | #include"pphunmanseg.h" 2 | 3 | void setParameters(utils::InitParameter& initParameters) 4 | { 5 | initParameters.batch_size = 8; 6 | initParameters.dst_h = 192; 7 | initParameters.dst_w = 192; 8 | 9 | initParameters.means[0] = 0.5f; 10 | initParameters.means[1] = 0.5f; 11 | initParameters.means[2] = 0.5f; 12 | 
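    // mean = std = 0.5 per channel: assuming normDevice applies the usual
    // (x / 255 - mean) / std convention, this maps input pixels into [-1, 1],
    // the normalization PP-HumanSeg is trained with.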
initParameters.stds[0] = 0.5f; 13 | initParameters.stds[1] = 0.5f; 14 | initParameters.stds[2] = 0.5f; 15 | 16 | initParameters.input_output_names = { "x", "save_infer_model/scale_0.tmp_1" }; 17 | initParameters.save_path = ""; 18 | } 19 | 20 | void task(PPHunmanSeg& hunman_seg, const utils::InitParameter& param, std::vector& imgsBatch, const int& delayTime, const int& batchi, 21 | const bool& isShow, const bool& isSave) 22 | { 23 | hunman_seg.copy(imgsBatch); 24 | utils::DeviceTimer d_t1; hunman_seg.preprocess(imgsBatch); float t1 = d_t1.getUsedTime(); 25 | utils::DeviceTimer d_t2; hunman_seg.infer(); float t2 = d_t2.getUsedTime(); 26 | utils::DeviceTimer d_t3; hunman_seg.postprocess(imgsBatch); float t3 = d_t3.getUsedTime(); 27 | sample::gLogInfo << "preprocess time = " << t1 / param.batch_size << "; " 28 | "infer time = " << t2 / param.batch_size << "; " 29 | "postprocess time = " << t3 / param.batch_size << std::endl; 30 | if (isShow) 31 | hunman_seg.showMask(imgsBatch, delayTime); 32 | if (isSave) 33 | hunman_seg.saveMask(imgsBatch, param.save_path, param.batch_size, batchi); 34 | } 35 | 36 | int main(int argc, char** argv) 37 | { 38 | cv::CommandLineParser parser(argc, argv, 39 | { 40 | "{model || tensorrt model file }" 41 | "{size || image (h, w), eg: 640}" 42 | "{batch_size|| batch size }" 43 | "{video || video's path }" 44 | "{img || image's path }" 45 | "{cam_id || camera's device id }" 46 | "{show || if show the result }" 47 | "{savePath || save path, can be ignore}" 48 | }); 49 | // parameters 50 | utils::InitParameter param; 51 | setParameters(param); 52 | // path 53 | std::string model_path = "../../data/pphumanseg/human_segmentation_pphumanseg_2021oct_dynamic.trt"; 54 | std::string video_path = "../../data/people.mp4"; 55 | std::string image_path = "../../data/6406403.jpg"; 56 | int camera_id = 0; // camera' id 57 | 58 | // get input 59 | utils::InputStream source; 60 | //source = utils::InputStream::IMAGE; 61 | source = utils::InputStream::VIDEO; 62 | //source = utils::InputStream::CAMERA; 63 | 64 | // update params from command line parser 65 | int size = -1; // w or h 66 | int batch_size = 8; 67 | bool is_show = false; 68 | bool is_save = false; 69 | if(parser.has("model")) 70 | { 71 | model_path = parser.get("model"); 72 | sample::gLogInfo << "model_path = " << model_path << std::endl; 73 | } 74 | if(parser.has("size")) 75 | { 76 | size = parser.get("size"); 77 | sample::gLogInfo << "size = " << size << std::endl; 78 | param.dst_h = param.dst_w = size; 79 | } 80 | if(parser.has("batch_size")) 81 | { 82 | batch_size = parser.get("batch_size"); 83 | sample::gLogInfo << "batch_size = " << batch_size << std::endl; 84 | param.batch_size = batch_size; 85 | } 86 | if(parser.has("video")) 87 | { 88 | source = utils::InputStream::VIDEO; 89 | video_path = parser.get("video"); 90 | sample::gLogInfo << "video_path = " << video_path << std::endl; 91 | } 92 | if(parser.has("img")) 93 | { 94 | source = utils::InputStream::IMAGE; 95 | image_path = parser.get("img"); 96 | sample::gLogInfo << "image_path = " << image_path << std::endl; 97 | } 98 | if(parser.has("cam_id")) 99 | { 100 | source = utils::InputStream::CAMERA; 101 | camera_id = parser.get("cam_id"); 102 | sample::gLogInfo << "camera_id = " << camera_id << std::endl; 103 | } 104 | if(parser.has("show")) 105 | { 106 | is_show = true; 107 | sample::gLogInfo << "is_show = " << is_show << std::endl; 108 | } 109 | if(parser.has("savePath")) 110 | { 111 | is_save = true; 112 | param.save_path = parser.get("savePath"); 113 | 
sample::gLogInfo << "save_path = " << param.save_path << std::endl; 114 | } 115 | 116 | 117 | 118 | int total_batches = 0; 119 | int delay_time = 1; 120 | cv::VideoCapture capture; 121 | if (!setInputStream(source, image_path, video_path, camera_id, 122 | capture, total_batches, delay_time, param)) 123 | { 124 | sample::gLogError << "read the input data errors!" << std::endl; 125 | return -1; 126 | } 127 | 128 | PPHunmanSeg hunman_seg(param); 129 | 130 | // read model 131 | std::vector trt_file = utils::loadModel(model_path); 132 | if (trt_file.empty()) 133 | { 134 | sample::gLogError << "trt_file is empty!" << std::endl; 135 | return -1; 136 | } 137 | // init model 138 | if (!hunman_seg.init(trt_file)) 139 | { 140 | sample::gLogError << "initEngine() ocur errors!" << std::endl; 141 | return -1; 142 | } 143 | hunman_seg.check(); 144 | cv::Mat frame; 145 | std::vector imgs_batch; 146 | imgs_batch.reserve(param.batch_size); 147 | sample::gLogInfo << imgs_batch.capacity() << std::endl; 148 | int batchi = 0; 149 | while (capture.isOpened()) 150 | { 151 | if (batchi >= total_batches && source != utils::InputStream::CAMERA) 152 | { 153 | break; 154 | } 155 | if (imgs_batch.size() < param.batch_size) // get input 156 | { 157 | if (source != utils::InputStream::IMAGE) 158 | { 159 | capture.read(frame); 160 | } 161 | else 162 | { 163 | frame = cv::imread(image_path); 164 | } 165 | 166 | if (frame.empty()) 167 | { 168 | sample::gLogWarning << "no more video or camera frame" << std::endl; 169 | task(hunman_seg, param, imgs_batch, delay_time, batchi, is_show, is_save); 170 | imgs_batch.clear(); 171 | batchi++; 172 | break; 173 | } 174 | else 175 | { 176 | imgs_batch.emplace_back(frame.clone()); 177 | } 178 | 179 | } 180 | else // infer 181 | { 182 | task(hunman_seg, param, imgs_batch, delay_time, batchi, is_show, is_save); 183 | imgs_batch.clear(); 184 | batchi++; 185 | } 186 | } 187 | return -1; 188 | } 189 | 190 | -------------------------------------------------------------------------------- /pphumanseg/decode_pphunmanseg.cu: -------------------------------------------------------------------------------- 1 | #include"decode_pphunmanseg.h" 2 | #include"../utils/kernel_function.h" 3 | 4 | __global__ 5 | void decode_pphunmanseg_device_kernel(int batch_size, 6 | float* src, int src_width, int src_height, int src_area, int src_volum, 7 | float* dst, int dst_width, int dst_height, int dst_area, int dst_volum) 8 | { 9 | int dx = blockDim.x * blockIdx.x + threadIdx.x; 10 | int dy = blockDim.y * blockIdx.y + threadIdx.y; 11 | if (dx >= dst_area || dy >= batch_size) 12 | { 13 | return; 14 | } 15 | dst[dy * dst_volum + dx] = (src[dy * src_volum + dx + src_area] > src[dy * src_volum + dx] ? 
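        // channel 0 (offset 0) holds the background score and channel 1 (offset
        // src_area) the person score; comparing the raw scores gives the same
        // argmax a softmax would, so the binary mask is written directly: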
1.f : 0.f); 16 | } 17 | void pphunmanseg::decodeDevice(int batchSize, float* src, int srcWidth, int srcHeight, float* dst, int dstWidth, int dstHeight) 18 | { 19 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE); 20 | dim3 grid_size((dstWidth * dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE, 21 | (batchSize + BLOCK_SIZE - 1) / BLOCK_SIZE); 22 | int src_area = srcWidth * srcHeight; 23 | int src_volum = srcWidth * srcHeight * 2; 24 | int dst_area = dstWidth * dstHeight; 25 | int dst_volum = dstWidth * dstHeight * 1; 26 | decode_pphunmanseg_device_kernel << < grid_size, block_size, 0, nullptr >> > (batchSize, 27 | src, srcWidth, srcHeight, src_area, src_volum, 28 | dst, dstWidth, dstHeight, dst_area, dst_volum); 29 | } -------------------------------------------------------------------------------- /pphumanseg/decode_pphunmanseg.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/utils.h" 3 | #include"../utils/common_include.h" 4 | 5 | namespace pphunmanseg 6 | { 7 | void decodeDevice(int batchSize, float* src, int srcWidth, int srcHeight, float* dst, int dstWidth, int dstHeight); 8 | } -------------------------------------------------------------------------------- /pphumanseg/pphunmanseg.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/common_include.h" 3 | #include"../utils/utils.h" 4 | #include"../utils/kernel_function.h" 5 | 6 | 7 | class PPHunmanSeg 8 | { 9 | public: 10 | PPHunmanSeg(const utils::InitParameter& param); 11 | ~PPHunmanSeg(); 12 | 13 | public: 14 | bool init(const std::vector& trtFile); 15 | void check(); 16 | void copy(const std::vector& imgsBatch); 17 | void preprocess(const std::vector& imgsBatch); 18 | bool infer(); 19 | void postprocess(const std::vector& imgsBatch); 20 | void reset(); 21 | void showMask(const std::vector& imgsBatch, const int& cvDelayTime); 22 | void saveMask(const std::vector& imgsBatch, const std::string& savePath, const int& batchSize, const int& batchi); 23 | 24 | protected: 25 | std::shared_ptr m_engine; 26 | std::unique_ptr m_context; 27 | 28 | protected: 29 | utils::InitParameter m_param; 30 | nvinfer1::Dims m_output_src_dims; 31 | int m_output_src_area; 32 | 33 | utils::AffineMat m_dst2src; 34 | utils::AffineMat m_src2dst; 35 | 36 | // input 37 | float* m_input_src_device; 38 | float* m_input_resize_device; 39 | float* m_input_rgb_device; 40 | float* m_input_norm_device; 41 | float* m_input_hwc_device; 42 | 43 | // output 44 | float* m_output_src_device; 45 | float* m_output_mask_device; 46 | float* m_output_resize_device; 47 | float* m_output_resize_host; 48 | }; 49 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.9.0 2 | onnx== 1.9.0 3 | torchvision==0.10.1 # Image classification 4 | onnx-simplifier==0.4.8 5 | onnxruntime==1.8.0 6 | opencv-python==4.6.0 -------------------------------------------------------------------------------- /tools/onnx2trt.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include // add file: ../TensorRT-8.4.2.4/samples/common/logger.cpp 9 | using namespace std; 10 | 11 | int main() { 12 | // setting 13 | std::string onnx_file = "D:/ThirdParty/TensorRT-8.4.2.4/bin/yolov8n.onnx"; 14 | std::string trt_file = 
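    // engine file is written to the working directory; the min/opt/max shapes
    // below form the dynamic-batch optimization profile and must bracket every
    // batch size used at runtime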
"yolov8n.trt"; 15 | int min_batchsize = 1; 16 | int opt_batchsize = 1; 17 | int max_batchsize = 2; 18 | nvinfer1::Dims4 min_shape(min_batchsize, 3, 640, 640); 19 | nvinfer1::Dims4 opt_shape(opt_batchsize, 3, 640, 640); 20 | nvinfer1::Dims4 max_shape(max_batchsize, 3, 640, 640); 21 | 22 | 23 | 24 | nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()); 25 | nvinfer1::IBuilderConfig* config = builder->createBuilderConfig(); 26 | nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1); 27 | 28 | nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()); 29 | if (!parser->parseFromFile(onnx_file.c_str(), 1)) { 30 | printf("Failed to parser demo.onnx\n"); 31 | return false; 32 | } 33 | 34 | printf("Workspace Size = %.2f MB\n", (1 << 28) / 1024.0f / 1024.0f); 35 | config->setMaxWorkspaceSize(1 << 28); 36 | 37 | auto profile = builder->createOptimizationProfile(); 38 | auto input_tensor = network->getInput(0); 39 | int input_channel = input_tensor->getDimensions().d[1]; 40 | 41 | profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMIN, min_shape); 42 | profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kOPT, opt_shape); 43 | profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMAX, max_shape); 44 | config->addOptimizationProfile(profile); 45 | 46 | nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config); 47 | if (engine == nullptr) { 48 | printf("Build engine failed.\n"); 49 | return false; 50 | } 51 | nvinfer1::IHostMemory* model_data = engine->serialize(); 52 | FILE* f = fopen(trt_file.c_str(), "wb"); 53 | fwrite(model_data->data(), 1, model_data->size(), f); 54 | fclose(f); 55 | 56 | model_data->destroy(); 57 | parser->destroy(); 58 | engine->destroy(); 59 | network->destroy(); 60 | config->destroy(); 61 | builder->destroy(); 62 | printf("Done.\n"); 63 | return true; 64 | } 65 | -------------------------------------------------------------------------------- /u2net/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(u2net VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | 11 | message(STATUS ${ALL_LIBS}) 12 | file(GLOB CPPS 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 17 | ${TensorRT_ROOT}/samples/common/logger.cpp 18 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 19 | ) 20 | list(REMOVE_ITEM CPPS u2net.cpp) 21 | message(STATUS CPPS = ${CPPS}) 22 | list (LENGTH CPPS length) 23 | message(STATUS ***length*** = ${length}) 24 | find_package(OpenCV REQUIRED) 25 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 26 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 27 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 28 | 29 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 30 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 31 | 32 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 33 | 
target_compile_options(${PROJECT_NAME} PUBLIC 34 | $<$:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 35 | 36 | add_executable(app_u2net app_u2net.cpp) 37 | 38 | # NVCC 39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 40 | target_link_libraries(app_u2net ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 41 | -------------------------------------------------------------------------------- /u2net/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google driver](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 3 | 4 | or export onnx: 5 | ```bash 6 | git clone https://github.com/xuebinqin/U-2-Net 7 | cd U-2-Net-master 8 | # Use the script alpha_export.py provided by this repo to export onnx 9 | cp alpha_export.py U-2-Net-master 10 | python alpha_export.py --net=u2net --weights=saved_models/u2net/u2net.pth 11 | python alpha_export.py --net=u2netp --weights=saved_models/u2netp/u2netp.pth 12 | ``` 13 | ## 2.edit and save onnx 14 | ```bash 15 | # note: If you have obtained onnx by downloading, this step can be ignored 16 | ignore 17 | ``` 18 | ## 3.compile onnx 19 | ```bash 20 | # put your onnx file in this path:tensorrt-alpha/data/u2net 21 | cd tensorrt-alpha/data/u2net 22 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 23 | 24 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=u2net.onnx --saveEngine=u2net.trt --buildOnly --minShapes=images:1x3x320x320 --optShapes=images:4x3x320x320 --maxShapes=images:8x3x320x320 25 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=u2netp.onnx --saveEngine=u2netp.trt --buildOnly --minShapes=images:1x3x320x320 --optShapes=images:4x3x320x320 --maxShapes=images:8x3x320x320 26 | ``` 27 | ## 4.run 28 | ```bash 29 | git clone https://github.com/FeiYull/tensorrt-alpha 30 | cd tensorrt-alpha/u2net 31 | mkdir build 32 | cd build 33 | cmake .. 34 | make -j10 35 | # note: the dstImage will be saved in tensorrt-alpha/u2net/build by default 36 | 37 | ## 320 38 | # infer image 39 | ./app_u2net --model=../../data/u2net/u2net.trt --size=320 --batch_size=1 --img=../../data/sailboat3.jpg --show --savePath 40 | 41 | # infer video 42 | ./app_u2net --model=../../data/u2net/u2net.trt --size=320 --batch_size=2 --video=../../data/people.mp4 --show 43 | 44 | # infer camera 45 | ./app_u2net --model=../../data/u2net/u2net.trt --size=320 --batch_size=2 --cam_id=0 --show 46 | ``` 47 | ## 5. 
appendix 48 | ignore -------------------------------------------------------------------------------- /u2net/alpha_export.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch.nn 3 | from model import U2NET 4 | from model import U2NETP 5 | 6 | import onnx 7 | import numpy as np 8 | import onnxsim # pip install onnx-simplifier 9 | import onnxruntime as ort 10 | import numpy as np 11 | 12 | class Alpha_U2Net(torch.nn.Module): 13 | def __init__(self, weight_file): 14 | super().__init__() 15 | self.model = U2NET(3, 1) 16 | self.model.load_state_dict(torch.load(model_path, map_location='cpu')) 17 | self.model.eval() 18 | 19 | def forward(self, x): 20 | y = self.model(x) 21 | return y[0] 22 | 23 | class Alpha_U2Netp(torch.nn.Module): 24 | def __init__(self, weight_file): 25 | super().__init__() 26 | self.model = U2NETP(3, 1) 27 | self.model.load_state_dict(torch.load(model_path, map_location='cpu')) 28 | self.model.eval() 29 | 30 | def forward(self, x): 31 | y = self.model(x) 32 | return y[0] 33 | """ 34 | example: 35 | python alpha_export.py --net=u2net --weights=saved_models/u2net/u2net.pth 36 | python alpha_export.py --net=u2netp --weights=saved_models/u2netp/u2netp.pth 37 | """ 38 | if __name__ == '__main__': 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('--net', type=str, default='u2net', help='net type') 41 | parser.add_argument('--weights', type=str, default='saved_models/u2net/u2net.pth', help='net path') 42 | opt = parser.parse_args() 43 | 44 | net = '' 45 | image_input_shape = [1, 3, 320, 320] 46 | image_input = torch.autograd.Variable(torch.randn(image_input_shape)) 47 | input_names = ["images"] 48 | output_names = ["output"] 49 | dynamic_axes = {"images": {0: "batch_size"}, "output": {0: "batch_size"}} 50 | 51 | net = opt.net 52 | if net=='u2net': # for u2net.pt 53 | net_name = "u2net" 54 | onnx_name = net_name + ".onnx" 55 | model_path = opt.weights 56 | u2net = Alpha_U2Net(model_path) 57 | torch.onnx.export(u2net, image_input, "saved_models/onnx/" + onnx_name, 58 | verbose=True, 59 | input_names=input_names, 60 | output_names=output_names, 61 | opset_version=11, # try opset_version=9 62 | training=False, 63 | dynamic_axes=dynamic_axes) 64 | elif net=='u2netp': # for u2netp.pt 65 | model_path = opt.weights 66 | u2netp = Alpha_U2Netp(model_path) 67 | torch.onnx.export(u2netp, image_input, "saved_models/onnx/u2netp.onnx", 68 | verbose=True, 69 | input_names=input_names, 70 | output_names=output_names, 71 | opset_version=11, 72 | training=False, 73 | dynamic_axes=dynamic_axes) 74 | -------------------------------------------------------------------------------- /u2net/app_u2net.cpp: -------------------------------------------------------------------------------- 1 | #include"u2net.h" 2 | 3 | void setParameters(utils::InitParameter& initParameters) 4 | { 5 | initParameters.class_names = utils::dataSets::coco80; 6 | //initParameters.num_class = 80; // for coco 7 | 8 | initParameters.batch_size = 1; 9 | initParameters.dst_h = 320; 10 | initParameters.dst_w = 320; 11 | initParameters.input_output_names = { "images", "output" }; 12 | initParameters.scale = 1.0; // div by max in u2net! 
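    // 0.485/0.456/0.406 and 0.229/0.224/0.225 below are the standard ImageNet RGB
    // mean/std; U-2-Net first scales each image by its own max value (hence
    // scale = 1.0 above) and then applies this mean/std, matching the RescaleT /
    // ToTensorLab preprocessing in the original U-2-Net repo.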
13 | initParameters.means[0] = 0.485; 14 | initParameters.means[1] = 0.456; 15 | initParameters.means[2] = 0.406; 16 | initParameters.stds[0] = 0.229; 17 | initParameters.stds[1] = 0.224; 18 | initParameters.stds[2] = 0.225; 19 | 20 | initParameters.save_path = ""; 21 | } 22 | 23 | void task(u2net::U2NET& u2net, const utils::InitParameter& param, std::vector& imgsBatch, const int& delayTime, const int& batchi, 24 | const bool& isShow, const bool& isSave) 25 | { 26 | u2net.copy(imgsBatch); 27 | utils::DeviceTimer d_t1; u2net.preprocess(imgsBatch); float t1 = d_t1.getUsedTime(); 28 | utils::DeviceTimer d_t2; u2net.infer(); float t2 = d_t2.getUsedTime(); 29 | utils::DeviceTimer d_t3; u2net.postprocess(imgsBatch); float t3 = d_t3.getUsedTime(); 30 | sample::gLogInfo << "preprocess time = " << t1 / param.batch_size << "; " 31 | "infer time = " << t2 / param.batch_size << "; " 32 | "postprocess time = " << t3 / param.batch_size << std::endl; 33 | if(isShow) 34 | u2net.showMask(imgsBatch, delayTime); 35 | if(isSave) 36 | u2net.saveMask(imgsBatch, param.save_path, param.batch_size, batchi); 37 | } 38 | 39 | int main(int argc, char** argv) 40 | { 41 | cv::CommandLineParser parser(argc, argv, 42 | { 43 | "{model || tensorrt model file }" 44 | "{size || image (h, w), eg: 640}" 45 | "{batch_size|| batch size }" 46 | "{video || video's path }" 47 | "{img || image's path }" 48 | "{cam_id || camera's device id }" 49 | "{show || if show the result }" 50 | "{savePath || save path, can be ignore}" 51 | }); 52 | // parameters 53 | utils::InitParameter param; 54 | setParameters(param); 55 | // path 56 | std::string model_path = "../../data/u2net/u2net.trt"; 57 | std::string video_path = "../../data/people.mp4"; 58 | std::string image_path = "../../data/6406403.jpg"; 59 | // camera' id 60 | int camera_id = 0; 61 | 62 | // get input 63 | utils::InputStream source; 64 | //source = utils::InputStream::IMAGE; 65 | //source = utils::InputStream::VIDEO; 66 | source = utils::InputStream::CAMERA; 67 | 68 | // update params from command line parser 69 | int size = -1; // w or h 70 | int batch_size = 8; 71 | bool is_show = false; 72 | bool is_save = false; 73 | if(parser.has("model")) 74 | { 75 | model_path = parser.get("model"); 76 | sample::gLogInfo << "model_path = " << model_path << std::endl; 77 | } 78 | if(parser.has("size")) 79 | { 80 | size = parser.get("size"); 81 | sample::gLogInfo << "size = " << size << std::endl; 82 | param.dst_h = param.dst_w = size; 83 | } 84 | if(parser.has("batch_size")) 85 | { 86 | batch_size = parser.get("batch_size"); 87 | sample::gLogInfo << "batch_size = " << batch_size << std::endl; 88 | param.batch_size = batch_size; 89 | } 90 | if(parser.has("video")) 91 | { 92 | source = utils::InputStream::VIDEO; 93 | video_path = parser.get("video"); 94 | sample::gLogInfo << "video_path = " << video_path << std::endl; 95 | } 96 | if(parser.has("img")) 97 | { 98 | source = utils::InputStream::IMAGE; 99 | image_path = parser.get("img"); 100 | sample::gLogInfo << "image_path = " << image_path << std::endl; 101 | } 102 | if(parser.has("cam_id")) 103 | { 104 | source = utils::InputStream::CAMERA; 105 | camera_id = parser.get("cam_id"); 106 | sample::gLogInfo << "camera_id = " << camera_id << std::endl; 107 | } 108 | if(parser.has("show")) 109 | { 110 | is_show = true; 111 | sample::gLogInfo << "is_show = " << is_show << std::endl; 112 | } 113 | if(parser.has("savePath")) 114 | { 115 | is_save = true; 116 | param.save_path = parser.get("savePath"); 117 | sample::gLogInfo << "save_path = " << 
param.save_path << std::endl; 118 | } 119 | 120 | int total_batches = 0; 121 | int delay_time = 1; 122 | cv::VideoCapture capture; 123 | if (!setInputStream(source, image_path, video_path, camera_id, 124 | capture, total_batches, delay_time, param)) 125 | { 126 | sample::gLogError << "read the input data errors!" << std::endl; 127 | return -1; 128 | } 129 | u2net::U2NET u2net(param); 130 | // read model 131 | std::vector trt_file = utils::loadModel(model_path); 132 | if (trt_file.empty()) 133 | { 134 | sample::gLogError << "trt_file is empty!" << std::endl; 135 | return -1; 136 | } 137 | // init model 138 | if (!u2net.init(trt_file)) 139 | { 140 | sample::gLogError << "initEngine() ocur errors!" << std::endl; 141 | return -1; 142 | } 143 | u2net.check(); 144 | cv::Mat frame; 145 | std::vector imgs_batch; 146 | imgs_batch.reserve(param.batch_size); 147 | sample::gLogInfo << imgs_batch.capacity() << std::endl; 148 | int batchi = 0; 149 | while (capture.isOpened()) 150 | { 151 | if (batchi >= total_batches && source != utils::InputStream::CAMERA) 152 | { 153 | break; 154 | } 155 | if (imgs_batch.size() < param.batch_size) 156 | { 157 | if (source != utils::InputStream::IMAGE) 158 | { 159 | capture.read(frame); 160 | } 161 | else 162 | { 163 | frame = cv::imread(image_path); 164 | } 165 | 166 | if (frame.empty()) 167 | { 168 | sample::gLogWarning << "no more video or camera frame" << std::endl; 169 | task(u2net, param, imgs_batch, delay_time, batchi, is_show, is_save); 170 | imgs_batch.clear(); 171 | batchi++; 172 | break; 173 | } 174 | else 175 | { 176 | imgs_batch.emplace_back(frame.clone()); 177 | } 178 | } 179 | else 180 | { 181 | task(u2net, param, imgs_batch, delay_time, batchi, is_show, is_save); 182 | imgs_batch.clear(); 183 | batchi++; 184 | } 185 | } 186 | return -1; 187 | } 188 | 189 | -------------------------------------------------------------------------------- /u2net/u2net.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/common_include.h" 3 | #include"../utils/utils.h" 4 | #include"../utils/kernel_function.h" 5 | namespace u2net 6 | { 7 | class U2NET 8 | { 9 | public: 10 | U2NET(const utils::InitParameter& param); 11 | ~U2NET(); 12 | 13 | public: 14 | bool init(const std::vector& trtFile); 15 | void check(); 16 | void copy(const std::vector& imgsBatch); 17 | void preprocess(const std::vector& imgsBatch); 18 | bool infer(); 19 | void postprocess(const std::vector& imgsBatch); 20 | void showMask(const std::vector& imgsBatch, const int& cvDelayTime); 21 | void saveMask(const std::vector& imgsBatch, const std::string& savePath, const int& batchSize, const int& batchi); 22 | void reset(); 23 | private: 24 | std::shared_ptr m_engine; 25 | std::unique_ptr m_context; 26 | 27 | //private: 28 | protected: 29 | utils::InitParameter m_param; 30 | nvinfer1::Dims m_output_dims; 31 | int m_output_area; 32 | std::vector> m_objectss; 33 | 34 | 35 | utils::AffineMat m_dst2src; 36 | utils::AffineMat m_src2dst; 37 | 38 | // input 39 | float* m_input_src_device; 40 | float* m_input_resize_device; 41 | float* m_input_rgb_device; 42 | float* m_input_norm_device; 43 | float* m_input_hwc_device; 44 | 45 | float* m_max_val_device; 46 | float* m_min_val_device; 47 | 48 | // output 49 | float* m_output_src_device; 50 | float* m_output_resize_device; 51 | float* m_output_resize_host; 52 | float* m_output_mask_host; 53 | 54 | }; 55 | } 56 | 57 | void u2netDivMaxDevice(const int& batchSize, float* src, int srcWidth, int srcHeight, int 
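// u2netDivMaxDevice scales every image in the batch by its per-image maximum
// (U-2-Net's input scaling); u2netNormPredDevice below evidently implements the
// repo's normPRED step, (pred - min) / (max - min), stretching the saliency map
// to [0, 1]: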
channel, float* maxVals); 58 | 59 | void u2netNormPredDevice(const int& batchSize, float* src, int srcWidth, int srcHeight, float scale, float* minVals, float* maxVals); -------------------------------------------------------------------------------- /utils/common_include.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // tensorrt 3 | #include 4 | #include 5 | #include 6 | // cuda 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | // opencv 16 | #include 17 | // cpp std 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include -------------------------------------------------------------------------------- /utils/kernel_function.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/common_include.h" 3 | #include"../utils/utils.h" 4 | 5 | #define CHECK(op) __check_cuda_runtime((op), #op, __FILE__, __LINE__) 6 | 7 | bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line); 8 | 9 | #define BLOCK_SIZE 8 10 | 11 | //note: resize rgb with padding 12 | void resizeDevice(const int& batch_size, float* src, int src_width, int src_height, 13 | float* dst, int dstWidth, int dstHeight, 14 | float paddingValue, utils::AffineMat matrix); 15 | 16 | //overload:resize rgb with padding, but src's type is uin8 17 | void resizeDevice(const int& batch_size, unsigned char* src, int src_width, int src_height, 18 | float* dst, int dstWidth, int dstHeight, 19 | float paddingValue, utils::AffineMat matrix); 20 | 21 | // overload: resize rgb/gray without padding 22 | void resizeDevice(const int& batchSize, float* src, int srcWidth, int srcHeight, 23 | float* dst, int dstWidth, int dstHeight, 24 | utils::ColorMode mode, utils::AffineMat matrix); 25 | 26 | void bgr2rgbDevice(const int& batch_size, float* src, int srcWidth, int srcHeight, 27 | float* dst, int dstWidth, int dstHeight); 28 | 29 | void normDevice(const int& batch_size, float* src, int srcWidth, int srcHeight, 30 | float* dst, int dstWidth, int dstHeight, 31 | utils::InitParameter norm_param); 32 | 33 | void hwc2chwDevice(const int& batch_size, float* src, int srcWidth, int srcHeight, 34 | float* dst, int dstWidth, int dstHeight); 35 | 36 | void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight); 37 | 38 | // nms fast 39 | void nmsDeviceV1(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea); 40 | 41 | // nms sort 42 | void nmsDeviceV2(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, 43 | int* idx, float* conf); 44 | 45 | void copyWithPaddingDevice(const int& batchSize, float* src, int srcWidth, int srcHeight, 46 | float* dst, int dstWidth, int dstHeight, float paddingValue, int padTop, int padLeft); -------------------------------------------------------------------------------- /utils/tracking/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FeiYull/TensorRT-Alpha/bca9575229ef5f6fe4c5acf51c1bd3c7e5959ec6/utils/tracking/.gitkeep -------------------------------------------------------------------------------- /utils/yolo.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/common_include.h" 3 | 
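// Judging by the buffer names below, preprocessing chains the kernel_function.h
// kernels as resizeDevice -> bgr2rgbDevice -> normDevice -> hwc2chwDevice, and
// postprocessing runs decodeDevice followed by nmsDeviceV1 (fast) or nmsDeviceV2
// (sort-based) on m_output_objects_device.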
#include"../utils/utils.h" 4 | #include"../utils/kernel_function.h" 5 | 6 | namespace yolo 7 | { 8 | class YOLO 9 | { 10 | public: 11 | YOLO(const utils::InitParameter& param); 12 | ~YOLO(); 13 | 14 | public: 15 | virtual bool init(const std::vector& trtFile); 16 | virtual void check(); 17 | virtual void copy(const std::vector& imgsBatch); 18 | virtual void preprocess(const std::vector& imgsBatch); 19 | virtual bool infer(); 20 | virtual void postprocess(const std::vector& imgsBatch); 21 | virtual void reset(); 22 | 23 | public: 24 | std::vector> getObjectss() const; 25 | 26 | protected: 27 | std::shared_ptr m_engine; 28 | std::unique_ptr m_context; 29 | 30 | protected: 31 | utils::InitParameter m_param; 32 | nvinfer1::Dims m_output_dims; 33 | int m_output_area; 34 | int m_total_objects; 35 | std::vector> m_objectss; 36 | utils::AffineMat m_dst2src; 37 | 38 | // input 39 | unsigned char* m_input_src_device; 40 | float* m_input_resize_device; 41 | float* m_input_rgb_device; 42 | float* m_input_norm_device; 43 | float* m_input_hwc_device; 44 | // output 45 | float* m_output_src_device; 46 | float* m_output_objects_device; 47 | float* m_output_objects_host; 48 | int m_output_objects_width; 49 | int* m_output_idx_device; 50 | float* m_output_conf_device; 51 | }; 52 | } 53 | -------------------------------------------------------------------------------- /vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "C++ Launch", 6 | "type": "cppdbg", 7 | "request": "launch", 8 | "program": "${workspaceFolder}/build/app_yolox", 9 | "args": [ 10 | 11 | "--model=../data/yolox/yolox_tiny.trt", 12 | "--size=416", 13 | "--batch_size=8", 14 | 15 | // image 16 | //"--img= ../data/6406403.jpg", 17 | 18 | // video 19 | "--video=../data/people.mp4", 20 | 21 | // camera 22 | // "--cam_id= 0", 23 | 24 | "--show", 25 | "--savePath= build/" 26 | ], 27 | "stopAtEntry": false, 28 | "cwd": "${workspaceFolder}", 29 | //"preLaunchTask": "C/C++: g++ build active file", 30 | //"miDebuggerPath": "/usr/bin/gdb" 31 | } 32 | ] 33 | } -------------------------------------------------------------------------------- /yolonas/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolo_nas VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | message(STATUS ${ALL_LIBS}) 11 | file(GLOB CPPS 12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 16 | ${TensorRT_ROOT}/samples/common/logger.cpp 17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 18 | #${TensorRT_ROOT}/samples/common/sampleUtils.cpp 19 | ) 20 | list(REMOVE_ITEM CPPS app_yolo_nas.cpp) 21 | message(STATUS CPPS = ${CPPS}) 22 | 23 | list (LENGTH CPPS length) 24 | message(STATUS ***length*** = ${length}) 25 | find_package(OpenCV REQUIRED) 26 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 27 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 28 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 29 | add_library(${PROJECT_NAME} 
SHARED ${CPPS}) 30 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 31 | 32 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 33 | target_compile_options(${PROJECT_NAME} PUBLIC 34 | $<$:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 35 | 36 | add_executable(app_yolo_nas app_yolo_nas.cpp) 37 | 38 | # NVCC 39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 40 | target_link_libraries(app_yolo_nas ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 41 | -------------------------------------------------------------------------------- /yolonas/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google driver](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) or export onnx: 3 | ```bash 4 | pip install super-gradients==3.3.1 5 | cd super-gradients 6 | # copy the python script provided in this repository to your workspace 7 | # note:The weight file is downloaded automatically 8 | cp TensorRT-Alpha/yolonas/alpha_export_dynamic.py YOUR_WORKSPACE 9 | 10 | # for YOLO_NAS_S 11 | # Changing lines 9-11 of the code allows you to switch to other models, eg:YOLO_NAS_M 12 | python alpha_export_dynamic.py 13 | ``` 14 | 15 | ## 2.edit and save onnx 16 | ```bash 17 | # note: If you have obtained onnx by downloading, this step can be ignored 18 | ignore 19 | ``` 20 | 21 | ## 3.compile onnx 22 | ```bash 23 | # put your onnx file in this path:tensorrt-alpha/data/yolonas 24 | cd tensorrt-alpha/data/yolonas 25 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 26 | # 640 27 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolonas_s.onnx --saveEngine=yolonas_s.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 28 | ``` 29 | ## 4.run 30 | ```bash 31 | git clone https://github.com/FeiYull/tensorrt-alpha 32 | cd tensorrt-alpha/yolonas 33 | mkdir build 34 | cd build 35 | cmake .. 36 | make -j10 37 | # note: the dstImage will be saved in tensorrt-alpha/yolonas/build by default 38 | 39 | ## 640 40 | # infer image 41 | ./app_yolo_nas --model=../../data/yolo_nas/yolonas_s.trt --size=640 --batch_size=1 --img=../../data/6406407.jpg --show --savePath=../ 42 | 43 | # infer video 44 | ./app_yolo_nas --model=../../data/yolo_nas/yolonas_s.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show 45 | 46 | # infer camera 47 | ./app_yolo_nas --model=../../data/yolo_nas/yolonas_s.trt --size=640 --batch_size=2 --cam_id=0 --show 48 | ``` 49 | ## 5. 
appendix 50 | ignore -------------------------------------------------------------------------------- /yolonas/alpha_export_dynamic.py: -------------------------------------------------------------------------------- 1 | from super_gradients.training import models 2 | from super_gradients.common.object_names import Models 3 | import torch 4 | import numpy as np 5 | 6 | class AlphaYoloNas(torch.nn.Module): 7 | def __init__(self): 8 | super().__init__() 9 | self.model = models.get(Models.YOLO_NAS_S, pretrained_weights="coco") 10 | # self.model = models.get(Models.YOLO_NAS_M, pretrained_weights="coco") 11 | # self.model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco") 12 | self.model.eval() 13 | 14 | def forward(self, x): 15 | y = self.model(x) 16 | return torch.cat((y[0], y[1]), 2) 17 | 18 | input_size = (1, 3, 640, 640) 19 | onnx_input = torch.Tensor(np.zeros(input_size)) 20 | 21 | net = AlphaYoloNas() 22 | input_names = ["images"] 23 | output_names = ["output"] 24 | dynamic_axes = {input_names[0]: {0: "batch_size"}, 25 | output_names[0]: {0: "batch_size"}} 26 | 27 | torch.onnx.export(net, onnx_input, "yolonas_s.onnx", 28 | #verbose=True, 29 | input_names=input_names, 30 | output_names=output_names, 31 | opset_version=12, 32 | dynamic_axes=dynamic_axes) -------------------------------------------------------------------------------- /yolonas/app_yolo_nas.cpp: -------------------------------------------------------------------------------- 1 | #include"../utils/yolo.h" 2 | #include"yolo_nas.h" 3 | 4 | void setParameters(utils::InitParameter& initParameters) 5 | { 6 | initParameters.class_names = utils::dataSets::coco80; 7 | //initParameters.class_names = utils::dataSets::voc20; 8 | initParameters.num_class = 80; // for coco 9 | //initParameters.num_class = 20; // for voc2012 10 | initParameters.batch_size = 8; 11 | initParameters.dst_h = 636; 12 | initParameters.dst_w = 636; 13 | initParameters.input_output_names = { "images", "output" }; 14 | initParameters.conf_thresh = 0.25f; 15 | initParameters.iou_thresh = 0.7f; 16 | initParameters.save_path = ""; 17 | } 18 | 19 | void task(YOLO_NAS& yolo, const utils::InitParameter& param, std::vector& imgsBatch, const int& delayTime, const int& batchi, 20 | const bool& isShow, const bool& isSave) 21 | { 22 | utils::DeviceTimer d_t0; yolo.copy(imgsBatch); float t0 = d_t0.getUsedTime(); 23 | utils::DeviceTimer d_t1; yolo.preprocess(imgsBatch); float t1 = d_t1.getUsedTime(); 24 | utils::DeviceTimer d_t2; yolo.infer(); float t2 = d_t2.getUsedTime(); 25 | utils::DeviceTimer d_t3; yolo.postprocess(imgsBatch); float t3 = d_t3.getUsedTime(); 26 | sample::gLogInfo << 27 | "preprocess time = " << t1 / param.batch_size << "; " 28 | "infer time = " << t2 / param.batch_size << "; " 29 | "postprocess time = " << t3 / param.batch_size << std::endl; 30 | 31 | if(isShow) 32 | utils::show(yolo.getObjectss(), param.class_names, delayTime, imgsBatch); 33 | if(isSave) 34 | utils::save(yolo.getObjectss(), param.class_names, param.save_path, imgsBatch, param.batch_size, batchi); 35 | yolo.reset(); 36 | } 37 | 38 | int main(int argc, char** argv) 39 | { 40 | cv::CommandLineParser parser(argc, argv, 41 | { 42 | "{model || tensorrt model file }" 43 | "{size || image (h, w), eg: 640 }" 44 | "{batch_size|| batch size }" 45 | "{video || video's path }" 46 | "{img || image's path }" 47 | "{cam_id || camera's device id }" 48 | "{show || if show the result }" 49 | "{savePath || save path, can be ignore}" 50 | }); 51 | utils::InitParameter param; 52 | setParameters(param); 
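    // note: dst_h/dst_w default to 636 rather than 640 in setParameters(); this
    // apparently mirrors super-gradients' YOLO-NAS preprocessing, which rescales
    // to 636 and then pads up to the 640x640 network input (see
    // copyWithPaddingDevice in utils/kernel_function.h).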
53 | std::string model_path = "../../data/yolov8/yolonas_s.trt"; 54 | std::string video_path = "../../data/people.mp4"; 55 | std::string image_path = "../../data/bus.jpg"; 56 | int camera_id = 0; 57 | utils::InputStream source; 58 | source = utils::InputStream::IMAGE; 59 | //source = utils::InputStream::VIDEO; 60 | //source = utils::InputStream::CAMERA; 61 | 62 | int size = -1; 63 | int batch_size = 8; 64 | bool is_show = false; 65 | bool is_save = false; 66 | if(parser.has("model")) 67 | { 68 | model_path = parser.get<std::string>("model"); 69 | sample::gLogInfo << "model_path = " << model_path << std::endl; 70 | } 71 | if(parser.has("size")) 72 | { 73 | size = parser.get<int>("size"); 74 | sample::gLogInfo << "size = " << size << std::endl; 75 | param.dst_h = param.dst_w = size; 76 | } 77 | if(parser.has("batch_size")) 78 | { 79 | batch_size = parser.get<int>("batch_size"); 80 | sample::gLogInfo << "batch_size = " << batch_size << std::endl; 81 | param.batch_size = batch_size; 82 | } 83 | if(parser.has("video")) 84 | { 85 | source = utils::InputStream::VIDEO; 86 | video_path = parser.get<std::string>("video"); 87 | sample::gLogInfo << "video_path = " << video_path << std::endl; 88 | } 89 | if(parser.has("img")) 90 | { 91 | source = utils::InputStream::IMAGE; 92 | image_path = parser.get<std::string>("img"); 93 | sample::gLogInfo << "image_path = " << image_path << std::endl; 94 | } 95 | if(parser.has("cam_id")) 96 | { 97 | source = utils::InputStream::CAMERA; 98 | camera_id = parser.get<int>("cam_id"); 99 | sample::gLogInfo << "camera_id = " << camera_id << std::endl; 100 | } 101 | if(parser.has("show")) 102 | { 103 | is_show = true; 104 | sample::gLogInfo << "is_show = " << is_show << std::endl; 105 | } 106 | if(parser.has("savePath")) 107 | { 108 | is_save = true; 109 | param.save_path = parser.get<std::string>("savePath"); 110 | sample::gLogInfo << "save_path = " << param.save_path << std::endl; 111 | } 112 | int total_batches = 0; 113 | int delay_time = 1; 114 | cv::VideoCapture capture; 115 | if (!setInputStream(source, image_path, video_path, camera_id, 116 | capture, total_batches, delay_time, param)) 117 | { 118 | sample::gLogError << "failed to read the input data!" << std::endl; 119 | return -1; 120 | } 121 | YOLO_NAS yolo(param); 122 | std::vector<unsigned char> trt_file = utils::loadModel(model_path); 123 | if (trt_file.empty()) 124 | { 125 | sample::gLogError << "trt_file is empty!" << std::endl; 126 | return -1; 127 | } 128 | if (!yolo.init(trt_file)) 129 | { 130 | sample::gLogError << "init() failed!"
<< std::endl; 131 | return -1; 132 | } 133 | yolo.check(); 134 | cv::Mat frame; 135 | std::vector<cv::Mat> imgs_batch; 136 | imgs_batch.reserve(param.batch_size); 137 | sample::gLogInfo << imgs_batch.capacity() << std::endl; 138 | int batchi = 0; 139 | while (capture.isOpened()) 140 | { 141 | if (batchi >= total_batches && source != utils::InputStream::CAMERA) 142 | { 143 | break; 144 | } 145 | if (imgs_batch.size() < param.batch_size) 146 | { 147 | if (source != utils::InputStream::IMAGE) 148 | { 149 | capture.read(frame); 150 | } 151 | else 152 | { 153 | frame = cv::imread(image_path); 154 | } 155 | 156 | if (frame.empty()) 157 | { 158 | sample::gLogWarning << "no more video or camera frames" << std::endl; 159 | task(yolo, param, imgs_batch, delay_time, batchi, is_show, is_save); 160 | imgs_batch.clear(); 161 | batchi++; 162 | break; 163 | } 164 | else 165 | { 166 | imgs_batch.emplace_back(frame.clone()); 167 | } 168 | } 169 | else 170 | { 171 | task(yolo, param, imgs_batch, delay_time, batchi, is_show, is_save); 172 | imgs_batch.clear(); 173 | batchi++; 174 | } 175 | } 176 | return 0; 177 | } 178 | 179 | -------------------------------------------------------------------------------- /yolonas/decode_yolo_nas.cu: -------------------------------------------------------------------------------- 1 | #include "decode_yolo_nas.h" 2 | 3 | __global__ void decode_yolo_nas_device_kernel(int batch_size, int num_class, int topK, float conf_thresh, 4 | float* src, int srcWidth, int srcHeight, int srcArea, 5 | float* dst, int dstWidth, int dstHeight, int dstArea) 6 | { 7 | int dx = blockDim.x * blockIdx.x + threadIdx.x; 8 | int dy = blockDim.y * blockIdx.y + threadIdx.y; 9 | if (dx >= srcHeight || dy >= batch_size) 10 | { 11 | return; 12 | } 13 | float* pitem = src + dy * srcArea + dx * srcWidth; 14 | float* class_confidence = pitem + 4; 15 | float confidence = *class_confidence++; 16 | int label = 0; 17 | for (int i = 1; i < num_class; ++i, ++class_confidence) 18 | { 19 | if (*class_confidence > confidence) 20 | { 21 | confidence = *class_confidence; 22 | label = i; 23 | } 24 | } 25 | if (confidence < conf_thresh) 26 | { 27 | return; 28 | } 29 | int index = atomicAdd(dst + dy * dstArea, 1); 30 | 31 | if (index >= topK) 32 | { 33 | return; 34 | } 35 | float left = *pitem++; 36 | float top = *pitem++; 37 | float right = *pitem++; 38 | float bottom = *pitem++; 39 | 40 | float* pout_item = dst + dy * dstArea + 1 + index * dstWidth; 41 | *pout_item++ = left; 42 | *pout_item++ = top; 43 | *pout_item++ = right; 44 | *pout_item++ = bottom; 45 | *pout_item++ = confidence; 46 | *pout_item++ = label; 47 | *pout_item++ = 1; 48 | } 49 | 50 | void yolo_nas::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight) 51 | { 52 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE); 53 | dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE, 54 | (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE); 55 | int dstArea = 1 + dstWidth * dstHeight; 56 | decode_yolo_nas_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size, param.num_class, param.topK, param.conf_thresh, 57 | src, srcWidth, srcHeight, srcArea, 58 | dst, dstWidth, dstHeight, dstArea); 59 | } -------------------------------------------------------------------------------- /yolonas/decode_yolo_nas.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/utils.h" 3 | #include"../utils/kernel_function.h" 4 |
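// Layout of the decode output buffer filled by decodeDevice (see decode_yolo_nas.cu): for each image in the batch, dst holds [num_candidates | candidate_0 | candidate_1 | ...], where every candidate is dstWidth floats: left, top, right, bottom, confidence, label, keep_flag. dstWidth is the per-object stride (7) and dstHeight is topK, so the per-image stride is 1 + dstWidth * dstHeight.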
5 | namespace yolo_nas 6 | { 7 | void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight); 8 | } 9 | -------------------------------------------------------------------------------- /yolonas/yolo_nas.cpp: -------------------------------------------------------------------------------- 1 | #include"yolo_nas.h" 2 | #include"decode_yolo_nas.h" 3 | 4 | YOLO_NAS::YOLO_NAS(const utils::InitParameter& param) :yolo::YOLO(param) 5 | { 6 | m_resize_shape = cv::Size(636, 636); 7 | m_input_resize_padding_device = nullptr; 8 | CHECK(cudaMalloc(&m_input_resize_padding_device, param.batch_size * 3 * m_param.dst_h * m_param.dst_w * sizeof(float))); 9 | } 10 | 11 | YOLO_NAS::~YOLO_NAS() 12 | { 13 | CHECK(cudaFree(m_input_resize_padding_device)); 14 | } 15 | 16 | bool YOLO_NAS::init(const std::vector<unsigned char>& trtFile) 17 | { 18 | if (trtFile.empty()) 19 | { 20 | return false; 21 | } 22 | std::unique_ptr<nvinfer1::IRuntime> runtime = 23 | std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger())); 24 | if (runtime == nullptr) 25 | { 26 | return false; 27 | } 28 | this->m_engine = std::unique_ptr<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(trtFile.data(), trtFile.size())); 29 | if (this->m_engine == nullptr) 30 | { 31 | return false; 32 | } 33 | this->m_context = std::unique_ptr<nvinfer1::IExecutionContext>(this->m_engine->createExecutionContext()); 34 | if (this->m_context == nullptr) 35 | { 36 | return false; 37 | } 38 | if (m_param.dynamic_batch) 39 | { 40 | this->m_context->setBindingDimensions(0, nvinfer1::Dims4(m_param.batch_size, 3, m_param.dst_h, m_param.dst_w)); 41 | } 42 | m_output_dims = this->m_context->getBindingDimensions(1); 43 | m_total_objects = m_output_dims.d[1]; 44 | assert(m_param.batch_size <= m_output_dims.d[0]); 45 | m_output_area = 1; 46 | for (int i = 1; i < m_output_dims.nbDims; i++) 47 | { 48 | if (m_output_dims.d[i] != 0) 49 | { 50 | m_output_area *= m_output_dims.d[i]; 51 | } 52 | } 53 | CHECK(cudaMalloc(&m_output_src_device, m_param.batch_size * m_output_area * sizeof(float))); 54 | float a = float(m_resize_shape.height) / m_param.src_h; 55 | float b = float(m_resize_shape.width) / m_param.src_w;
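// The 2x3 affine built below implements the letterbox mapping: src2dst scales the source frame by min(636/src_h, 636/src_w) and centers it on the resize canvas; invertAffineTransform then yields dst2src, which postprocess() uses to map detected boxes back to source-image coordinates.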
56 | float scale = a < b ? a : b; 57 | cv::Mat src2dst = (cv::Mat_<float>(2, 3) << scale, 0.f, (-scale * m_param.src_w + m_resize_shape.width + scale - 1) * 0.5, 58 | 0.f, scale, (-scale * m_param.src_h + m_resize_shape.height + scale - 1) * 0.5); 59 | cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1); 60 | cv::invertAffineTransform(src2dst, dst2src); 61 | int pad_height = m_param.dst_h - m_resize_shape.height; 62 | int pad_width = m_param.dst_w - m_resize_shape.width; 63 | m_pad_top = pad_height / 2; 64 | m_pad_left = pad_width / 2; 65 | 66 | m_dst2src.v0 = dst2src.ptr<float>(0)[0]; 67 | m_dst2src.v1 = dst2src.ptr<float>(0)[1]; 68 | m_dst2src.v2 = dst2src.ptr<float>(0)[2]; 69 | m_dst2src.v3 = dst2src.ptr<float>(1)[0]; 70 | m_dst2src.v4 = dst2src.ptr<float>(1)[1]; 71 | m_dst2src.v5 = dst2src.ptr<float>(1)[2]; 72 | return true; 73 | } 74 | 75 | void YOLO_NAS::preprocess(const std::vector<cv::Mat>& imgsBatch) 76 | { 77 | resizeDevice(m_param.batch_size, m_input_src_device, m_param.src_w, m_param.src_h, 78 | m_input_resize_device, m_resize_shape.width, m_resize_shape.height, 114, m_dst2src); 79 | copyWithPaddingDevice(m_param.batch_size, m_input_resize_device, m_resize_shape.width, m_resize_shape.height, 80 | m_input_resize_padding_device, m_param.dst_w, m_param.dst_h, 114.f, m_pad_top, m_pad_left); 81 | bgr2rgbDevice(m_param.batch_size, m_input_resize_padding_device, m_param.dst_w, m_param.dst_h, 82 | m_input_rgb_device, m_param.dst_w, m_param.dst_h); 83 | normDevice(m_param.batch_size, m_input_rgb_device, m_param.dst_w, m_param.dst_h, 84 | m_input_norm_device, m_param.dst_w, m_param.dst_h, m_param); 85 | hwc2chwDevice(m_param.batch_size, m_input_norm_device, m_param.dst_w, m_param.dst_h, 86 | m_input_hwc_device, m_param.dst_w, m_param.dst_h); 87 | } 88 | void YOLO_NAS::postprocess(const std::vector<cv::Mat>& imgsBatch) 89 | { 90 | yolo_nas::decodeDevice(m_param, m_output_src_device, 4 + m_param.num_class, m_total_objects, m_output_area, 91 | m_output_objects_device, m_output_objects_width, m_param.topK); 92 | nmsDeviceV1(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1); 93 | //nmsDeviceV2(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1, m_output_idx_device, m_output_conf_device); 94 | CHECK(cudaMemcpy(m_output_objects_host, m_output_objects_device, m_param.batch_size * sizeof(float) * (1 + 7 * m_param.topK), cudaMemcpyDeviceToHost)); 95 | for (size_t bi = 0; bi < imgsBatch.size(); bi++) 96 | { 97 | int num_boxes = std::min((int)(m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1))[0], m_param.topK); 98 | for (int i = 0; i < num_boxes; i++) 99 | { 100 | float* ptr = m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1) + m_output_objects_width * i + 1; 101 | int keep_flag = ptr[6]; 102 | if (keep_flag) 103 | { 104 | ptr[0] -= m_pad_left; 105 | ptr[1] -= m_pad_top; 106 | ptr[2] -= m_pad_left; 107 | ptr[3] -= m_pad_top; 108 | float x_lt = m_dst2src.v0 * ptr[0] + m_dst2src.v1 * ptr[1] + m_dst2src.v2; 109 | float y_lt = m_dst2src.v3 * ptr[0] + m_dst2src.v4 * ptr[1] + m_dst2src.v5; 110 | float x_rb = m_dst2src.v0 * ptr[2] + m_dst2src.v1 * ptr[3] + m_dst2src.v2; 111 | float y_rb = m_dst2src.v3 * ptr[2] + m_dst2src.v4 * ptr[3] + m_dst2src.v5; 112 | m_objectss[bi].emplace_back(x_lt, y_lt, x_rb, y_rb, ptr[4], (int)ptr[5]); 113 | } 114 | } 115 | } 116 | } -------------------------------------------------------------------------------- /yolonas/yolo_nas.h:
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/yolo.h" 3 | #include"../utils/utils.h" 4 | class YOLO_NAS : public yolo::YOLO 5 | { 6 | public: 7 | YOLO_NAS(const utils::InitParameter& param); 8 | ~YOLO_NAS(); 9 | virtual bool init(const std::vector<unsigned char>& trtFile); 10 | virtual void preprocess(const std::vector<cv::Mat>& imgsBatch); 11 | virtual void postprocess(const std::vector<cv::Mat>& imgsBatch); 12 | 13 | private: 14 | float* m_input_resize_padding_device; 15 | cv::Size m_resize_shape; 16 | int m_pad_top; 17 | int m_pad_left; 18 | }; -------------------------------------------------------------------------------- /yolor/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolor VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | 8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 11 | message(STATUS ${ALL_LIBS}) 12 | file(GLOB CPPS 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 16 | ${TensorRT_ROOT}/samples/common/logger.cpp 17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 18 | ) 19 | list(REMOVE_ITEM CPPS app_yolor.cpp) 20 | message(STATUS CPPS = ${CPPS}) 21 | list (LENGTH CPPS length) 22 | message(STATUS ***length*** = ${length}) 23 | find_package(OpenCV REQUIRED) 24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 27 | 28 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 29 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 30 | 31 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 32 | target_compile_options(${PROJECT_NAME} PUBLIC 33 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 34 | 35 | add_executable(app_yolor app_yolor.cpp) 36 | 37 | # NVCC 38 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 39 | target_link_libraries(app_yolor ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 40 | -------------------------------------------------------------------------------- /yolor/README.md: -------------------------------------------------------------------------------- 1 | ## Notes 2 | - 0. Please use the export script "alpha_export.py" provided by this repository. 3 | - 1. With torch 1.7 + onnx 1.8.0, exporting onnx fails with: 4 | "RuntimeError: Exporting the operator silu to ONNX opset version 11 is not supported. Please open a bug to request ONNX export support for the missing operator." 5 | - 2. Switching the environment to torch 1.9 + onnx 1.11.0 resolves the unsupported-op problem and the export succeeds. 6 | 7 | 8 | ## 1.
get onnx 9 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 10 | 11 | or export onnx: 12 | ```bash 13 | git clone https://github.com/WongKinYiu/yolor && cd yolor 14 | git checkout 462858e8737f56388f812cfe381a69c4ffca0cc7 15 | # Please use the "alpha_export.py" file provided by TensorRT-Alpha to export onnx 16 | # copy it from tensorrt-alpha/yolor into the current directory first (adjust the path to your clone): 17 | cp ../tensorrt-alpha/yolor/alpha_export.py . 18 | 19 | # 1280 20 | python alpha_export.py --net=yolor_p6 21 | # 640 22 | python alpha_export.py --net=yolor_csp 23 | python alpha_export.py --net=yolor_csp_star 24 | python alpha_export.py --net=yolor_csp_x 25 | python alpha_export.py --net=yolor_csp_x_star 26 | ``` 27 | ## 2.edit and save onnx 28 | ```bash 29 | # note: If you have obtained onnx by downloading, this step can be ignored 30 | ignore 31 | ``` 32 | ## 3.compile onnx 33 | ```bash 34 | # put your onnx file in this path:tensorrt-alpha/data/yolor 35 | cd tensorrt-alpha/data/yolor 36 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 37 | 38 | # 1280 39 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_p6.onnx --saveEngine=yolor_p6.trt --buildOnly --minShapes=images:1x3x1280x1280 --optShapes=images:2x3x1280x1280 --maxShapes=images:4x3x1280x1280 40 | 41 | # 640 42 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_csp.onnx --saveEngine=yolor_csp.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 43 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_csp_star.onnx --saveEngine=yolor_csp_star.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 44 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_csp_x.onnx --saveEngine=yolor_csp_x.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 45 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolor_csp_x_star.onnx --saveEngine=yolor_csp_x_star.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 46 | ``` 47 | ## 4.run 48 | ```bash 49 | git clone https://github.com/FeiYull/tensorrt-alpha 50 | cd tensorrt-alpha/yolor 51 | mkdir build 52 | cd build 53 | cmake .. 54 | make -j10 55 | # note: the dstImage will be saved in tensorrt-alpha/yolor/build by default 56 | 57 | ## 640 58 | # infer image 59 | ./app_yolor --model=../../data/yolor/yolor_csp.trt --size=640 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../ 60 | 61 | # infer video 62 | ./app_yolor --model=../../data/yolor/yolor_csp.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show 63 | 64 | # infer camera 65 | ./app_yolor --model=../../data/yolor/yolor_csp.trt --size=640 --batch_size=2 --cam_id=0 --show 66 | 67 | 68 | ## 1280 69 | ./app_yolor --model=../../data/yolor/yolor_p6.trt --size=1280 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../ 70 | ``` 71 | ## 5.
appendix 72 | ignore -------------------------------------------------------------------------------- /yolor/alpha_export.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import platform 4 | import shutil 5 | import time 6 | from pathlib import Path 7 | 8 | import cv2 9 | import torch 10 | import torch.backends.cudnn as cudnn 11 | from numpy import random 12 | 13 | from utils.google_utils import attempt_load 14 | from utils.datasets import LoadStreams, LoadImages 15 | from utils.general import ( 16 | check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, strip_optimizer) 17 | from utils.plots import plot_one_box 18 | from utils.torch_utils import select_device, load_classifier, time_synchronized 19 | 20 | from models.models import * 21 | from utils.datasets import * 22 | from utils.general import * 23 | 24 | import argparse 25 | 26 | import torch 27 | from utils.google_utils import attempt_download 28 | 29 | 30 | import onnx 31 | import onnxruntime as ort 32 | import numpy as np 33 | 34 | """ 35 | example: 36 | python alpha_export.py --net=yolor_p6 37 | python alpha_export.py --net=yolor_csp 38 | python alpha_export.py --net=yolor_csp_star 39 | python alpha_export.py --net=yolor_csp_x 40 | python alpha_export.py --net=yolor_csp_x_star 41 | """ 42 | if __name__ == '__main__': 43 | parser = argparse.ArgumentParser() 44 | parser.add_argument('--net', type=str, default='yolor_p6', help='net type') 45 | opt = parser.parse_args() 46 | # init 47 | image_input_shape = '' 48 | img = '' 49 | model = '' 50 | 51 | net = opt.net 52 | if net == "yolor_p6": 53 | # yolor_p6 54 | image_input_shape = (1, 3, 1280, 1280) 55 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection 56 | model = Darknet("cfg/yolor_p6.cfg", 1280).cpu() 57 | opt.weights = 'yolor_p6.pt' 58 | elif net == "yolor_csp": 59 | # yolor_csp 60 | image_input_shape = (1, 3, 640, 640) 61 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection 62 | model = Darknet("cfg/yolor_csp.cfg", 640).cpu() 63 | opt.weights = 'yolor_csp.pt' 64 | elif net == "yolor_csp_star": 65 | # yolor_csp_star 66 | image_input_shape = (1, 3, 640, 640) 67 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection 68 | model = Darknet("cfg/yolor_csp.cfg", 640).cpu() 69 | opt.weights = 'yolor_csp_star.pt' 70 | elif net == "yolor_csp_x": 71 | # yolor_csp_x: 72 | image_input_shape = (1, 3, 640, 640) 73 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection 74 | model = Darknet("cfg/yolor_csp_x.cfg", 640).cpu() 75 | opt.weights = 'yolor_csp_x.pt' 76 | elif net == "yolor_csp_x_star": 77 | # yolor_csp_x_star: 640*640 78 | image_input_shape = (1, 3, 640, 640) 79 | img = torch.ones(image_input_shape) # image size(1,3,320,192) iDetection 80 | model = Darknet("cfg/yolor_csp_x.cfg", 640).cpu() 81 | opt.weights = 'yolor_csp_x_star.pt' 82 | 83 | model.load_state_dict(torch.load(opt.weights, map_location="cpu")['model']) 84 | 85 | model.eval() 86 | y = model(img) # dry run 87 | print(y[0][0][0][0:10]) 88 | 89 | # ONNX export 90 | # try 91 | print('\nStarting ONNX export with onnx %s...' 
% onnx.__version__) 92 | f = opt.weights.replace('.pt', '.onnx') # filename 93 | torch.onnx.export(model, img, f, verbose=False, opset_version=11, input_names=['images'], output_names=['output'], 94 | dynamic_axes={ 95 | 'images': { 96 | 0: 'batch', 97 | 2: 'height', 98 | 3: 'width'}, # shape(1,3,640,640) 99 | 'output': { 100 | 0: 'batch', 101 | 1: 'anchors'} # shape(1,25200,85) 102 | }) 103 | 104 | # Checks 105 | onnx_model = onnx.load(f) # load onnx model 106 | 107 | input_names = ("images") 108 | ort_session = ort.InferenceSession(f) 109 | outputs = ort_session.run( 110 | None, 111 | {input_names: np.ones(shape=image_input_shape).astype(np.float32)}, 112 | ) 113 | print(outputs[0][0][0][0:10]) 114 | onnx.checker.check_model(onnx_model) # check onnx model 115 | print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 116 | print('ONNX export success, saved as %s' % f) 117 | 118 | if net == "yolor_p6": 119 | item1 = onnx_model.graph.output[1] 120 | item2 = onnx_model.graph.output[2] 121 | item3 = onnx_model.graph.output[3] 122 | item4 = onnx_model.graph.output[4] 123 | onnx_model.graph.output.remove(item1) 124 | onnx_model.graph.output.remove(item2) 125 | onnx_model.graph.output.remove(item3) 126 | onnx_model.graph.output.remove(item4) 127 | else: 128 | item1 = onnx_model.graph.output[1] 129 | item2 = onnx_model.graph.output[2] 130 | item3 = onnx_model.graph.output[3] 131 | onnx_model.graph.output.remove(item1) 132 | onnx_model.graph.output.remove(item2) 133 | onnx_model.graph.output.remove(item3) 134 | 135 | # save 136 | onnx.save(onnx_model, f) 137 | # Finish 138 | print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.') 139 | -------------------------------------------------------------------------------- /yolov3/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolov3 VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | 11 | message(STATUS ${ALL_LIBS}) 12 | file(GLOB CPPS 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 16 | ${TensorRT_ROOT}/samples/common/logger.cpp 17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 18 | ) 19 | list(REMOVE_ITEM CPPS app_yolov3.cpp) 20 | message(STATUS CPPS = ${CPPS}) 21 | list (LENGTH CPPS length) 22 | message(STATUS ***length*** = ${length}) 23 | find_package(OpenCV REQUIRED) 24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 27 | 28 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 29 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 30 | 31 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 32 | target_compile_options(${PROJECT_NAME} PUBLIC 33 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 34 | 35 | add_executable(app_yolov3 app_yolov3.cpp) 36 | 37 | # NVCC 38 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 39 | target_link_libraries(app_yolov3
${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 40 | -------------------------------------------------------------------------------- /yolov3/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 3 | or export onnx: 4 | ```bash 5 | git clone https://github.com/ultralytics/yolov3 && cd yolov3 6 | git checkout dd838e25863169d0de4f10631a609350658efb69 7 | ``` 8 | ```bash 9 | # note: When using the official export.py to export onnx, you need to comment the following two lines: 10 | #--------------------------------------------------------------------------------------------------------- 11 | if simplify: 12 | try: 13 | check_requirements(('onnx-simplifier',)) 14 | import onnxsim 15 | 16 | LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...') 17 | model_onnx, check = onnxsim.simplify( 18 | model_onnx, 19 | #------------------------------------------------------------------------------- 20 | #dynamic_input_shape=dynamic, 21 | #input_shapes={'images': list(im.shape)} if dynamic else None 22 | #------------------------------------------------------------------------------- 23 | ) 24 | assert check, 'assert check failed' 25 | onnx.save(model_onnx, f) 26 | except Exception as e: 27 | LOGGER.info(f'{prefix} simplifier failure: {e}') 28 | LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') 29 | LOGGER.info(f"{prefix} run --dynamic ONNX model inference with: 'python detect.py --weights {f}'") 30 | #--------------------------------------------------------------------------------------------------------- 31 | ``` 32 | ```bash 33 | cd yolov3 34 | python export.py --weights yolov3-tiny.pt --dynamic --simplify 35 | python export.py --weights yolov3.pt --dynamic --simplify 36 | python export.py --weights yolov3-spp.pt --dynamic 37 | ``` 38 | ## 2.edit and save onnx 39 | ```bash 40 | # note: If you have obtained onnx by downloading, this step can be ignored 41 | git clone https://github.com/FeiYull/tensorrt-alpha 42 | cd tensorrt-alpha/yolov3 43 | conda activate tensorrt-alpha 44 | # edit alpha_edit.py on line21 & line24 first.
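# (what alpha_edit.py does: it loads the exported onnx, removes the extra per-scale graph outputs so that only the merged "output" head remains, and saves the result as alpha_<name>.onnx; set net_name on line 21 and the onnx directory path on line 24 before running it)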
45 | python alpha_edit.py 46 | ``` 47 | ## 3.compile onnx 48 | ```bash 49 | # put your onnx file in this path:tensorrt-alpha/data/yolov3 50 | cd tensorrt-alpha/data/yolov3 51 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 52 | # 640 53 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yolov3.onnx --saveEngine=alpha_yolov3.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 54 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yolov3-spp.onnx --saveEngine=alpha_yolov3-spp.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 55 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yolov3-tiny.onnx --saveEngine=alpha_yolov3-tiny.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 56 | 57 | # note: When compiling the alpha_yolov3-tiny model, the following error may occur: 58 | # error: Error Code 4: Internal Error (/model.11/Reshape: IShuffleLayer applied to shape tensor must have 0 or 1 #reshape dimensions: dimensions were [-1,2]) 59 | # solution: add the parameter --simplify when exporting onnx (opset defaults to 13, which is high enough) 60 | ``` 61 | ## 4.run 62 | ```bash 63 | git clone https://github.com/FeiYull/tensorrt-alpha 64 | cd tensorrt-alpha/yolov3 65 | mkdir build 66 | cd build 67 | cmake .. 68 | make -j10 69 | # note: the dstImage will be saved in tensorrt-alpha/yolov3/build by default 70 | 71 | ## 640 72 | # infer image 73 | ./app_yolov3 --model=../../data/yolov3/alpha_yolov3-tiny.trt --size=640 --batch_size=1 --img=../../data/6406403.jpg --show --savePath=../ 74 | 75 | # infer video 76 | ./app_yolov3 --model=../../data/yolov3/alpha_yolov3-tiny.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show 77 | 78 | # infer camera 79 | ./app_yolov3 --model=../../data/yolov3/alpha_yolov3-tiny.trt --size=640 --batch_size=2 --cam_id=0 --show 80 | 81 | # note: yolov3-tiny clearly misses some detections on the image 6406401.jpg; don't worry, this behavior is consistent with the official model 82 | ``` 83 | ## 5.
appendix 84 | ignore -------------------------------------------------------------------------------- /yolov3/alpha_edit.py: -------------------------------------------------------------------------------- 1 | import onnx 2 | import onnx.helper as helper 3 | import torch 4 | # import torchvision 5 | import onnxsim # pip install onnx-simplifier 6 | import onnxruntime as ort 7 | import numpy as np 8 | import os 9 | 10 | 11 | def infer_onnx(onnx_file, input_names, image_input_shape): 12 | ort_session = ort.InferenceSession(onnx_file) 13 | outputs = ort_session.run( 14 | None, 15 | # {"data": np.ones(shape=image_input_shape).astype(np.float32)}, 16 | {input_names[0]: np.ones(shape=image_input_shape).astype(np.float32)}, 17 | ) 18 | return outputs 19 | 20 | 21 | net_name = "yolov3-tiny" 22 | # net_name = "yolov3" 23 | # net_name = "yolov3-spp" 24 | path = "../data/yolov3/" 25 | 26 | image_input_shape = [1, 3, 640, 640] 27 | onnx_name = net_name + ".onnx" 28 | input_names = ["images"] 29 | output_names = ["output"] 30 | 31 | model = onnx.load_model(path + onnx_name) 32 | 33 | outputs = infer_onnx(path + onnx_name, input_names, image_input_shape) 34 | for output in outputs: 35 | print(output.shape) 36 | 37 | # delete some nodes 38 | if net_name == "yolov3-tiny": 39 | item1 = model.graph.output[1] 40 | item2 = model.graph.output[2] 41 | model.graph.output.remove(item1) 42 | model.graph.output.remove(item2) 43 | elif net_name == "yolov3" or net_name == "yolov3-spp": 44 | item1 = model.graph.output[1] 45 | item2 = model.graph.output[2] 46 | item3 = model.graph.output[3] 47 | model.graph.output.remove(item1) 48 | model.graph.output.remove(item2) 49 | model.graph.output.remove(item3) 50 | 51 | onnx.save(model, path + "alpha_" + onnx_name) 52 | outputs = infer_onnx(path + "alpha_" + onnx_name, input_names, image_input_shape) 53 | for output in outputs: 54 | print(output.shape) 55 | print("") -------------------------------------------------------------------------------- /yolov4/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolov4 VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | 8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 11 | 12 | message(STATUS ${ALL_LIBS}) 13 | file(GLOB CPPS 14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 15 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 17 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 18 | ${TensorRT_ROOT}/samples/common/logger.cpp 19 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 20 | ) 21 | list(REMOVE_ITEM CPPS app_yolov4.cpp) 22 | message(STATUS CPPS = ${CPPS}) 23 | list (LENGTH CPPS length) 24 | message(STATUS ***length*** = ${length}) 25 | find_package(OpenCV REQUIRED) 26 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 27 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 28 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 29 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 30 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 31 | 32 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 33 | target_compile_options(${PROJECT_NAME} PUBLIC 34 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math
--disable-warnings>) 35 | 36 | add_executable(app_yolov4 app_yolov4.cpp) 37 | 38 | # NVCC 39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 40 | target_link_libraries(app_yolov4 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 41 | -------------------------------------------------------------------------------- /yolov4/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) 3 | or export onnx: 4 | ```bash 5 | git clone https://github.com/Tianxiaomo/pytorch-YOLOv4 6 | git clone https://github.com/FeiYull/tensorrt-alpha 7 | # Please use the "alpha_export.py" file provided by TensorRT-Alpha to export onnx 8 | cp tensorrt-alpha/yolov4/alpha_export.py pytorch-YOLOv4/ 9 | cd pytorch-YOLOv4/ 10 | git checkout a65d219f9066bae4e12003bd7cdc04531860c672 11 | 12 | # 608 13 | python alpha_export.py cfg/yolov4.cfg yolov4.weights --batch_size=-1 --onnx_file_path=alpha_yolov4_-1_3_608_608_dynamic.onnx 14 | ``` 15 | ## 2.edit and save onnx 16 | ```bash 17 | # note: If you have obtained onnx by downloading, this step can be ignored 18 | ignore 19 | ``` 20 | ## 3.compile onnx 21 | ```bash 22 | # put your onnx file in this path:tensorrt-alpha/data/yolov4 23 | cd tensorrt-alpha/data/yolov4 24 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 25 | # 608 26 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=alpha_yolov4_-1_3_608_608_dynamic.onnx --saveEngine=alpha_yolov4_-1_3_608_608_dynamic.trt --buildOnly --minShapes=input:1x3x608x608 --optShapes=input:2x3x608x608 --maxShapes=input:4x3x608x608 27 | ``` 28 | ## 4.run 29 | ```bash 30 | git clone https://github.com/FeiYull/tensorrt-alpha 31 | cd tensorrt-alpha/yolov4 32 | mkdir build 33 | cd build 34 | cmake .. 35 | make -j10 36 | # note: the dstImage will be saved in tensorrt-alpha/yolov4/build by default 37 | 38 | ## 608 39 | # infer image 40 | ./app_yolov4 --model=../../data/yolov4/alpha_yolov4_-1_3_608_608_dynamic.trt --size=608 --batch_size=1 --img=../../data/6406402.jpg --show --savePath=../ 41 | 42 | # infer video 43 | ./app_yolov4 --model=../../data/yolov4/alpha_yolov4_-1_3_608_608_dynamic.trt --size=608 --batch_size=2 --video=../../data/people.mp4 --show 44 | 45 | # infer camera 46 | ./app_yolov4 --model=../../data/yolov4/alpha_yolov4_-1_3_608_608_dynamic.trt --size=608 --batch_size=2 --cam_id=0 --show 47 | ``` 48 | ## 5.
appendix 49 | ignore -------------------------------------------------------------------------------- /yolov4/alpha_export.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | from tool.darknet2pytorch import Darknet 4 | 5 | class AlphaYolov4(torch.nn.Module): 6 | def __init__(self, cfgfile, weightfile): 7 | super().__init__() 8 | self.model = Darknet(cfgfile) 9 | self.model.load_weights(weightfile) 10 | self.model.eval() 11 | self.model.print_network() 12 | 13 | def forward(self, x): 14 | y = self.model(x) 15 | boxes = y[0] 16 | confs = y[1].unsqueeze(dim = 2) 17 | return torch.cat((boxes, confs), 3) 18 | 19 | def transform_to_onnx(cfgfile, weightfile, batch_size=1, onnx_file_name=None): 20 | model = AlphaYolov4(cfgfile, weightfile) 21 | 22 | dynamic = False 23 | if batch_size <= 0: 24 | dynamic = True 25 | 26 | input_names = ["input"] 27 | output_names = ['output'] 28 | 29 | if dynamic: 30 | x = torch.randn((1, 3, model.model.height, model.model.width), requires_grad=True) 31 | if not onnx_file_name: 32 | onnx_file_name = "yolov4_-1_3_{}_{}_dynamic.onnx".format(model.model.height, model.model.width) 33 | dynamic_axes = {"input": {0: "batch_size"}, "output": {0: "batch_size"}} 34 | # Export the model 35 | print('Export the onnx model ...') 36 | torch.onnx.export(model, 37 | x, 38 | onnx_file_name, 39 | export_params=True, 40 | opset_version=11, 41 | do_constant_folding=True, 42 | input_names=input_names, output_names=output_names, 43 | dynamic_axes=dynamic_axes) 44 | 45 | print('Onnx model exporting done') 46 | return onnx_file_name 47 | 48 | else: 49 | x = torch.randn((batch_size, 3, model.model.height, model.model.width), requires_grad=True) 50 | onnx_file_name = "yolov4_{}_3_{}_{}_static.onnx".format(batch_size, model.model.height, model.model.width) 51 | torch.onnx.export(model, 52 | x, 53 | onnx_file_name, 54 | export_params=True, 55 | opset_version=11, 56 | do_constant_folding=True, 57 | input_names=input_names, output_names=output_names, 58 | dynamic_axes=None) 59 | 60 | print('Onnx model exporting done') 61 | return onnx_file_name 62 | 63 | 64 | if __name__ == '__main__': 65 | from argparse import ArgumentParser 66 | parser = ArgumentParser() 67 | parser.add_argument('config') 68 | parser.add_argument('weightfile') 69 | parser.add_argument('--batch_size', type=int, help="Static Batchsize of the model. 
use batch_size<=0 for dynamic batch size") 70 | parser.add_argument('--onnx_file_path', help="Output onnx file path") 71 | args = parser.parse_args() 72 | transform_to_onnx(args.config, args.weightfile, args.batch_size, args.onnx_file_path) 73 | 74 | -------------------------------------------------------------------------------- /yolov4/decode_yolov4.cu: -------------------------------------------------------------------------------- 1 | #include "decode_yolov4.h" 2 | 3 | __global__ void decode_yolov4_device_kernel(int batch_size, int num_class, int topK, float conf_thresh, 4 | float* src, int srcWidth, int srcHeight, int srcArea, 5 | float* dst, int dstWidth, int dstHeight, int dstArea) 6 | { 7 | int dx = blockDim.x * blockIdx.x + threadIdx.x; 8 | int dy = blockDim.y * blockIdx.y + threadIdx.y; 9 | if (dx >= srcHeight || dy >= batch_size) 10 | { 11 | return; 12 | } 13 | float* pitem = src + dy * srcArea + dx * srcWidth; 14 | float* class_confidence = pitem + 4; 15 | float confidence = *class_confidence++; 16 | int label = 0; 17 | for (int i = 1; i < num_class; ++i, ++class_confidence) 18 | { 19 | if (*class_confidence > confidence) 20 | { 21 | confidence = *class_confidence; 22 | label = i; 23 | } 24 | } 25 | if (confidence < conf_thresh) 26 | { 27 | return; 28 | } 29 | int index = atomicAdd(dst + dy * dstArea, 1); 30 | if (index >= topK) 31 | { 32 | return; 33 | } 34 | float cx = *pitem++; 35 | float cy = *pitem++; 36 | float width = *pitem++; 37 | float height = *pitem++; 38 | 39 | // note: the yolov4 export already provides normalized corner coordinates (x1, y1, x2, y2); 40 | // the cx/cy/width/height names are leftovers, the values pass through unchanged 41 | float left = cx; 42 | float top = cy; 43 | float right = width; 44 | float bottom = height; 45 | float* pout_item = dst + dy * dstArea + 1 + index * dstWidth; 46 | *pout_item++ = left; 47 | *pout_item++ = top; 48 | *pout_item++ = right; 49 | *pout_item++ = bottom; 50 | *pout_item++ = confidence; 51 | *pout_item++ = label; 52 | *pout_item++ = 1; 53 | } 54 | 55 | static __device__ float box_iou( 56 | float aleft, float atop, float aright, float abottom, 57 | float bleft, float btop, float bright, float bbottom 58 | ) { 59 | float cleft = max(aleft, bleft); 60 | float ctop = max(atop, btop); 61 | float cright = min(aright, bright); 62 | float cbottom = min(abottom, bbottom); 63 | 64 | float c_area = max(cright - cleft, 0.0f) * max(cbottom - ctop, 0.0f); 65 | if (c_area == 0.0f) 66 | return 0.0f; 67 | 68 | float a_area = max(0.0f, aright - aleft) * max(0.0f, abottom - atop); 69 | float b_area = max(0.0f, bright - bleft) * max(0.0f, bbottom - btop); 70 | return c_area / (a_area + b_area - c_area); 71 | } 72 | 73 | void yolov4::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight) 74 | { 75 | dim3 block_size(BLOCK_SIZE, BLOCK_SIZE); 76 | dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE, 77 | (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE); 78 | int dstArea = 1 + dstWidth * dstHeight; 79 | 80 | decode_yolov4_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size, param.num_class, param.topK, param.conf_thresh, 81 | src, srcWidth, srcHeight, srcArea, 82 | dst, dstWidth, dstHeight, dstArea); 83 | } 84 | 85 | -------------------------------------------------------------------------------- /yolov4/decode_yolov4.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/utils.h" 3 | #include"../utils/kernel_function.h" 4 | 5 | namespace yolov4 6 | { 7 | void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int
srcLength, float* dst, int dstWidth, int dstHeight); 8 | } 9 | -------------------------------------------------------------------------------- /yolov4/yolov4.cpp: -------------------------------------------------------------------------------- 1 | #include"yolov4.h" 2 | #include"decode_yolov4.h" 3 | 4 | YOLOV4::YOLOV4(const utils::InitParameter& param) :yolo::YOLO(param) 5 | { 6 | } 7 | 8 | YOLOV4::~YOLOV4() 9 | { 10 | } 11 | 12 | void YOLOV4::postprocess(const std::vector<cv::Mat>& imgsBatch) 13 | { 14 | yolov4::decodeDevice(m_param, m_output_src_device, 4 + m_param.num_class, m_total_objects, m_output_area, 15 | m_output_objects_device, m_output_objects_width, m_param.topK); 16 | nmsDeviceV1(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1); 17 | CHECK(cudaMemcpy(m_output_objects_host, m_output_objects_device, m_param.batch_size * sizeof(float) * (1 + 7 * m_param.topK), cudaMemcpyDeviceToHost)); 18 | for (size_t bi = 0; bi < imgsBatch.size(); bi++) 19 | { 20 | int num_boxes = std::min((int)(m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1))[0], m_param.topK); 21 | for (int i = 0; i < num_boxes; i++) 22 | { 23 | float* ptr = m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1) + m_output_objects_width * i + 1; 24 | int keep_flag = ptr[6]; 25 | if (keep_flag) 26 | { 27 | float x_lt = m_dst2src.v0 * ptr[0] * m_param.dst_w + m_dst2src.v1 * ptr[1] * m_param.dst_h + m_dst2src.v2; 28 | float y_lt = m_dst2src.v3 * ptr[0] * m_param.dst_w + m_dst2src.v4 * ptr[1] * m_param.dst_h + m_dst2src.v5; 29 | float x_rb = m_dst2src.v0 * ptr[2] * m_param.dst_w + m_dst2src.v1 * ptr[3] * m_param.dst_h + m_dst2src.v2; 30 | float y_rb = m_dst2src.v3 * ptr[2] * m_param.dst_w + m_dst2src.v4 * ptr[3] * m_param.dst_h + m_dst2src.v5; 31 | 32 | m_objectss[bi].emplace_back(x_lt, y_lt, x_rb, y_rb, ptr[4], (int)ptr[5]); 33 | } 34 | } 35 | 36 | } 37 | } -------------------------------------------------------------------------------- /yolov4/yolov4.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include"../utils/yolo.h" 3 | #include"../utils/utils.h" 4 | class YOLOV4 : public yolo::YOLO 5 | { 6 | public: 7 | YOLOV4(const utils::InitParameter& param); 8 | ~YOLOV4(); 9 | virtual void postprocess(const std::vector<cv::Mat>& imgsBatch); 10 | }; -------------------------------------------------------------------------------- /yolov5/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | # cuda 7 | PROJECT(yolov5 VERSION 1.0.0 LANGUAGES C CXX CUDA) 8 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 9 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 10 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 11 | message(STATUS ${ALL_LIBS}) 12 | file(GLOB CPPS 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 16 | ${TensorRT_ROOT}/samples/common/logger.cpp 17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 18 | ) 19 | list(REMOVE_ITEM CPPS app_yolov5.cpp) 20 | message(STATUS CPPS = ${CPPS}) 21 | list (LENGTH CPPS length) 22 | message(STATUS ***length*** = ${length}) 23 | find_package(OpenCV REQUIRED) 24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
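# (note: every module in this repo follows the same build pattern: the module's .cpp/.cu sources plus the shared utils are compiled into a shared library, and the small app_* executable is linked against it)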
25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 27 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 28 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 29 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 30 | target_compile_options(${PROJECT_NAME} PUBLIC 31 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 32 | 33 | add_executable(app_yolov5 app_yolov5.cpp) 34 | 35 | # NVCC 36 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 37 | target_link_libraries(app_yolov5 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 38 | -------------------------------------------------------------------------------- /yolov5/alpha_edit.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import onnx 3 | import onnx.helper as helper 4 | import torch 5 | # import torchvision 6 | import onnxsim # pip install onnx-simplifier 7 | import onnxruntime as ort 8 | import numpy as np 9 | import os 10 | 11 | 12 | def infer_onnx(onnx_file, input_names, image_input_shape): 13 | ort_session = ort.InferenceSession(onnx_file) 14 | outputs = ort_session.run( 15 | None, 16 | {input_names[0]: np.ones(shape=image_input_shape).astype(np.float32)}, 17 | ) 18 | return outputs 19 | 20 | 21 | def run(mode, net_name, model_path): 22 | #mode = "p5" 23 | mode = mode 24 | 25 | if mode == "p5": 26 | #net_name = "yolov5m" 27 | net_name = net_name 28 | image_input_shape = [1, 3, 640, 640] 29 | else: # mode == "p6": 30 | #net_name = "yolov5m6" 31 | net_name = net_name 32 | image_input_shape = [1, 3, 1280, 1280] 33 | 34 | 35 | 36 | path = model_path 37 | onnx_name = net_name + ".onnx" 38 | input_names = ["images"] 39 | output_names = ["output"] 40 | 41 | model = onnx.load_model(path + onnx_name) 42 | 43 | outputs = infer_onnx(path + onnx_name, input_names, image_input_shape) 44 | for output in outputs: 45 | print(output.shape) 46 | 47 | # delete some nodes 48 | if mode == "p5": 49 | item1 = model.graph.output[1] 50 | item2 = model.graph.output[2] 51 | item3 = model.graph.output[3] 52 | model.graph.output.remove(item1) 53 | model.graph.output.remove(item2) 54 | model.graph.output.remove(item3) 55 | else: # mode == "p6": 56 | item1 = model.graph.output[1] 57 | item2 = model.graph.output[2] 58 | item3 = model.graph.output[3] 59 | item4 = model.graph.output[4] 60 | model.graph.output.remove(item1) 61 | model.graph.output.remove(item2) 62 | model.graph.output.remove(item3) 63 | model.graph.output.remove(item4) 64 | 65 | onnx.save(model, path + "alpha_" + onnx_name) 66 | outputs = infer_onnx(path + "alpha_" + onnx_name, input_names, image_input_shape) 67 | for output in outputs: 68 | print(output.shape) 69 | 70 | def parse_opt(): 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument('--mode', type=str, default='p5', help='p5:640*640, p6:1280*1280') 73 | parser.add_argument('--net_name', type=str, default='yolov5s', help='yolov5n yolov5s yolov5m ...
yolov5s6 ...') 74 | parser.add_argument('--model_path', type=str, default='', help='path of the directory containing the onnx file') 75 | opt = parser.parse_args() 76 | #print_args(vars(opt)) 77 | return opt 78 | 79 | def main(opt): 80 | run(**vars(opt)) 81 | 82 | if __name__ == "__main__": 83 | opt = parse_opt() 84 | main(opt) 85 | 86 | -------------------------------------------------------------------------------- /yolov6/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolov6 VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | message(STATUS ${ALL_LIBS}) 11 | file(GLOB CPPS 12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 13 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 15 | ${TensorRT_ROOT}/samples/common/logger.cpp 16 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 17 | ) 18 | list(REMOVE_ITEM CPPS app_yolov6.cpp) 19 | message(STATUS CPPS = ${CPPS}) 20 | list (LENGTH CPPS length) 21 | message(STATUS ***length*** = ${length}) 22 | find_package(OpenCV REQUIRED) 23 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 24 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 25 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 26 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 27 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 28 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 29 | target_compile_options(${PROJECT_NAME} PUBLIC 30 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 31 | 32 | add_executable(app_yolov6 app_yolov6.cpp) 33 | 34 | # NVCC 35 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 36 | target_link_libraries(app_yolov6 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 37 | -------------------------------------------------------------------------------- /yolov7/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolov7 VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | 11 | message(STATUS ${ALL_LIBS}) 12 | file(GLOB CPPS 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 16 | ${TensorRT_ROOT}/samples/common/logger.cpp 17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 18 | ) 19 | list(REMOVE_ITEM CPPS app_yolov7.cpp) 20 | message(STATUS CPPS = ${CPPS}) 21 | list (LENGTH CPPS length) 22 | message(STATUS ***length*** = ${length}) 23 | find_package(OpenCV REQUIRED) 24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 27 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 28 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS}
${OpenCV_LIBRARIES}) 29 | 30 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 31 | target_compile_options(${PROJECT_NAME} PUBLIC 32 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 33 | 34 | add_executable(app_yolov7 app_yolov7.cpp) 35 | 36 | # NVCC 37 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 38 | target_link_libraries(app_yolov7 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 39 | -------------------------------------------------------------------------------- /yolov7/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | 3 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv?usp=sharing) or export onnx: 4 | ```bash 5 | git clone https://github.com/WongKinYiu/yolov7 && cd yolov7 6 | git checkout 072f76c72c641c7a1ee482e39f604f6f8ef7ee92 7 | # 640 8 | python export.py --weights yolov7-tiny.pt --dynamic --grid 9 | python export.py --weights yolov7.pt --dynamic --grid 10 | python export.py --weights yolov7x.pt --dynamic --grid 11 | # 1280 12 | python export.py --weights yolov7-w6.pt --dynamic --grid --img-size 1280 13 | ``` 14 | ## 2.edit and save onnx 15 | ```bash 16 | # note: If you have obtained onnx by downloading, this step can be ignored 17 | ignore 18 | ``` 19 | 20 | ## 3.compile onnx 21 | ```bash 22 | # put your onnx file in this path:tensorrt-alpha/data/yolov7 23 | cd tensorrt-alpha/data/yolov7 24 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib 25 | # 640 26 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov7-tiny.onnx --saveEngine=yolov7-tiny.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 27 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov7.onnx --saveEngine=yolov7.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 28 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov7x.onnx --saveEngine=yolov7x.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640 29 | # 1280 30 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov7-w6.onnx --saveEngine=yolov7-w6.trt --buildOnly --minShapes=images:1x3x1280x1280 --optShapes=images:2x3x1280x1280 --maxShapes=images:4x3x1280x1280 31 | 32 | # note: if the following error is reported when running the model (yolov7-w6), just lower the batch_size: 33 | # Error Code 1: Cuda Runtime (an illegal memory access was encountered), i.e. "bool context = m_context->executeV2((void**)bindings)" returns false 34 | ``` 35 | ## 4.run 36 | ```bash 37 | git clone https://github.com/FeiYull/tensorrt-alpha 38 | cd tensorrt-alpha/yolov7 39 | mkdir build 40 | cd build 41 | cmake ..
42 | make -j10 43 | # note: the dstImage will be saved in tensorrt-alpha/yolov7/build by default 44 | 45 | ## 640 46 | # infer image 47 | ./app_yolov7 --model=../../data/yolov7/yolov7-tiny.trt --size=640 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../ 48 | ./app_yolov7 --model=../../data/yolov7/yolov7-w6.trt --size=1280 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../ 49 | 50 | # infer video 51 | ./app_yolov7 --model=../../data/yolov7/yolov7-tiny.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show 52 | 53 | # infer camera 54 | ./app_yolov7 --model=../../data/yolov7/yolov7-tiny.trt --size=640 --batch_size=2 --cam_id=0 --show 55 | ``` 56 | ## 5. appendix 57 | ignore -------------------------------------------------------------------------------- /yolov8-pose/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #set(CMAKE_BUILD_TYPE "Debug") 4 | set(CMAKE_BUILD_TYPE "Release") 5 | 6 | PROJECT(yolov8_pose VERSION 1.0.0 LANGUAGES C CXX CUDA) 7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}) 8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake) 9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT}) 10 | message(STATUS ${ALL_LIBS}) 11 | file(GLOB CPPS 12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu 14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu 15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp 16 | ${TensorRT_ROOT}/samples/common/logger.cpp 17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp 18 | #${TensorRT_ROOT}/samples/common/sampleUtils.cpp 19 | ) 20 | list(REMOVE_ITEM CPPS app_yolov8_pose.cpp) 21 | message(STATUS CPPS = ${CPPS}) 22 | list (LENGTH CPPS length) 23 | message(STATUS ***length*** = ${length}) 24 | find_package(OpenCV REQUIRED) 25 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 26 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS}) 27 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS}) 28 | add_library(${PROJECT_NAME} SHARED ${CPPS}) 29 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES}) 30 | 31 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75) 32 | target_compile_options(${PROJECT_NAME} PUBLIC 33 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>) 34 | 35 | add_executable(app_yolov8_pose app_yolov8_pose.cpp) 36 | 37 | # NVCC 38 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a) 39 | target_link_libraries(app_yolov8_pose ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} ) 40 | -------------------------------------------------------------------------------- /yolov8-pose/README.md: -------------------------------------------------------------------------------- 1 | ## 1. get onnx 2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [google drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) or export onnx: 3 | ```bash 4 | # 🔥 yolov8 official repo: https://github.com/ultralytics/ultralytics 5 | # 🔥 yolov8 quickstart: https://docs.ultralytics.com/quickstart/ 6 | # 🚀TensorRT-Alpha will be updated synchronously as soon as possible!
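# (the pinned versions below are the combination this guide assumes; newer ultralytics releases may change the export CLI)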
7 |
8 | # install yolov8
9 | conda create -n yolov8 python==3.8 -y # for Linux
10 | # conda create -n yolov8 python=3.9 -y # for Windows10
11 | conda activate yolov8
12 | pip install ultralytics==8.0.200
13 | pip install onnx==1.12.0
14 |
15 | # download official weights (".pt" files)
16 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt
17 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt
18 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt
19 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt
20 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt
21 | ```
22 |
23 | export onnx:
24 | ```bash
25 | yolo mode=export model=yolov8n-pose.pt format=onnx dynamic=True opset=12
26 | yolo mode=export model=yolov8s-pose.pt format=onnx dynamic=True opset=12
27 | yolo mode=export model=yolov8m-pose.pt format=onnx dynamic=True opset=12
28 | yolo mode=export model=yolov8l-pose.pt format=onnx dynamic=True opset=12
29 | yolo mode=export model=yolov8x-pose.pt format=onnx dynamic=True opset=12
30 | ```
31 |
32 | ## 2. edit and save onnx
33 | ```bash
34 | # note: if you obtained the onnx by downloading, this step can be ignored
35 | ignore
36 | ```
37 |
38 | ## 3. compile onnx
39 | ```bash
40 | # put your onnx file in this path: tensorrt-alpha/data/yolov8-pose
41 | cd tensorrt-alpha/data/yolov8-pose
42 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
43 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8n-pose.onnx --saveEngine=yolov8n-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
44 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8s-pose.onnx --saveEngine=yolov8s-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
45 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8m-pose.onnx --saveEngine=yolov8m-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
46 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8l-pose.onnx --saveEngine=yolov8l-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
47 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8x-pose.onnx --saveEngine=yolov8x-pose.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
48 | ```
49 | ## 4. run
50 | ```bash
51 | git clone https://github.com/FeiYull/tensorrt-alpha
52 | cd tensorrt-alpha/yolov8-pose
53 | mkdir build
54 | cd build
55 | cmake ..
56 | make -j10
57 | # note: the dstImage will be saved in tensorrt-alpha/yolov8-pose/build by default
58 |
59 | ## 640
60 | # infer image
61 | ./app_yolov8_pose --model=../../data/yolov8-pose/yolov8n-pose.trt --size=640 --batch_size=1 --img=../../data/6406407.jpg --show --savePath=../
62 |
63 | # infer video
64 | ./app_yolov8_pose --model=../../data/yolov8-pose/yolov8n-pose.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show
65 |
66 | # infer camera
67 | ./app_yolov8_pose --model=../../data/yolov8-pose/yolov8n-pose.trt --size=640 --batch_size=2 --cam_id=0 --show
68 |
69 | ```
70 | ## 5. appendix
71 | ignore
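For orientation before the sources below: after the device-side transpose, the decode kernel in decode_yolov8_pose.cu writes one candidate per row as 7 box fields (left, top, right, bottom, confidence, label, keepflag) followed by 51 keypoint floats. A minimal host-side view of that row, assuming the standard 17-keypoint COCO layout; the struct names are illustrative, not repository code:

```cpp
// Illustrative host-side view of one decoded pose row (not part of the repo):
// 7 box fields written by decode_yolov8_pose_device_kernel, then 51 floats
// for 17 COCO keypoints stored as (x, y, visibility) triplets.
#include <array>

struct PoseKeypoint { float x, y, vis; };

struct PoseRow
{
    float left, top, right, bottom;
    float confidence;
    float label;     // always 0: pose models detect only the "person" class
    float keepflag;  // cleared by NMS when the box is suppressed
    std::array<PoseKeypoint, 17> kpts;
};
static_assert(sizeof(PoseRow) == 58 * sizeof(float), "row width is 58 floats");
```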
-------------------------------------------------------------------------------- /yolov8-pose/app_yolov8_pose.cpp:
--------------------------------------------------------------------------------
1 | #include"../utils/yolo.h"
2 | #include"yolov8_pose.h"
3 |
4 | void setParameters(utils::InitParameter& initParameters)
5 | {
6 |     initParameters.class_names = utils::dataSets::coco80;
7 |     //initParameters.class_names = utils::dataSets::voc20;
8 |     initParameters.num_class = 80; // for coco
9 |     //initParameters.num_class = 20; // for voc2012
10 |     initParameters.batch_size = 8;
11 |     initParameters.dst_h = 640;
12 |     initParameters.dst_w = 640;
13 |     initParameters.input_output_names = { "images", "output0" };
14 |     initParameters.conf_thresh = 0.25f;
15 |     initParameters.iou_thresh = 0.7f;
16 |     initParameters.save_path = "D:/Data/1/";
17 | }
18 |
19 | void task(YOLOv8Pose& yolo, const utils::InitParameter& param, std::vector<cv::Mat>& imgsBatch, const int& delayTime, const int& batchi)
20 | {
21 |     yolo.copy(imgsBatch);
22 |     utils::DeviceTimer d_t1; yolo.preprocess(imgsBatch); float t1 = d_t1.getUsedTime();
23 |     utils::DeviceTimer d_t2; yolo.infer(); float t2 = d_t2.getUsedTime();
24 |     utils::DeviceTimer d_t3; yolo.postprocess(imgsBatch); float t3 = d_t3.getUsedTime();
25 |     float avg_times[3] = { t1 / param.batch_size, t2 / param.batch_size, t3 / param.batch_size };
26 |     sample::gLogInfo << "preprocess time = " << avg_times[0] << "; "
27 |         "infer time = " << avg_times[1] << "; "
28 |         "postprocess time = " << avg_times[2] << std::endl;
29 |     yolo.showAndSave(param.class_names, delayTime, imgsBatch, avg_times);
30 |     yolo.reset();
31 | }
32 |
33 | int main(int argc, char** argv)
34 | {
35 |     cv::CommandLineParser parser(argc, argv,
36 |     {
37 |         "{model || tensorrt model file }"
38 |         "{size || image (h, w), eg: 640 }"
39 |         "{batch_size|| batch size }"
40 |         "{video || video's path }"
41 |         "{img || image's path }"
42 |         "{cam_id || camera's device id }"
43 |         "{show || if show the result }"
44 |         "{savePath || save path, can be ignored}"
45 |     });
46 |     utils::InitParameter param;
47 |     setParameters(param);
48 |     std::string model_path = "../../data/yolov8-pose/yolov8n-pose.trt";
49 |     std::string video_path = "../../data/people.mp4";
50 |     std::string image_path = "../../data/bus.jpg";
51 |     int camera_id = 0;
52 |     utils::InputStream source;
53 |     source = utils::InputStream::IMAGE;
54 |     //source = utils::InputStream::VIDEO;
55 |     //source = utils::InputStream::CAMERA;
56 |     int size = -1; // w or h
57 |     int batch_size = 8;
58 |     bool is_show = false;
59 |     bool is_save = false;
60 |     if (parser.has("model"))
61 |     {
62 |         model_path = parser.get<std::string>("model");
63 |         sample::gLogInfo << "model_path = " << model_path << std::endl;
64 |     }
65 |     if (parser.has("size"))
66 |     {
67 |         size = parser.get<int>("size");
68 |         sample::gLogInfo << "size = " << size << std::endl;
69 |         param.dst_h = param.dst_w = size;
70 |     }
71 |     if (parser.has("batch_size"))
72 |     {
73 |         batch_size = parser.get<int>("batch_size");
74 |         sample::gLogInfo << "batch_size = " << batch_size << std::endl;
75 |         param.batch_size = batch_size;
76 |     }
77 |     if (parser.has("video"))
78 |     {
79 |         source = utils::InputStream::VIDEO;
80 |         video_path = parser.get<std::string>("video");
81 |         sample::gLogInfo << "video_path = " << video_path << std::endl;
82 |     }
83 |     if (parser.has("img"))
84 |     {
85 |         source = utils::InputStream::IMAGE;
86 |         image_path = parser.get<std::string>("img");
87 |         sample::gLogInfo << "image_path = " << image_path << std::endl;
88 |     }
89 |     if (parser.has("cam_id"))
90 |     {
91 |         source = utils::InputStream::CAMERA;
92 |         camera_id = parser.get<int>("cam_id");
93 |         sample::gLogInfo << "camera_id = " << camera_id << std::endl;
94 |     }
95 |     if (parser.has("show"))
96 |     {
97 |         param.is_show = true;
98 |         sample::gLogInfo << "is_show = " << param.is_show << std::endl;
99 |     }
100 |     if (parser.has("savePath"))
101 |     {
102 |         param.is_save = true;
103 |         param.save_path = parser.get<std::string>("savePath");
104 |         sample::gLogInfo << "save_path = " << param.save_path << std::endl;
105 |     }
106 |     int total_batches = 0;
107 |     int delay_time = 1;
108 |     cv::VideoCapture capture;
109 |     if (!setInputStream(source, image_path, video_path, camera_id,
110 |         capture, total_batches, delay_time, param))
111 |     {
112 |         sample::gLogError << "read the input data errors!" << std::endl;
113 |         return -1;
114 |     }
115 |     setRenderWindow(param);
116 |     YOLOv8Pose yolo(param);
117 |     std::vector<unsigned char> trt_file = utils::loadModel(model_path);
118 |     if (trt_file.empty())
119 |     {
120 |         sample::gLogError << "trt_file is empty!" << std::endl;
121 |         return -1;
122 |     }
123 |     if (!yolo.init(trt_file))
124 |     {
125 |         sample::gLogError << "yolo.init() failed!" << std::endl;
126 |         return -1;
127 |     }
128 |     yolo.check();
129 |     cv::Mat frame;
130 |     std::vector<cv::Mat> imgs_batch;
131 |     imgs_batch.reserve(param.batch_size);
132 |     sample::gLogInfo << imgs_batch.capacity() << std::endl;
133 |     int batchi = 0;
134 |     while (capture.isOpened())
135 |     {
136 |         if (batchi >= total_batches && source != utils::InputStream::CAMERA)
137 |         {
138 |             break;
139 |         }
140 |         if (imgs_batch.size() < param.batch_size)
141 |         {
142 |             if (source != utils::InputStream::IMAGE)
143 |             {
144 |                 capture.read(frame);
145 |             }
146 |             else
147 |             {
148 |                 frame = cv::imread(image_path);
149 |             }
150 |             if (frame.empty())
151 |             {
152 |                 sample::gLogWarning << "no more video or camera frames" << std::endl;
153 |                 task(yolo, param, imgs_batch, delay_time, batchi);
154 |                 imgs_batch.clear();
155 |                 batchi++;
156 |                 break;
157 |             }
158 |             else
159 |             {
160 |                 imgs_batch.emplace_back(frame.clone());
161 |             }
162 |         }
163 |         else
164 |         {
165 |             task(yolo, param, imgs_batch, delay_time, batchi);
166 |             imgs_batch.clear();
167 |             batchi++;
168 |         }
169 |     }
170 |     return 0;
171 | }
-------------------------------------------------------------------------------- /yolov8-pose/decode_yolov8_pose.cu:
--------------------------------------------------------------------------------
1 | #include "decode_yolov8_pose.h"
2 |
3 | __global__ void decode_yolov8_pose_device_kernel(int batch_size, int num_class, int topK, float conf_thresh,
4 |     float* src, int srcWidth, int srcHeight, int srcArea,
5 |     float* dst, int dstWidth, int dstArea)
6 | {
7 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
8 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
9 |     if (dx >= srcHeight || dy >= batch_size)
10 |     {
11 |         return;
12 |     }
13 |     float* pitem = src + dy * srcArea + dx * srcWidth;
14 |     float confidence = pitem[4]; // pose model: a single "person" confidence
15 |     if (confidence < conf_thresh)
16 |     {
17 |         return;
18 |     }
19 |     int index = atomicAdd(dst + dy * dstArea, 1);
20 |
21 |     if (index >= topK)
22 |     {
23 |         return;
24 |     }
25 |     float cx = *pitem++;
26 |     float cy = *pitem++;
27 |     float width = *pitem++;
28 |     float height = *pitem++;
29 |
30 |     float left = cx - width * 0.5f;
31 |     float top = cy - height * 0.5f;
32 |     float right = cx + width * 0.5f;
33 |     float bottom = cy + height * 0.5f;
34 |     float* pout_item = dst + dy * dstArea + 1 + index * dstWidth;
35 |     *pout_item++ = left;
36 |     *pout_item++ = top;
37 |     *pout_item++ = right;
38 |     *pout_item++ = bottom;
39 |     *pout_item++ = confidence;
40 |     *pout_item++ = 0; // label: person
41 |     *pout_item++ = 1; // keepflag for NMS
42 |     memcpy(pout_item, pitem + 1, (dstWidth - 7) * sizeof(float)); // 17 keypoints: (x, y, visibility)
43 | }
44 |
45 | void yolov8pose::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
46 | {
47 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
48 |     dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
49 |         (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
50 |     int dstArea = 1 + dstWidth * dstHeight;
51 |
52 |     decode_yolov8_pose_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size, param.num_class, param.topK, param.conf_thresh,
53 |         src, srcWidth, srcHeight, srcArea,
54 |         dst, dstWidth, dstArea);
55 | }
56 |
57 | __global__ void transpose_device_kernel(int batch_size,
58 |     float* src, int srcWidth, int srcHeight, int srcArea,
59 |     float* dst, int dstWidth, int dstHeight, int dstArea)
60 | {
61 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
62 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
63 |     if (dx >= dstHeight || dy >= batch_size)
64 |     {
65 |         return;
66 |     }
67 |     float* p_dst_row = dst + dy * dstArea + dx * dstWidth;
68 |     float* p_src_col = src + dy * srcArea + dx;
69 |
70 |     for (int i = 0; i < dstWidth; i++)
71 |     {
72 |         p_dst_row[i] = p_src_col[i * srcWidth];
73 |     }
74 | }
75 |
76 | void yolov8pose::transposeDevice(utils::InitParameter param,
77 |     float* src, int srcWidth, int srcHeight, int srcArea,
78 |     float* dst, int dstWidth, int dstHeight)
79 | {
80 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
81 |     dim3 grid_size((dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
82 |         (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
83 |     int dstArea = dstWidth * dstHeight;
84 |
85 |     transpose_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size,
86 |         src, srcWidth, srcHeight, srcArea,
87 |         dst, dstWidth, dstHeight, dstArea);
88 | }
89 |
90 |
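Both kernels above share one output convention: for each image in the batch, dst begins with a float counter (bumped via atomicAdd) followed by up to topK rows of dstWidth floats each. A minimal sketch of reading that buffer back on the host, mirroring how the postprocess code later in this repository consumes it; names are illustrative:

```cpp
// Illustrative host readback for the decoded buffer (not repo code):
// per image, float[0] = number of candidates written, then topK rows.
#include <cuda_runtime.h>
#include <algorithm>
#include <vector>

void readDecoded(const float* d_dst, int batch, int topK, int dstWidth)
{
    const int perImage = 1 + topK * dstWidth;
    std::vector<float> h_dst((size_t)batch * perImage);
    cudaMemcpy(h_dst.data(), d_dst, h_dst.size() * sizeof(float),
               cudaMemcpyDeviceToHost);
    for (int bi = 0; bi < batch; bi++)
    {
        const float* p = h_dst.data() + (size_t)bi * perImage;
        int num = std::min((int)p[0], topK); // the counter may exceed topK
        for (int i = 0; i < num; i++)
        {
            const float* row = p + 1 + i * dstWidth; // left, top, right, bottom, conf, ...
            (void)row; // consume the fields here
        }
    }
}
```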
-------------------------------------------------------------------------------- /yolov8-pose/decode_yolov8_pose.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/utils.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | namespace yolov8pose
6 | {
7 |     void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
8 |     void transposeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight);
9 | }
10 |
-------------------------------------------------------------------------------- /yolov8-pose/yolov8_pose.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/yolo.h"
3 | #include"../utils/utils.h"
4 | class YOLOv8Pose : public yolo::YOLO
5 | {
6 | public:
7 |     YOLOv8Pose(const utils::InitParameter& param);
8 |     ~YOLOv8Pose();
9 |     virtual bool init(const std::vector<unsigned char>& trtFile);
10 |     virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
11 |     virtual void postprocess(const std::vector<cv::Mat>& imgsBatch);
12 |     virtual void reset();
13 |
14 | public:
15 |     void showAndSave(const std::vector<std::string>& classNames,
16 |         const int& cvDelayTime, std::vector<cv::Mat>& imgsBatch, float* avg_times);
17 |
18 | private:
19 |     float* m_output_src_transpose_device;
20 |     float* m_output_objects_device;
21 |     float* m_output_objects_host;
22 |     int m_output_objects_width;
23 |
24 |     const size_t m_nkpts;
25 |     std::vector<std::vector<int>> m_skeleton; // keypoint index pairs for the drawn limbs
26 |     std::vector<cv::Scalar> m_kpt_color;
27 |     std::vector<cv::Scalar> m_limb_color;
28 | };
-------------------------------------------------------------------------------- /yolov8-seg/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolov8_seg VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | set( CMAKE_CXX_FLAGS "-O3" )
9 | include_directories( "/usr/include/eigen3" )
10 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
11 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
12 | message(STATUS ${ALL_LIBS})
13 | file(GLOB CPPS
14 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
15 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
16 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
17 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
18 | ${TensorRT_ROOT}/samples/common/logger.cpp
19 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
20 | #${TensorRT_ROOT}/samples/common/sampleUtils.cpp
21 | )
22 | list(REMOVE_ITEM CPPS app_yolov8_seg.cpp)
23 | message(STATUS CPPS = ${CPPS})
24 | list (LENGTH CPPS length)
25 | message(STATUS ***length*** = ${length})
26 | find_package(OpenCV REQUIRED)
27 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
28 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
29 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
30 | add_library(${PROJECT_NAME} SHARED ${CPPS})
31 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
32 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
33 | target_compile_options(${PROJECT_NAME} PUBLIC
34 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
35 |
36 | add_executable(app_yolov8_seg app_yolov8_seg.cpp)
37 |
38 | # NVCC
39 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
40 | target_link_libraries(app_yolov8_seg ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
41 |
-------------------------------------------------------------------------------- /yolov8-seg/README.md:
--------------------------------------------------------------------------------
1 | ## 0. install eigen
2 | Eigen 3.4.0 has been tested and passes!
3 | ```bash
4 | # for linux
5 | sudo apt-get install libeigen3-dev
6 |
7 | # for windows
8 | # download from https://eigen.tuxfamily.org/index.php?title=Main_Page
9 | # decompress the package
10 | # then manually add the include directory to the VS project
11 | ```
12 |
13 | ## 1. get onnx
14 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) or export onnx:
15 | ```bash
16 | # 🔥 yolov8 official repo: https://github.com/ultralytics/ultralytics
17 | # 🔥 yolov8 quickstart: https://docs.ultralytics.com/quickstart/
18 | # 🚀 TensorRT-Alpha will be updated synchronously as soon as possible!
19 |
20 | # install yolov8
21 | conda create -n yolov8 python==3.8 -y # for Linux
22 | # conda create -n yolov8 python=3.9 -y # for Windows10
23 | conda activate yolov8
24 | pip install ultralytics==8.0.200
25 | pip install onnx==1.12.0
26 |
27 | # download official weights (".pt" files)
28 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt
29 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt
30 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt
31 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt
32 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt
33 | ```
34 |
35 | export onnx:
36 | ```bash
37 | yolo mode=export model=yolov8n-seg.pt format=onnx dynamic=True opset=12
38 | yolo mode=export model=yolov8s-seg.pt format=onnx dynamic=True opset=12
39 | yolo mode=export model=yolov8m-seg.pt format=onnx dynamic=True opset=12
40 | yolo mode=export model=yolov8l-seg.pt format=onnx dynamic=True opset=12
41 | yolo mode=export model=yolov8x-seg.pt format=onnx dynamic=True opset=12
42 | ```
43 |
44 | ## 2. edit and save onnx
45 | ```bash
46 | # note: if you obtained the onnx by downloading, this step can be ignored
47 | ignore
48 | ```
49 |
50 | ## 3. compile onnx
51 | ```bash
52 | # put your onnx file in this path: tensorrt-alpha/data/yolov8-seg
53 | cd tensorrt-alpha/data/yolov8-seg
54 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
55 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8n-seg.onnx --saveEngine=yolov8n-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
56 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8s-seg.onnx --saveEngine=yolov8s-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
57 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8m-seg.onnx --saveEngine=yolov8m-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
58 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8l-seg.onnx --saveEngine=yolov8l-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
59 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8x-seg.onnx --saveEngine=yolov8x-seg.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
60 | ```
61 | ## 4. run
62 | ```bash
63 | git clone https://github.com/FeiYull/tensorrt-alpha
64 | cd tensorrt-alpha/yolov8-seg
65 | mkdir build
66 | cd build
67 | cmake ..
68 | make -j10
69 | # note: the dstImage will be saved in tensorrt-alpha/yolov8-seg/build by default
70 |
71 | ## 640
72 | # infer image
73 | ./app_yolov8_seg --model=../../data/yolov8-seg/yolov8n-seg.trt --size=640 --batch_size=1 --img=../../data/6406407.jpg --show --savePath=../
74 |
75 | # infer video
76 | ./app_yolov8_seg --model=../../data/yolov8-seg/yolov8n-seg.trt --size=640 --batch_size=1 --video=../../data/people.mp4 --show
77 |
78 | # infer camera
79 | ./app_yolov8_seg --model=../../data/yolov8-seg/yolov8n-seg.trt --size=640 --batch_size=1 --cam_id=0 --show
80 |
81 | ```
82 | ## 5. appendix
83 | ignore
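Section 0 exists because the mask branch is assembled with Eigen: each detection carries 32 mask coefficients that are multiplied against the 32 x 160 x 160 prototype tensor. A minimal sketch of that step for one detection, assuming the standard YOLOv8-seg formulation (sigmoid of coefficients times prototypes); the matrix names are illustrative, not repository code:

```cpp
// Illustrative YOLOv8-seg mask assembly for a single detection (not repo code):
// proto: 32 x (160*160) prototype matrix from the mask head
// coeff: 1 x 32 coefficients trailing the box fields of one decoded row
#include <Eigen/Dense>
#include <cmath>

Eigen::MatrixXf assembleMask(const Eigen::MatrixXf& proto,    // 32 x 25600
                             const Eigen::RowVectorXf& coeff) // 1 x 32
{
    Eigen::RowVectorXf m = coeff * proto; // 1 x 25600
    Eigen::RowVectorXf s = m.unaryExpr([](float v) { return 1.f / (1.f + std::exp(-v)); });
    // note: Eigen reshapes in column-major order by default
    return s.reshaped(160, 160); // low-res instance mask; threshold (e.g. 0.5) and upsample afterwards
}
```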
-------------------------------------------------------------------------------- /yolov8-seg/app_yolov8_seg.cpp:
--------------------------------------------------------------------------------
1 | #include"../utils/yolo.h"
2 | #include"yolov8_seg.h"
3 |
4 | void setParameters(utils::InitParameter& initParameters)
5 | {
6 |     initParameters.class_names = utils::dataSets::coco80;
7 |     //initParameters.class_names = utils::dataSets::voc20;
8 |     initParameters.num_class = 80; // for coco
9 |     //initParameters.num_class = 20; // for voc2012
10 |     initParameters.batch_size = 8;
11 |     initParameters.dst_h = 640;
12 |     initParameters.dst_w = 640;
13 |     initParameters.input_output_names = { "images", "output0" };
14 |     initParameters.conf_thresh = 0.25f;
15 |     initParameters.iou_thresh = 0.7f;
16 |     initParameters.save_path = "D:/Data/1/";
17 | }
18 |
19 | void task(YOLOv8Seg& yolo, const utils::InitParameter& param, std::vector<cv::Mat>& imgsBatch, const int& delayTime, const int& batchi)
20 | {
21 |     yolo.copy(imgsBatch);
22 |     utils::DeviceTimer d_t1; yolo.preprocess(imgsBatch); float t1 = d_t1.getUsedTime();
23 |     utils::DeviceTimer d_t2; yolo.infer(); float t2 = d_t2.getUsedTime();
24 |     utils::DeviceTimer d_t3; yolo.postprocess(imgsBatch); float t3 = d_t3.getUsedTime();
25 |     float avg_times[3] = { t1 / param.batch_size, t2 / param.batch_size, t3 / param.batch_size };
26 |     sample::gLogInfo << "preprocess time = " << avg_times[0] << "; "
27 |         "infer time = " << avg_times[1] << "; "
28 |         "postprocess time = " << avg_times[2] << std::endl;
29 |     yolo.showAndSave(param.class_names, delayTime, imgsBatch);
30 |     yolo.reset();
31 | }
32 |
33 | int main(int argc, char** argv)
34 | {
35 |     cv::CommandLineParser parser(argc, argv,
36 |     {
37 |         "{model || tensorrt model file }"
38 |         "{size || image (h, w), eg: 640 }"
39 |         "{batch_size|| batch size }"
40 |         "{video || video's path }"
41 |         "{img || image's path }"
42 |         "{cam_id || camera's device id }"
43 |         "{show || if show the result }"
44 |         "{savePath || save path, can be ignored}"
45 |     });
46 |     utils::InitParameter param;
47 |     setParameters(param);
48 |     std::string model_path = "../../data/yolov8-seg/yolov8n-seg.trt";
49 |     std::string video_path = "../../data/people.mp4";
50 |     std::string image_path = "../../data/bus.jpg";
51 |     int camera_id = 0;
52 |     utils::InputStream source;
53 |     source = utils::InputStream::IMAGE;
54 |     //source = utils::InputStream::VIDEO;
55 |     //source = utils::InputStream::CAMERA;
56 |     // update params from command line parser
57 |     int size = -1;
58 |     int batch_size = 8;
59 |     bool is_show = false;
60 |     bool is_save = false;
61 |     if (parser.has("model"))
62 |     {
63 |         model_path = parser.get<std::string>("model");
64 |         sample::gLogInfo << "model_path = " << model_path << std::endl;
65 |     }
66 |     if (parser.has("size"))
67 |     {
68 |         size = parser.get<int>("size");
69 |         sample::gLogInfo << "size = " << size << std::endl;
70 |         param.dst_h = param.dst_w = size;
71 |     }
72 |     if (parser.has("batch_size"))
73 |     {
74 |         batch_size = parser.get<int>("batch_size");
75 |         sample::gLogInfo << "batch_size = " << batch_size << std::endl;
76 |         param.batch_size = batch_size;
77 |     }
78 |     if (parser.has("video"))
79 |     {
80 |         source = utils::InputStream::VIDEO;
81 |         video_path = parser.get<std::string>("video");
82 |         sample::gLogInfo << "video_path = " << video_path << std::endl;
83 |     }
84 |     if (parser.has("img"))
85 |     {
86 |         source = utils::InputStream::IMAGE;
87 |         image_path = parser.get<std::string>("img");
88 |         sample::gLogInfo << "image_path = " << image_path << std::endl;
89 |     }
90 |     if (parser.has("cam_id"))
91 |     {
92 |         source = utils::InputStream::CAMERA;
93 |         camera_id = parser.get<int>("cam_id");
94 |         sample::gLogInfo << "camera_id = " << camera_id << std::endl;
95 |     }
96 |
97 |     if (parser.has("show"))
98 |     {
99 |         param.is_show = true;
100 |         sample::gLogInfo << "is_show = " << param.is_show << std::endl;
101 |     }
102 |     if (parser.has("savePath"))
103 |     {
104 |         param.is_save = true;
105 |         param.save_path = parser.get<std::string>("savePath");
106 |         sample::gLogInfo << "save_path = " << param.save_path << std::endl;
107 |     }
108 |     int total_batches = 0;
109 |     int delay_time = 1;
110 |     cv::VideoCapture capture;
111 |     if (!setInputStream(source, image_path, video_path, camera_id,
112 |         capture, total_batches, delay_time, param))
113 |     {
114 |         sample::gLogError << "read the input data errors!" << std::endl;
115 |         return -1;
116 |     }
117 |     setRenderWindow(param);
118 |     YOLOv8Seg yolo(param);
119 |     std::vector<unsigned char> trt_file = utils::loadModel(model_path);
120 |     if (trt_file.empty())
121 |     {
122 |         sample::gLogError << "trt_file is empty!" << std::endl;
123 |         return -1;
124 |     }
125 |     if (!yolo.init(trt_file))
126 |     {
127 |         sample::gLogError << "yolo.init() failed!" << std::endl;
128 |         return -1;
129 |     }
130 |     yolo.check();
131 |     cv::Mat frame;
132 |     std::vector<cv::Mat> imgs_batch;
133 |     imgs_batch.reserve(param.batch_size);
134 |     sample::gLogInfo << imgs_batch.capacity() << std::endl;
135 |     int batchi = 0;
136 |     while (capture.isOpened())
137 |     {
138 |         if (batchi >= total_batches && source != utils::InputStream::CAMERA)
139 |         {
140 |             break;
141 |         }
142 |         if (imgs_batch.size() < param.batch_size)
143 |         {
144 |             if (source != utils::InputStream::IMAGE)
145 |             {
146 |                 capture.read(frame);
147 |             }
148 |             else
149 |             {
150 |                 frame = cv::imread(image_path);
151 |             }
152 |             if (frame.empty())
153 |             {
154 |                 sample::gLogWarning << "no more video or camera frames" << std::endl;
155 |                 task(yolo, param, imgs_batch, delay_time, batchi);
156 |                 imgs_batch.clear();
157 |                 batchi++;
158 |                 break;
159 |             }
160 |             else
161 |             {
162 |                 imgs_batch.emplace_back(frame.clone());
163 |             }
164 |         }
165 |         else
166 |         {
167 |             task(yolo, param, imgs_batch, delay_time, batchi);
168 |             imgs_batch.clear();
169 |             batchi++;
170 |         }
171 |     }
172 |     return 0;
173 | }
-------------------------------------------------------------------------------- /yolov8-seg/decode_yolov8_seg.cu:
--------------------------------------------------------------------------------
1 | #include "decode_yolov8_seg.h"
2 |
3 | __global__ void decode_yolov8_seg_device_kernel(int batch_size, int num_class, int topK, float conf_thresh,
4 |     float* src, int srcWidth, int srcHeight, int srcArea,
5 |     float* dst, int dstWidth, int dstArea)
6 | {
7 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
8 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
9 |     if (dx >= srcHeight || dy >= batch_size)
10 |     {
11 |         return;
12 |     }
13 |     float* pitem = src + dy * srcArea + dx * srcWidth;
14 |     float* class_confidence = pitem + 4;
15 |     float confidence = *class_confidence++;
16 |     int label = 0;
17 |     for (int i = 1; i < num_class; ++i, ++class_confidence) // argmax over class scores
18 |     {
19 |         if (*class_confidence > confidence)
20 |         {
21 |             confidence = *class_confidence;
22 |             label = i;
23 |         }
24 |     }
25 |     if (confidence < conf_thresh)
26 |     {
27 |         return;
28 |     }
29 |     int index = atomicAdd(dst + dy * dstArea, 1);
30 |
31 |     if (index >= topK)
32 |     {
33 |         return;
34 |     }
35 |     float cx = *pitem++;
36 |     float cy = *pitem++;
37 |     float width = *pitem++;
38 |     float height = *pitem++;
39 |
40 |     float left = cx - width * 0.5f;
41 |     float top = cy - height * 0.5f;
42 |     float right = cx + width * 0.5f;
43 |     float bottom = cy + height * 0.5f;
44 |     float* pout_item = dst + dy * dstArea + 1 + index * dstWidth;
45 |     *pout_item++ = left;
46 |     *pout_item++ = top;
47 |     *pout_item++ = right;
48 |     *pout_item++ = bottom;
49 |     *pout_item++ = confidence;
50 |     *pout_item++ = label;
51 |     *pout_item++ = 1; // keepflag for NMS
52 |     memcpy(pout_item, pitem + num_class, 32 * sizeof(float)); // 32 mask coefficients
53 | }
54 |
55 | void yolov8seg::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
56 | {
57 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
58 |     dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
59 |         (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
60 |     int dstArea = 1 + dstWidth * dstHeight;
61 |     decode_yolov8_seg_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size, param.num_class, param.topK, param.conf_thresh,
62 |         src, srcWidth, srcHeight, srcArea,
63 |         dst, dstWidth, dstArea);
64 | }
65 |
66 | __global__ void transpose_device_kernel(int batch_size,
67 |     float* src, int srcWidth, int srcHeight, int srcArea,
68 |     float* dst, int dstWidth, int dstHeight, int dstArea)
69 | {
70 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
71 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
72 |     if (dx >= dstHeight || dy >= batch_size)
73 |     {
74 |         return;
75 |     }
76 |     float* p_dst_row = dst + dy * dstArea + dx * dstWidth;
77 |     float* p_src_col = src + dy * srcArea + dx;
78 |
79 |     for (int i = 0; i < dstWidth; i++)
80 |     {
81 |         p_dst_row[i] = p_src_col[i * srcWidth];
82 |     }
83 | }
84 |
85 | void yolov8seg::transposeDevice(utils::InitParameter param,
86 |     float* src, int srcWidth, int srcHeight, int srcArea,
87 |     float* dst, int dstWidth, int dstHeight)
88 | {
89 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
90 |     dim3 grid_size((dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
91 |         (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
92 |     int dstArea = dstWidth * dstHeight;
93 |     transpose_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size,
94 |         src, srcWidth, srcHeight, srcArea,
95 |         dst, dstWidth, dstHeight, dstArea);
96 | }
97 |
98 |
-------------------------------------------------------------------------------- /yolov8-seg/decode_yolov8_seg.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/utils.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | namespace yolov8seg
6 | {
7 |     void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
8 |     void transposeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight);
9 | }
10 |
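Each model directory ships the same transposeDevice helper because the raw YOLOv8 head emits [batch, channels, anchors] (e.g. 116 x 8400 for seg), while the decode kernels expect one candidate per row, i.e. [batch, anchors, channels]. A single-threaded host reference of the index mapping the kernel implements, with illustrative names:

```cpp
// Illustrative single-threaded reference for transposeDevice (not repo code):
// src is [channels x anchors] for one image; dst becomes [anchors x channels].
void transposeHostRef(const float* src, float* dst, int channels, int anchors)
{
    for (int a = 0; a < anchors; a++)      // "dx" in the kernel
        for (int c = 0; c < channels; c++) // "i" in the kernel's inner loop
            dst[a * channels + c] = src[c * anchors + a];
}
```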
-------------------------------------------------------------------------------- /yolov8-seg/yolov8_seg.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <Eigen/Dense>
3 | #include <opencv2/opencv.hpp>
4 | #include"../utils/yolo.h"
5 | #include"../utils/utils.h"
6 | class YOLOv8Seg : public yolo::YOLO
7 | {
8 | public:
9 |     YOLOv8Seg(const utils::InitParameter& param);
10 |     ~YOLOv8Seg();
11 |     virtual bool init(const std::vector<unsigned char>& trtFile);
12 |     virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
13 |     virtual bool infer();
14 |     virtual void postprocess(const std::vector<cv::Mat>& imgsBatch);
15 |     virtual void reset();
16 |
17 | public:
18 |     void showAndSave(const std::vector<std::string>& classNames,
19 |         const int& cvDelayTime, std::vector<cv::Mat>& imgsBatch);
20 |
21 | private:
22 |     float* m_output_src_transpose_device;
23 |     float* m_output_seg_device; // eg: 116 * 8400, 116 = 4 + 80 + 32
24 |     float* m_output_objects_device;
25 |
26 |     float* m_output_seg_host;
27 |     float* m_output_objects_host;
28 |
29 |     int m_output_objects_width; // 39 = 32 + 7, 7: left, top, right, bottom, confidence, class, keepflag
30 |     int m_output_src_width; // 116 = 4 + 80 + 32, 4: xyxy; 80: coco labels; 32: seg coefficients
31 |     nvinfer1::Dims m_output_seg_dims;
32 |     int m_output_obj_area;
33 |     int m_output_seg_area;
34 |     int m_output_seg_w;
35 |     int m_output_seg_h;
36 |
37 |     cv::Mat m_mask160;
38 |     Eigen::MatrixXf m_mask_eigen160;
39 |     cv::Rect m_thresh_roi160;
40 |     cv::Rect m_thresh_roisrc;
41 |     float m_downsample_scale;
42 |     cv::Mat m_mask_src;
43 |     cv::Mat m_img_canvas;
44 | };
-------------------------------------------------------------------------------- /yolov8/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolov8 VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 | message(STATUS ${ALL_LIBS})
11 | file(GLOB CPPS
12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
16 | ${TensorRT_ROOT}/samples/common/logger.cpp
17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
18 | #${TensorRT_ROOT}/samples/common/sampleUtils.cpp
19 | )
20 | list(REMOVE_ITEM CPPS app_yolov8.cpp)
21 | message(STATUS CPPS = ${CPPS})
22 | list (LENGTH CPPS length)
23 | message(STATUS ***length*** = ${length})
24 | find_package(OpenCV REQUIRED)
25 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
26 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
27 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
28 | add_library(${PROJECT_NAME} SHARED ${CPPS})
29 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
30 |
31 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
32 | target_compile_options(${PROJECT_NAME} PUBLIC
33 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
34 |
35 | add_executable(app_yolov8 app_yolov8.cpp)
36 |
37 | # NVCC
38 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
39 | target_link_libraries(app_yolov8 ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
40 |
-------------------------------------------------------------------------------- /yolov8/README.md:
--------------------------------------------------------------------------------
1 | ## 1. get onnx
2 | download directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv) or export onnx:
3 | ```bash
4 | # 🔥 yolov8 official repo: https://github.com/ultralytics/ultralytics
5 | # 🔥 yolov8 quickstart: https://docs.ultralytics.com/quickstart/
6 | # 🚀 TensorRT-Alpha will be updated synchronously as soon as possible!
7 |
8 | # install yolov8
9 | conda create -n yolov8 python==3.8 -y # for Linux
10 | # conda create -n yolov8 python=3.9 -y # for Windows10
11 | conda activate yolov8
12 | pip install ultralytics==8.0.5
13 | pip install onnx==1.12.0
14 |
15 | # download official weights (".pt" files)
16 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt
17 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt
18 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt
19 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt
20 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt
21 | https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x6.pt
22 | ```
23 |
24 | export onnx:
25 | ```bash
26 | # 640
27 | yolo mode=export model=yolov8n.pt format=onnx dynamic=True opset=12 #simplify=True
28 | yolo mode=export model=yolov8s.pt format=onnx dynamic=True opset=12 #simplify=True
29 | yolo mode=export model=yolov8m.pt format=onnx dynamic=True opset=12 #simplify=True
30 | yolo mode=export model=yolov8l.pt format=onnx dynamic=True opset=12 #simplify=True
31 | yolo mode=export model=yolov8x.pt format=onnx dynamic=True opset=12 #simplify=True
32 | # 1280
33 | yolo mode=export model=yolov8x6.pt format=onnx dynamic=True opset=12 #simplify=True
34 | ```
35 |
36 | ## 2. edit and save onnx
37 | ```bash
38 | # note: if you obtained the onnx by downloading, this step can be ignored
39 | ignore
40 | ```
41 |
42 | ## 3. compile onnx
43 | ```bash
44 | # put your onnx file in this path: tensorrt-alpha/data/yolov8
45 | cd tensorrt-alpha/data/yolov8
46 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
47 | # 640
48 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
49 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8s.onnx --saveEngine=yolov8s.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
50 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8m.onnx --saveEngine=yolov8m.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
51 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8l.onnx --saveEngine=yolov8l.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
52 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8x.onnx --saveEngine=yolov8x.trt --buildOnly --minShapes=images:1x3x640x640 --optShapes=images:2x3x640x640 --maxShapes=images:4x3x640x640
53 | # 1280
54 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolov8x6.onnx --saveEngine=yolov8x6.trt --buildOnly --minShapes=images:1x3x1280x1280 --optShapes=images:2x3x1280x1280 --maxShapes=images:4x3x1280x1280
55 | ```
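Before wiring a freshly built engine into the C++ side, it helps to confirm what trtexec produced; for the 640 exports here, the input "images" should be dynamic [-1, 3, 640, 640] and "output0" [-1, 84, 8400] (84 = 4 box values + 80 classes). A minimal binding inspector, assuming TensorRT 8.x; it is a sketch, not repository code:

```cpp
// Illustrative binding inspector for a deserialized engine (not repo code).
#include <NvInfer.h>
#include <iostream>

void printBindings(const nvinfer1::ICudaEngine& engine)
{
    for (int i = 0; i < engine.getNbBindings(); i++)
    {
        nvinfer1::Dims d = engine.getBindingDimensions(i);
        std::cout << (engine.bindingIsInput(i) ? "input  " : "output ")
                  << engine.getBindingName(i) << ": [";
        for (int k = 0; k < d.nbDims; k++)
            std::cout << d.d[k] << (k + 1 < d.nbDims ? ", " : "");
        std::cout << "]" << std::endl; // -1 marks a dynamic dimension
    }
}
```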
56 | ## 4. run
57 | ```bash
58 | git clone https://github.com/FeiYull/tensorrt-alpha
59 | cd tensorrt-alpha/yolov8
60 | mkdir build
61 | cd build
62 | cmake ..
63 | make -j10
64 | # note: the dstImage will be saved in tensorrt-alpha/yolov8/build by default
65 |
66 | ## 640
67 | # infer image
68 | ./app_yolov8 --model=../../data/yolov8/yolov8n.trt --size=640 --batch_size=1 --img=../../data/6406407.jpg --show --savePath=../
69 |
70 | # infer video
71 | ./app_yolov8 --model=../../data/yolov8/yolov8n.trt --size=640 --batch_size=2 --video=../../data/people.mp4 --show
72 |
73 | # infer camera
74 | ./app_yolov8 --model=../../data/yolov8/yolov8n.trt --size=640 --batch_size=2 --cam_id=0 --show
75 |
76 | ## 1280
77 | # infer camera
78 | ./app_yolov8 --model=../../data/yolov8/yolov8x6.trt --size=1280 --batch_size=2 --cam_id=0 --show
79 | ```
80 | ## 5. appendix
81 | ignore
-------------------------------------------------------------------------------- /yolov8/decode_yolov8.cu:
--------------------------------------------------------------------------------
1 | #include "decode_yolov8.h"
2 |
3 | __global__ void decode_yolov8_device_kernel(int batch_size, int num_class, int topK, float conf_thresh,
4 |     float* src, int srcWidth, int srcHeight, int srcArea,
5 |     float* dst, int dstWidth, int dstHeight, int dstArea)
6 | {
7 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
8 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
9 |     if (dx >= srcHeight || dy >= batch_size)
10 |     {
11 |         return;
12 |     }
13 |     float* pitem = src + dy * srcArea + dx * srcWidth;
14 |     float* class_confidence = pitem + 4;
15 |     float confidence = *class_confidence++;
16 |     int label = 0;
17 |     for (int i = 1; i < num_class; ++i, ++class_confidence) // argmax over class scores
18 |     {
19 |         if (*class_confidence > confidence)
20 |         {
21 |             confidence = *class_confidence;
22 |             label = i;
23 |         }
24 |     }
25 |     if (confidence < conf_thresh)
26 |     {
27 |         return;
28 |     }
29 |     int index = atomicAdd(dst + dy * dstArea, 1);
30 |
31 |     if (index >= topK)
32 |     {
33 |         return;
34 |     }
35 |     float cx = *pitem++;
36 |     float cy = *pitem++;
37 |     float width = *pitem++;
38 |     float height = *pitem++;
39 |
40 |     float left = cx - width * 0.5f;
41 |     float top = cy - height * 0.5f;
42 |     float right = cx + width * 0.5f;
43 |     float bottom = cy + height * 0.5f;
44 |     float* pout_item = dst + dy * dstArea + 1 + index * dstWidth;
45 |     *pout_item++ = left;
46 |     *pout_item++ = top;
47 |     *pout_item++ = right;
48 |     *pout_item++ = bottom;
49 |     *pout_item++ = confidence;
50 |     *pout_item++ = label;
51 |     *pout_item++ = 1; // keepflag for NMS
52 | }
53 |
54 | void yolov8::decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
55 | {
56 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
57 |     dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
58 |         (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
59 |     int dstArea = 1 + dstWidth * dstHeight;
60 |
61 |     decode_yolov8_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size, param.num_class, param.topK, param.conf_thresh,
62 |         src, srcWidth, srcHeight, srcArea,
63 |         dst, dstWidth, dstHeight, dstArea);
64 | }
65 |
66 |
67 | __global__ void transpose_device_kernel(int batch_size,
68 |     float* src, int srcWidth, int srcHeight, int srcArea,
69 |     float* dst, int dstWidth, int dstHeight, int dstArea)
70 | {
71 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
72 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
73 |     if (dx >= dstHeight || dy >= batch_size)
74 |     {
75 |         return;
76 |     }
77 |     float* p_dst_row = dst + dy * dstArea + dx * dstWidth;
78 |     float* p_src_col = src + dy * srcArea + dx;
79 |
80 |     for (int i = 0; i < dstWidth; i++)
81 |     {
82 |         p_dst_row[i] = p_src_col[i * srcWidth];
83 |     }
84 | }
85 |
86 | void yolov8::transposeDevice(utils::InitParameter param,
87 |     float* src, int srcWidth, int srcHeight, int srcArea,
88 |     float* dst, int dstWidth, int dstHeight)
89 | {
90 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
91 |     dim3 grid_size((dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
92 |         (param.batch_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
93 |     int dstArea = dstWidth * dstHeight;
94 |
95 |     transpose_device_kernel <<< grid_size, block_size, 0, nullptr >>> (param.batch_size,
96 |         src, srcWidth, srcHeight, srcArea,
97 |         dst, dstWidth, dstHeight, dstArea);
98 | }
99 |
100 |
101 |
-------------------------------------------------------------------------------- /yolov8/decode_yolov8.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/utils.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | namespace yolov8
6 | {
7 |     void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
8 |     void transposeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight);
9 | }
10 |
-------------------------------------------------------------------------------- /yolov8/yolov8.cpp:
--------------------------------------------------------------------------------
1 | #include"yolov8.h"
2 | #include"decode_yolov8.h"
3 |
4 | YOLOV8::YOLOV8(const utils::InitParameter& param) :yolo::YOLO(param)
5 | {
6 | }
7 |
8 | YOLOV8::~YOLOV8()
9 | {
10 |     CHECK(cudaFree(m_output_src_transpose_device));
11 | }
12 |
13 | bool YOLOV8::init(const std::vector<unsigned char>& trtFile)
14 | {
15 |     if (trtFile.empty())
16 |     {
17 |         return false;
18 |     }
19 |     std::unique_ptr<nvinfer1::IRuntime> runtime =
20 |         std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
21 |     if (runtime == nullptr)
22 |     {
23 |         return false;
24 |     }
25 |     this->m_engine = std::unique_ptr<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(trtFile.data(), trtFile.size()));
26 |
27 |     if (this->m_engine == nullptr)
28 |     {
29 |         return false;
30 |     }
31 |     this->m_context = std::unique_ptr<nvinfer1::IExecutionContext>(this->m_engine->createExecutionContext());
32 |     if (this->m_context == nullptr)
33 |     {
34 |         return false;
35 |     }
36 |     if (m_param.dynamic_batch)
37 |     {
38 |         this->m_context->setBindingDimensions(0, nvinfer1::Dims4(m_param.batch_size, 3, m_param.dst_h, m_param.dst_w));
39 |     }
40 |     m_output_dims = this->m_context->getBindingDimensions(1);
41 |     m_total_objects = m_output_dims.d[2]; // eg: 8400 candidate anchors
42 |     assert(m_param.batch_size <= m_output_dims.d[0]);
43 |     m_output_area = 1;
44 |     for (int i = 1; i < m_output_dims.nbDims; i++)
45 |     {
46 |         if (m_output_dims.d[i] != 0)
47 |         {
48 |             m_output_area *= m_output_dims.d[i];
49 |         }
50 |     }
51 |     CHECK(cudaMalloc(&m_output_src_device, m_param.batch_size * m_output_area * sizeof(float)));
52 |     CHECK(cudaMalloc(&m_output_src_transpose_device, m_param.batch_size * m_output_area * sizeof(float)));
53 |     float a = float(m_param.dst_h) / m_param.src_h;
54 |     float b = float(m_param.dst_w) / m_param.src_w;
55 |     float scale = a < b ? a : b;
56 |     cv::Mat src2dst = (cv::Mat_<float>(2, 3) << scale, 0.f, (-scale * m_param.src_w + m_param.dst_w + scale - 1) * 0.5,
57 |         0.f, scale, (-scale * m_param.src_h + m_param.dst_h + scale - 1) * 0.5);
58 |     cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1);
59 |     cv::invertAffineTransform(src2dst, dst2src);
60 |
61 |     m_dst2src.v0 = dst2src.ptr<float>(0)[0];
62 |     m_dst2src.v1 = dst2src.ptr<float>(0)[1];
63 |     m_dst2src.v2 = dst2src.ptr<float>(0)[2];
64 |     m_dst2src.v3 = dst2src.ptr<float>(1)[0];
65 |     m_dst2src.v4 = dst2src.ptr<float>(1)[1];
66 |     m_dst2src.v5 = dst2src.ptr<float>(1)[2];
67 |
68 |     return true;
69 | }
70 |
71 | void YOLOV8::preprocess(const std::vector<cv::Mat>& imgsBatch)
72 | {
73 |     resizeDevice(m_param.batch_size, m_input_src_device, m_param.src_w, m_param.src_h,
74 |         m_input_resize_device, m_param.dst_w, m_param.dst_h, 114, m_dst2src);
75 |     bgr2rgbDevice(m_param.batch_size, m_input_resize_device, m_param.dst_w, m_param.dst_h,
76 |         m_input_rgb_device, m_param.dst_w, m_param.dst_h);
77 |     normDevice(m_param.batch_size, m_input_rgb_device, m_param.dst_w, m_param.dst_h,
78 |         m_input_norm_device, m_param.dst_w, m_param.dst_h, m_param);
79 |     hwc2chwDevice(m_param.batch_size, m_input_norm_device, m_param.dst_w, m_param.dst_h,
80 |         m_input_hwc_device, m_param.dst_w, m_param.dst_h);
81 | }
82 |
83 |
84 | void YOLOV8::postprocess(const std::vector<cv::Mat>& imgsBatch)
85 | {
86 |     yolov8::transposeDevice(m_param, m_output_src_device, m_total_objects, 4 + m_param.num_class, m_total_objects * (4 + m_param.num_class),
87 |         m_output_src_transpose_device, 4 + m_param.num_class, m_total_objects);
88 |     yolov8::decodeDevice(m_param, m_output_src_transpose_device, 4 + m_param.num_class, m_total_objects, m_output_area,
89 |         m_output_objects_device, m_output_objects_width, m_param.topK);
90 |     // nms
91 |     //nmsDeviceV1(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1);
92 |     nmsDeviceV2(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1, m_output_idx_device, m_output_conf_device);
93 |     CHECK(cudaMemcpy(m_output_objects_host, m_output_objects_device, m_param.batch_size * sizeof(float) * (1 + 7 * m_param.topK), cudaMemcpyDeviceToHost));
94 |     for (size_t bi = 0; bi < imgsBatch.size(); bi++)
95 |     {
96 |         int num_boxes = std::min((int)(m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1))[0], m_param.topK);
97 |         for (int i = 0; i < num_boxes; i++)
98 |         {
99 |             float* ptr = m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1) + m_output_objects_width * i + 1;
100 |             int keep_flag = ptr[6];
101 |             if (keep_flag)
102 |             {
103 |                 float x_lt = m_dst2src.v0 * ptr[0] + m_dst2src.v1 * ptr[1] + m_dst2src.v2;
104 |                 float y_lt = m_dst2src.v3 * ptr[0] + m_dst2src.v4 * ptr[1] + m_dst2src.v5;
105 |                 float x_rb = m_dst2src.v0 * ptr[2] + m_dst2src.v1 * ptr[3] + m_dst2src.v2;
106 |                 float y_rb = m_dst2src.v3 * ptr[2] + m_dst2src.v4 * ptr[3] + m_dst2src.v5;
107 |                 m_objectss[bi].emplace_back(x_lt, y_lt, x_rb, y_rb, ptr[4], (int)ptr[5]);
108 |             }
109 |         }
110 |
111 |     }
112 | }
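The affine pair built in init() above is the heart of coordinate recovery: src2dst letterboxes the source frame onto the network canvas, and its inverse dst2src maps decoded boxes back, exactly as postprocess() does with v0..v5. A small self-contained check of the same math, assuming a 1920x1080 source and a 640x640 canvas; all names are illustrative:

```cpp
// Illustrative letterbox mapping check (not repo code).
#include <opencv2/opencv.hpp>
#include <algorithm>
#include <iostream>

int main()
{
    const float src_w = 1920.f, src_h = 1080.f, dst_w = 640.f, dst_h = 640.f;
    const float scale = std::min(dst_h / src_h, dst_w / src_w); // 1/3 here
    cv::Mat src2dst = (cv::Mat_<float>(2, 3) <<
        scale, 0.f, (-scale * src_w + dst_w + scale - 1) * 0.5f,
        0.f, scale, (-scale * src_h + dst_h + scale - 1) * 0.5f);
    cv::Mat dst2src;
    cv::invertAffineTransform(src2dst, dst2src);
    // a box corner predicted at (320, 320) on the canvas...
    float x = dst2src.at<float>(0, 0) * 320 + dst2src.at<float>(0, 1) * 320 + dst2src.at<float>(0, 2);
    float y = dst2src.at<float>(1, 0) * 320 + dst2src.at<float>(1, 1) * 320 + dst2src.at<float>(1, 2);
    std::cout << "(320,320) on the 640x640 canvas -> (" << x << ", " << y
              << ") in the source" << std::endl; // about (961, 541), near the source centre
    return 0;
}
```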
-------------------------------------------------------------------------------- /yolov8/yolov8.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/yolo.h"
3 | #include"../utils/utils.h"
4 | class YOLOV8 : public yolo::YOLO
5 | {
6 | public:
7 |     YOLOV8(const utils::InitParameter& param);
8 |     ~YOLOV8();
9 |     virtual bool init(const std::vector<unsigned char>& trtFile);
10 |     virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
11 |     virtual void postprocess(const std::vector<cv::Mat>& imgsBatch);
12 |
13 | private:
14 |     float* m_output_src_transpose_device;
15 | };
-------------------------------------------------------------------------------- /yolox/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #set(CMAKE_BUILD_TYPE "Debug")
4 | set(CMAKE_BUILD_TYPE "Release")
5 |
6 | PROJECT(yolox VERSION 1.0.0 LANGUAGES C CXX CUDA)
7 | message(STATUS CMAKE_CURRENT_SOURCE_DIR = ${CMAKE_CURRENT_SOURCE_DIR})
8 | include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/common.cmake)
9 | message(STATUS TensorRT_ROOT = ${TensorRT_ROOT})
10 | message(STATUS ${ALL_LIBS})
11 | file(GLOB CPPS
12 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
14 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cu
15 | ${CMAKE_CURRENT_SOURCE_DIR}/../utils/*.cpp
16 | ${TensorRT_ROOT}/samples/common/logger.cpp
17 | ${TensorRT_ROOT}/samples/common/sampleOptions.cpp
18 | )
19 | list(REMOVE_ITEM CPPS app_yolox.cpp)
20 | message(STATUS CPPS = ${CPPS})
21 | list (LENGTH CPPS length)
22 | message(STATUS ***length*** = ${length})
23 | find_package(OpenCV REQUIRED)
24 | include_directories(${INCLUDE_DRIS} ${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
25 | message(STATUS ***INCLUDE_DRIS*** = ${INCLUDE_DRIS})
26 | message(STATUS ***OpenCV_INCLUDE_DIRS*** = ${OpenCV_INCLUDE_DIRS})
27 | add_library(${PROJECT_NAME} SHARED ${CPPS})
28 | target_link_libraries(${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES})
29 | set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 50 61 72 75)
30 | target_compile_options(${PROJECT_NAME} PUBLIC
31 | $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo; --use_fast_math --disable-warnings>)
32 |
33 | add_executable(app_yolox app_yolox.cpp)
34 |
35 | # NVCC
36 | # target_link_libraries(detect ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} libgflags_nothreads.a)
37 | target_link_libraries(app_yolox ${PROJECT_NAME} ${ALL_LIBS} ${OpenCV_LIBRARIES} )
38 |
-------------------------------------------------------------------------------- /yolox/README.md:
--------------------------------------------------------------------------------
1 | ## Special note
2 | In the official YOLOX image preprocessing, the resize includes padding, but the padding is applied only at the right and bottom borders of the image.
3 | The kernel approach in this repository is: resize the image with its aspect ratio preserved (interpolation), storing the result in m_input_resize_without_padding_device,
4 | then copy that result into m_input_resize_device (allocated as
5 | 416 * 416 * 3 * batch_size or 640 * 640 * 3 * batch_size, initialized to {114, 114, 114}).
6 | In addition, since the interpolation does not exactly match OpenCV's, there are slight differences, but the final detection results are almost identical: the box positions are the same, and the confidences differ only from the second decimal place.
7 | Finally, the model supports a fixed batch size only.
8 |
9 | ## 1. get onnx
10 | download onnx (default: batch_size=2) directly at [weiyun](https://share.weiyun.com/3T3mZKBm) or [Google Drive](https://drive.google.com/drive/folders/1-8phZHkx_Z274UVqgw6Ma-6u5AKmqCOv?usp=sharing)
11 | or export onnx:
12 | ```bash
13 | git clone https://github.com/Megvii-BaseDetection/YOLOX
14 | cd YOLOX && git checkout 0.3.0
15 |
16 | ## batch_size=1
17 | # 640 for image
18 | python tools/export_onnx.py --output-name=yolox_s.onnx --exp_file=exps/default/yolox_s.py --ckpt=yolox_s.pth --decode_in_inference --batch-size=1
19 | python tools/export_onnx.py --output-name=yolox_m.onnx --exp_file=exps/default/yolox_m.py --ckpt=yolox_m.pth --decode_in_inference --batch-size=1
20 | python tools/export_onnx.py --output-name=yolox_x.onnx --exp_file=exps/default/yolox_x.py --ckpt=yolox_x.pth --decode_in_inference --batch-size=1
21 |
22 | # 416 for image
23 | python tools/export_onnx.py --output-name=yolox_nano.onnx --exp_file=exps/default/yolox_nano.py --ckpt=yolox_nano.pth --decode_in_inference --batch-size=1
24 | python tools/export_onnx.py --output-name=yolox_tiny.onnx --exp_file=exps/default/yolox_tiny.py --ckpt=yolox_tiny.pth --decode_in_inference --batch-size=1
25 |
26 | ## batch_size > 1
27 | # For example, setting --batch-size=2 in the export commands above also works; note that when running the app later, you must pass the same value, e.g. --batch_size=2.
28 | ```
29 |
30 | ## 2. edit and save onnx
31 | ```bash
32 | # note: if you obtained the onnx by downloading, this step can be ignored
33 | ignore
34 | ```
35 |
36 | ## 3. compile onnx
37 | ```bash
38 | # put your onnx file in this path: tensorrt-alpha/data/yolox
39 | cd tensorrt-alpha/data/yolox
40 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/feiyull/TensorRT-8.4.2.4/lib
41 |
42 | # 640
43 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_s.onnx --saveEngine=yolox_s.trt --buildOnly
44 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_m.onnx --saveEngine=yolox_m.trt --buildOnly
45 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_x.onnx --saveEngine=yolox_x.trt --buildOnly
46 |
47 | # 416
48 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_nano.onnx --saveEngine=yolox_nano.trt --buildOnly
49 | ../../../../TensorRT-8.4.2.4/bin/trtexec --onnx=yolox_tiny.onnx --saveEngine=yolox_tiny.trt --buildOnly
50 | ```
51 |
52 | ## 4. run
53 | ```bash
54 | git clone https://github.com/FeiYull/tensorrt-alpha
55 | cd tensorrt-alpha/yolox
56 | mkdir build
57 | cd build
58 | cmake ..
59 | make -j10
60 | # note: the dstImage will be saved in tensorrt-alpha/yolox/build by default
61 | # only static multi-batch inference is supported!
62 | # to use a different batch_size, the onnx needs to be exported and compiled again
63 |
64 | ## 640
65 | # infer image
66 | ./app_yolox --model=../../data/yolox/yolox_s.trt --size=640 --batch_size=1 --img=../../data/6406401.jpg --show --savePath=../
67 |
68 | # infer video
69 | ./app_yolox --model=../../data/yolox/yolox_s.trt --size=640 --batch_size=1 --video=../../data/people.mp4 --show
70 |
71 | # infer camera
72 | ./app_yolox --model=../../data/yolox/yolox_s.trt --size=640 --batch_size=1 --cam_id=0 --show
73 |
74 | # 416
75 | ./app_yolox --model=../../data/yolox/yolox_nano.trt --size=416 --batch_size=1 --img=../../data/6406401.jpg --show --savePath
76 | ```
77 | ## 5. appendix
78 | ignore
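As the special note above explains, YOLOX pads only the right and bottom borders, which is why yolox.cu below builds src2dst with a bare (scale - 1) * 0.5 half-pixel term instead of the centring term used for YOLOv8. A tiny standalone comparison of the two offsets, assuming a 1920x1080 source and a 640x640 canvas; it is a sketch, not repository code:

```cpp
// Illustrative comparison of the two letterbox affines (not repo code).
#include <cstdio>

int main()
{
    const float src_h = 1080.f, dst_h = 640.f;
    const float s = 1.f / 3.f; // min(640/1920, 640/1080)
    // YOLOX: top-left anchored, pad right/bottom -> only a half-pixel offset
    float ty_yolox = (s - 1.f) * 0.5f;
    // YOLOv8: centred letterbox -> shifts the resized image to the middle
    float ty_yolov8 = (-s * src_h + dst_h + s - 1.f) * 0.5f;
    std::printf("yolox  y-offset: %.3f (image stays at the top edge)\n", ty_yolox);
    std::printf("yolov8 y-offset: %.3f (about 140 px of top padding)\n", ty_yolov8);
    return 0;
}
```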
-------------------------------------------------------------------------------- /yolox/yolox.cu:
--------------------------------------------------------------------------------
1 | #include"yolox.h"
2 |
3 | YOLOX::YOLOX(const utils::InitParameter& param) :yolo::YOLO(param)
4 | {
5 | }
6 | YOLOX::~YOLOX()
7 | {
8 |     CHECK(cudaFree(m_input_resize_without_padding_device));
9 | }
10 | bool YOLOX::init(const std::vector<unsigned char>& trtFile)
11 | {
12 |     if (trtFile.empty())
13 |     {
14 |         return false;
15 |     }
16 |     std::unique_ptr<nvinfer1::IRuntime> runtime =
17 |         std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
18 |     if (runtime == nullptr)
19 |     {
20 |         return false;
21 |     }
22 |     this->m_engine = std::unique_ptr<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(trtFile.data(), trtFile.size()));
23 |
24 |     if (this->m_engine == nullptr)
25 |     {
26 |         return false;
27 |     }
28 |     this->m_context = std::unique_ptr<nvinfer1::IExecutionContext>(this->m_engine->createExecutionContext());
29 |     if (this->m_context == nullptr)
30 |     {
31 |         return false;
32 |     }
33 |     // binding dim
34 |     // ...
35 |     //nvinfer1::Dims input_dims = this->m_context->getBindingDimensions(0);
36 |     m_output_dims = this->m_context->getBindingDimensions(1);
37 |     m_total_objects = m_output_dims.d[1];
38 |     assert(m_param.batch_size == m_output_dims.d[0] ||
39 |         m_param.batch_size == 1 // batch_size = 1, but it will infer with "batch_size = m_output_dims.d[0]"; only a static batch is supported
40 |     );
41 |     m_output_area = 1;
42 |     for (int i = 1; i < m_output_dims.nbDims; i++)
43 |     {
44 |         if (m_output_dims.d[i] != 0)
45 |         {
46 |             m_output_area *= m_output_dims.d[i];
47 |         }
48 |     }
49 |     CHECK(cudaMalloc(&m_output_src_device, m_param.batch_size * m_output_area * sizeof(float)));
50 |     float a = float(m_param.dst_h) / m_param.src_h;
51 |     float b = float(m_param.dst_w) / m_param.src_w;
52 |     float scale = a < b ? a : b;
53 |     m_resized_h = roundf((float)m_param.src_h * scale);
54 |     m_resized_w = roundf((float)m_param.src_w * scale);
55 |
56 |     CHECK(cudaMalloc(&m_input_resize_without_padding_device,
57 |         m_param.batch_size * 3 * m_resized_h * m_resized_w * sizeof(float)));
58 |     cv::Mat src2dst = (cv::Mat_<float>(2, 3) << scale, 0.f, (scale - 1) * 0.5,
59 |         0.f, scale, (scale - 1) * 0.5);
60 |     cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1);
61 |     cv::invertAffineTransform(src2dst, dst2src);
62 |     m_dst2src.v0 = dst2src.ptr<float>(0)[0];
63 |     m_dst2src.v1 = dst2src.ptr<float>(0)[1];
64 |     m_dst2src.v2 = dst2src.ptr<float>(0)[2];
65 |     m_dst2src.v3 = dst2src.ptr<float>(1)[0];
66 |     m_dst2src.v4 = dst2src.ptr<float>(1)[1];
67 |     m_dst2src.v5 = dst2src.ptr<float>(1)[2];
68 |     return true;
69 | }
70 | void YOLOX::preprocess(const std::vector<cv::Mat>& imgsBatch)
71 | {
72 |     resizeDevice(m_param.batch_size, m_input_src_device, m_param.src_w, m_param.src_h,
73 |         m_input_resize_without_padding_device, m_resized_w, m_resized_h, 114, m_dst2src);
74 |     copyWithPaddingDevice(m_param.batch_size, m_input_resize_without_padding_device, m_resized_w, m_resized_h,
75 |         m_input_resize_device, m_param.dst_w, m_param.dst_h, 114.f);
76 |     hwc2chwDevice(m_param.batch_size, m_input_resize_device, m_param.dst_w, m_param.dst_h,
77 |         m_input_hwc_device, m_param.dst_w, m_param.dst_h);
78 | }
79 | __global__
80 | void copy_with_padding_kernel_function(int batchSize, float* src, int srcWidth, int srcHeight, int srcArea, int srcVolume,
81 |     float* dst, int dstWidth, int dstHeight, int dstArea, int dstVolume, float paddingValue)
82 | {
83 |     int dx = blockDim.x * blockIdx.x + threadIdx.x;
84 |     int dy = blockDim.y * blockIdx.y + threadIdx.y;
85 |     if (dx < dstArea && dy < batchSize)
86 |     {
87 |         int dst_y = dx / dstWidth;
88 |         int dst_x = dx % dstWidth;
89 |         float* pdst = dst + dy * dstVolume + dst_y * dstWidth * 3 + dst_x * 3;
90 |
91 |         if (dst_y < srcHeight && dst_x < srcWidth)
92 |         {
93 |             float* psrc = src + dy * srcVolume + dst_y * srcWidth * 3 + dst_x * 3;
94 |             pdst[0] = psrc[0];
95 |             pdst[1] = psrc[1];
96 |             pdst[2] = psrc[2];
97 |         }
98 |         else // right/bottom border: fill with the padding value
99 |         {
100 |             pdst[0] = paddingValue;
101 |             pdst[1] = paddingValue;
102 |             pdst[2] = paddingValue;
103 |         }
104 |     }
105 | }
106 | void copyWithPaddingDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
107 |     float* dst, int dstWidth, int dstHeight, float paddingValue)
108 | {
109 |     dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
110 |     dim3 grid_size((dstWidth * dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
111 |         (batchSize + BLOCK_SIZE - 1) / BLOCK_SIZE);
112 |     int src_area = srcHeight * srcWidth;
113 |     int dst_area = dstHeight * dstWidth;
114 |
115 |     int src_volume = 3 * srcHeight * srcWidth;
116 |     int dst_volume = 3 * dstHeight * dstWidth;
117 |     assert(srcWidth <= dstWidth);
118 |     assert(srcHeight <= dstHeight);
119 |     copy_with_padding_kernel_function <<< grid_size, block_size, 0, nullptr >>> (batchSize, src, srcWidth, srcHeight, src_area, src_volume,
120 |         dst, dstWidth, dstHeight, dst_area, dst_volume, paddingValue);
121 | }
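copyWithPaddingDevice above launches one x-thread per destination pixel per image: grid.x tiles dstWidth * dstHeight in steps of BLOCK_SIZE, and grid.y tiles the batch. A quick arithmetic check of that geometry, assuming BLOCK_SIZE is 16 (its real value lives in utils/kernel_function.h, not shown here):

```cpp
// Illustrative launch-geometry check for copyWithPaddingDevice (not repo code).
// Assumes BLOCK_SIZE == 16; the real constant lives in utils/kernel_function.h.
#include <cstdio>

int main()
{
    const int BLOCK_SIZE = 16, batch = 2, dst_w = 640, dst_h = 640;
    const int dst_area = dst_w * dst_h;                          // one x-thread per pixel
    const int grid_x = (dst_area + BLOCK_SIZE - 1) / BLOCK_SIZE; // 25600 blocks along x
    const int grid_y = (batch + BLOCK_SIZE - 1) / BLOCK_SIZE;    // 1 block along y
    std::printf("grid = (%d, %d), block = (%d, %d): %d x-threads cover %d pixels\n",
                grid_x, grid_y, BLOCK_SIZE, BLOCK_SIZE, grid_x * BLOCK_SIZE, dst_area);
    return 0;
}
```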
-------------------------------------------------------------------------------- /yolox/yolox.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include"../utils/yolo.h"
3 | #include"../utils/kernel_function.h"
4 |
5 | class YOLOX : public yolo::YOLO
6 | {
7 | public:
8 |     YOLOX(const utils::InitParameter& param);
9 |     ~YOLOX();
10 |     virtual bool init(const std::vector<unsigned char>& trtFile);
11 |     virtual void preprocess(const std::vector<cv::Mat>& imgsBatch);
12 | private:
13 |     float* m_input_resize_without_padding_device;
14 |     int m_resized_w;
15 |     int m_resized_h;
16 | };
17 | void copyWithPaddingDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
18 |     float* dst, int dstWidth, int dstHeight, float paddingValue);
--------------------------------------------------------------------------------
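Putting the yolox pieces together: app_yolox.cpp (not shown above) follows the same flow as the other app_*.cpp files. A compressed single-image sketch of that flow under the same utils:: helpers; the shipped app additionally configures binding names, display and save flags through setParameters() and setInputStream(), so treat this as an outline rather than the actual program:

```cpp
// Outline of a single-image run, mirroring the app_*.cpp files above
// (a sketch, not the shipped app_yolox.cpp).
#include "../utils/yolo.h"
#include "yolox.h"

int main()
{
    cv::Mat img = cv::imread("../../data/6406401.jpg");
    if (img.empty())
    {
        return -1;
    }
    utils::InitParameter param;
    param.class_names = utils::dataSets::coco80;
    param.num_class = 80;
    param.batch_size = 1;
    param.dst_h = param.dst_w = 640;
    param.src_h = img.rows; // normally filled in by setInputStream()
    param.src_w = img.cols;
    param.conf_thresh = 0.25f;
    param.iou_thresh = 0.7f;
    // the shipped app also sets param.input_output_names and show/save flags here

    YOLOX yolo(param);
    std::vector<unsigned char> trt_file = utils::loadModel("../../data/yolox/yolox_s.trt");
    if (trt_file.empty() || !yolo.init(trt_file))
    {
        return -1;
    }
    yolo.check();
    std::vector<cv::Mat> imgs_batch{ img };
    yolo.copy(imgs_batch);
    yolo.preprocess(imgs_batch);
    yolo.infer();
    yolo.postprocess(imgs_batch);
    yolo.reset();
    return 0;
}
```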