├── .gitignore
├── Detection_app.py
├── README.md
├── UI_webcam_4.py
├── config.py
├── icon
│   ├── pause.png
│   └── play.png
├── inference
│   └── pexels-thirdman-8482541.jpg
├── models
│   ├── __init__.py
│   ├── api.py
│   ├── common.py
│   ├── cudart_api.py
│   ├── engine.py
│   ├── pycuda_api.py
│   ├── torch_utils.py
│   └── utils.py
├── tensorrt_infer_det.py
├── tensorrt_infer_det_without_torch.py
└── yolov8_test.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 | 
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 | 
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 | 
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 | 
54 | # Translations
55 | *.mo
56 | *.pot
57 | 
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 | 
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 | 
68 | # Scrapy stuff:
69 | .scrapy
70 | 
71 | # Sphinx documentation
72 | docs/_build/
73 | 
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 | 
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 | 
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 | 
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 | 
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 | 
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # Video file
156 | inference/*.mp4
157 | 
158 | # model file
159 | model/
160 | 
161 | # PyCharm
162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164 | # and can be added to the global gitignore or merged into this file. For a more nuclear
165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
166 | #.idea/
--------------------------------------------------------------------------------
/Detection_app.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import cv2
3 | import numpy as np
4 | from ultralytics import YOLO
5 | from PyQt5 import QtCore
6 | from PyQt5.QtWidgets import QMainWindow, QApplication, QGraphicsScene
7 | from PyQt5.QtGui import QIcon, QPixmap, QImage
8 | from PyQt5.QtCore import Qt, QTimer, QThread, pyqtSignal
9 | from UI_webcam_4 import Ui_MainWindow
10 | from models.pycuda_api import TRTEngine
11 | from tensorrt_infer_det_without_torch import inference
12 | 
13 | class CameraThread(QThread):
14 |     image = pyqtSignal(np.ndarray)
15 | 
16 |     def __init__(self):
17 |         super().__init__()
18 |         self.capture = cv2.VideoCapture(0)
19 |         self.running = False
20 |         if (self.capture is None) or (not self.capture.isOpened()):
21 |             self.connect = False
22 |         else:
23 |             self.connect = True
24 | 
25 |     def run(self):
26 |         while self.running and self.connect:
27 |             ret, frame = self.capture.read()
28 |             if ret:
29 |                 #print(1)
30 |                 self.image.emit(frame)
31 |             else:
32 |                 print("Failed to acquire frame!")
33 |                 self.connect = False
34 | 
35 |     def start_stop(self):
36 |         if self.connect:
37 |             self.running = not self.running
38 |             if self.running:
39 |                 print("Camera started!")
40 |             else:
41 |                 print("Camera stopped!")
42 | 
43 |     def close(self):
44 |         if self.connect:
45 |             self.running = False
46 |             self.capture.release()
47 | 
48 | 
49 | class ObjectDetect_MainWindow(QMainWindow, Ui_MainWindow):
50 |     def __init__(self):
51 |         super(ObjectDetect_MainWindow, self).__init__()
52 |         self.setupUi(self)
53 |         self.camera = CameraThread()
54 | 
55 |         if self.camera.connect:
56 |             self.debugBar("Camera connected!")
57 |         else:
58 |             self.debugBar("Camera not connected!")
59 |         self.pushButton.clicked.connect(self.ClickStartBtn)
60 |         self.checkBox.clicked.connect(self.SetDetectMask)
61 |         self.comboBox.currentIndexChanged.connect(self.SetViewRatio)
62 |         self.camera.image.connect(self.UpdateImage)
63 |         self.b_DetectMask = False
64 |         self.b_TensorRTMode = True
65 |         self.b_TriggerResizeView = False
66 |         self.ViewRatio = 1.0
67 |         self.comboBox.setCurrentIndex(2)
68 |         self.model = YOLO("model/mask_detect/best.pt")
69 |         results = self.model.predict(source="inference/pexels-thirdman-8482541.jpg", show=False, save=False, conf=0.5)
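        # the predict() call above is a one-time warm-up on a sample image so model
        # weights and CUDA kernels are loaded before live frames arrive; its result is unused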
70 |         self.engine = TRTEngine("model/mask_detect/best.engine")
71 | 
72 |     def ClickStartBtn(self):
73 |         if self.camera.connect:
74 |             icon = QIcon()
75 |             if not self.camera.running:
76 |                 icon.addPixmap(QPixmap("icon/pause.png"), QIcon.Normal, QIcon.Off)
77 |                 self.camera.start_stop()
78 |                 self.camera.start()
79 |             else:
80 |                 icon.addPixmap(QPixmap("icon/play.png"), QIcon.Normal, QIcon.Off)
81 |                 self.camera.start_stop()
82 | 
83 |             self.pushButton.setIcon(icon)
84 |             self.comboBox.setEnabled(not self.camera.running)
85 | 
86 |     def SetDetectMask(self):
87 |         if self.checkBox.isChecked():
88 |             self.b_DetectMask = True
89 |             #print("detect")
90 |         else:
91 |             self.b_DetectMask = False
92 |             #print("not detect")
93 | 
94 |     def SetViewRatio(self):
95 |         self.ViewRatio = float(self.comboBox.currentText())
96 |         self.b_TriggerResizeView = True
97 | 
98 |     def UpdateImage(self, frame):
99 |         scene = QGraphicsScene()
100 |         frame_width = int(frame.shape[1]*self.ViewRatio)
101 |         frame_height = int(frame.shape[0]*self.ViewRatio)
102 |         scene.setSceneRect(0, 0, frame_width, frame_height)
103 |         if self.b_TriggerResizeView:
104 |             self.b_TriggerResizeView = False
105 |             self.graphicsView.setMinimumSize(QtCore.QSize(frame_width+4, frame_height+4))
106 | 
107 |         if self.b_DetectMask:
108 |             image = self.DetectMasks(frame)
109 |         else:
110 |             if self.comboBox.currentIndex() != 2:
111 |                 frame = cv2.resize(frame, (frame_width, frame_height),
112 |                                    interpolation=cv2.INTER_CUBIC)
113 |             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
114 |             image = QImage(frame, frame.shape[1], frame.shape[0], QImage.Format_RGB888)
115 | 
116 |         scene.addPixmap(QPixmap.fromImage(image))
117 |         self.graphicsView.setScene(scene)
118 | 
119 |     def DetectMasks(self, frame):
120 |         if not self.b_TensorRTMode:
121 |             results = self.model.predict(source=frame, show=False, save=False, conf=0.5)
122 |             detect_image = results[0].plot()
123 |         else:
124 |             detect_image = inference(self.engine, frame, self.b_DetectMask)
125 |         if self.comboBox.currentIndex() != 2:
126 |             detect_image = cv2.resize(detect_image, (int(detect_image.shape[1]*self.ViewRatio), int(detect_image.shape[0]*self.ViewRatio)),
127 |                                       interpolation=cv2.INTER_CUBIC)
128 |         detect_frame = cv2.cvtColor(detect_image, cv2.COLOR_BGR2RGB) # YOLOv8 result image is BGR (for OpenCV display)
129 |         detect_frame = QImage(detect_frame, detect_image.shape[1], detect_image.shape[0], QImage.Format_RGB888)
130 |         return detect_frame
131 |     """
132 |     def CloseEvent(self, event):
133 |         if self.camera.running:
134 |             self.camera.close()
135 |             self.camera.terminate()
136 |         QtWidgets.QApplication.closeAllWindows()
137 |     """
138 |     def debugBar(self, msg):
139 |         self.statusbar.showMessage(str(msg), 5000)
140 | 
141 | if __name__ == "__main__":
142 |     app = QApplication(sys.argv)
143 |     window = ObjectDetect_MainWindow()
144 |     window.show()
145 |     sys.exit(app.exec_())
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # YOLOv8-TensorRT-MaskDetection
2 | Mask detection using YOLOv8 with TensorRT.
3 | 
4 | # File guidance
5 | 1. yolov8_test.py
6 | 
7 | Tests detection on a video file, a webcam, or a video stream from a URL.
8 | 
9 | Mode settings:
10 | 
11 | (1)
12 | 
13 | tensorrt = 1: the program uses the .engine file (TensorRT) to detect objects.
14 | 
15 | tensorrt = 0: the program uses the .pt file (PyTorch) to detect objects.
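For example, the flag typically switches between the two loading paths sketched below (illustrative only; the model paths mirror Detection_app.py, and the exact layout inside yolov8_test.py may differ):

```python
if tensorrt:
    from models.pycuda_api import TRTEngine
    engine = TRTEngine("model/mask_detect/best.engine")   # TensorRT engine
else:
    from ultralytics import YOLO
    model = YOLO("model/mask_detect/best.pt")             # PyTorch weights
```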
16 | 
17 | (2)
18 | 
19 | mask_detect_mode = 1: the program uses the custom mask-detection model to detect whether a person is wearing a mask.
20 | 
21 | mask_detect_mode = 0: the program uses the official pretrained model to detect the 80 COCO object classes.
22 | 
23 | (3)
24 | 
25 | webcam = 1: the program detects objects in webcam video.
26 | 
27 | webcam = 0: the program detects objects in a video file or a video stream from a URL.
28 | 
29 | 2. Detection_app.py
30 | 
31 | This program provides a UI that shows object detection from a webcam. The UI has a start/pause button and a checkbox
32 | that enables or disables mask detection.
33 | 
34 | Mode setting:
35 | 
36 | self.b_TensorRTMode = True: the program uses the .engine file to detect whether a person is wearing a mask.
37 | 
38 | self.b_TensorRTMode = False: the program uses the .pt file to detect whether a person is wearing a mask.
39 | 
40 | (I will add a control in the UI for switching modes.)
41 | 
42 | # Inference source
43 | [Video](https://drive.google.com/drive/folders/16zLaimbdfVHhElf467EXvonjqsaBEBnx?usp=drive_link)
44 | 
45 | # Model
46 | [Model](https://drive.google.com/drive/folders/1IkrbvLPiS0b8fu-ELpd0ViE5tKB9dmrl?usp=drive_link)
47 | 
48 | # Computer environment
49 | 
50 | OS: Windows 11 / WSL Ubuntu 20.04
51 | 
52 | NVIDIA GeForce RTX 3060 Laptop GPU
53 | 
54 | CUDA 11.8 + cuDNN 8.9.6 + TensorRT 8.6 GA
55 | 
56 | torch 2.1.2+cu118
57 | 
58 | # Reference
59 | [YOLOv8-TensorRT](https://github.com/triple-Mu/YOLOv8-TensorRT)
60 | 
61 | 
62 | 
--------------------------------------------------------------------------------
/UI_webcam_4.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Form implementation generated from reading ui file 'design4_test.ui'
4 | #
5 | # Created by: PyQt5 UI code generator 5.15.9
6 | #
7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is
8 | # run again. Do not edit this file unless you know what you are doing.
9 | 10 | 11 | from PyQt5 import QtCore, QtGui, QtWidgets 12 | 13 | 14 | class Ui_MainWindow(object): 15 | def setupUi(self, MainWindow): 16 | MainWindow.setObjectName("MainWindow") 17 | MainWindow.resize(666, 599) 18 | self.centralwidget = QtWidgets.QWidget(MainWindow) 19 | self.centralwidget.setObjectName("centralwidget") 20 | self.horizontalLayout = QtWidgets.QHBoxLayout(self.centralwidget) 21 | self.horizontalLayout.setObjectName("horizontalLayout") 22 | self.verticalLayout = QtWidgets.QVBoxLayout() 23 | self.verticalLayout.setContentsMargins(-1, -1, 0, 0) 24 | self.verticalLayout.setObjectName("verticalLayout") 25 | self.horizontalLayout_2 = QtWidgets.QHBoxLayout() 26 | self.horizontalLayout_2.setObjectName("horizontalLayout_2") 27 | self.graphicsView = QtWidgets.QGraphicsView(self.centralwidget) 28 | self.graphicsView.setMinimumSize(QtCore.QSize(644, 484)) 29 | self.graphicsView.setObjectName("graphicsView") 30 | self.horizontalLayout_2.addWidget(self.graphicsView) 31 | self.verticalLayout.addLayout(self.horizontalLayout_2) 32 | self.horizontalLayout_3 = QtWidgets.QHBoxLayout() 33 | self.horizontalLayout_3.setObjectName("horizontalLayout_3") 34 | self.groupBox = QtWidgets.QGroupBox(self.centralwidget) 35 | self.groupBox.setMinimumSize(QtCore.QSize(320, 60)) 36 | self.groupBox.setMaximumSize(QtCore.QSize(320, 60)) 37 | self.groupBox.setTitle("") 38 | self.groupBox.setObjectName("groupBox") 39 | self.pushButton = QtWidgets.QPushButton(self.groupBox) 40 | self.pushButton.setGeometry(QtCore.QRect(120, 0, 60, 60)) 41 | self.pushButton.setMinimumSize(QtCore.QSize(60, 60)) 42 | self.pushButton.setMaximumSize(QtCore.QSize(60, 60)) 43 | self.pushButton.setText("") 44 | icon = QtGui.QIcon() 45 | icon.addPixmap(QtGui.QPixmap("icon/play.png"), QtGui.QIcon.Normal, QtGui.QIcon.Off) 46 | self.pushButton.setIcon(icon) 47 | self.pushButton.setIconSize(QtCore.QSize(60, 60)) 48 | self.pushButton.setObjectName("pushButton") 49 | self.checkBox = QtWidgets.QCheckBox(self.groupBox) 50 | self.checkBox.setGeometry(QtCore.QRect(200, 20, 121, 19)) 51 | self.checkBox.setMaximumSize(QtCore.QSize(250, 60)) 52 | font = QtGui.QFont() 53 | font.setFamily("Arial") 54 | font.setBold(True) 55 | font.setWeight(75) 56 | self.checkBox.setFont(font) 57 | self.checkBox.setObjectName("checkBox") 58 | self.comboBox = QtWidgets.QComboBox(self.groupBox) 59 | self.comboBox.setGeometry(QtCore.QRect(30, 20, 61, 22)) 60 | font = QtGui.QFont() 61 | font.setFamily("Arial") 62 | font.setBold(True) 63 | font.setWeight(75) 64 | self.comboBox.setFont(font) 65 | self.comboBox.setObjectName("comboBox") 66 | self.comboBox.addItem("") 67 | self.comboBox.addItem("") 68 | self.comboBox.addItem("") 69 | self.comboBox.addItem("") 70 | self.comboBox.addItem("") 71 | self.horizontalLayout_3.addWidget(self.groupBox) 72 | self.verticalLayout.addLayout(self.horizontalLayout_3) 73 | self.horizontalLayout.addLayout(self.verticalLayout) 74 | MainWindow.setCentralWidget(self.centralwidget) 75 | self.statusbar = QtWidgets.QStatusBar(MainWindow) 76 | self.statusbar.setObjectName("statusbar") 77 | MainWindow.setStatusBar(self.statusbar) 78 | 79 | self.retranslateUi(MainWindow) 80 | QtCore.QMetaObject.connectSlotsByName(MainWindow) 81 | 82 | def retranslateUi(self, MainWindow): 83 | _translate = QtCore.QCoreApplication.translate 84 | MainWindow.setWindowTitle(_translate("MainWindow", "WebCam Example")) 85 | self.checkBox.setText(_translate("MainWindow", "Mask detect")) 86 | self.comboBox.setItemText(0, _translate("MainWindow", "0.50")) 87 | 
self.comboBox.setItemText(1, _translate("MainWindow", "0.75"))
88 |         self.comboBox.setItemText(2, _translate("MainWindow", "1.00"))
89 |         self.comboBox.setItemText(3, _translate("MainWindow", "1.25"))
90 |         self.comboBox.setItemText(4, _translate("MainWindow", "1.50"))
91 | 
92 | 
93 | if __name__ == "__main__":
94 |     import sys
95 |     app = QtWidgets.QApplication(sys.argv)
96 |     MainWindow = QtWidgets.QMainWindow()
97 |     ui = Ui_MainWindow()
98 |     ui.setupUi(MainWindow)
99 |     MainWindow.show()
100 |     sys.exit(app.exec_())
101 | 
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | import random
2 | 
3 | import numpy as np
4 | 
5 | random.seed(0)
6 | 
7 | # detection model classes
8 | 
9 | OBJECT_CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
10 |                   'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
11 |                   'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
12 |                   'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
13 |                   'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
14 |                   'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
15 |                   'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
16 |                   'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
17 |                   'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
18 |                   'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
19 |                   'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
20 |                   'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
21 |                   'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
22 |                   'scissors', 'teddy bear', 'hair drier', 'toothbrush')
23 | 
24 | # self-defined model classes
25 | OBJECT_MASK_CLASSES = ('Mask', 'No Mask', 'Bad Mask')
26 | 
27 | # colors per class
28 | OBJECT_COLORS = {
29 |     cls: [random.randint(0, 255) for _ in range(3)]
30 |     for i, cls in enumerate(OBJECT_CLASSES)
31 | }
32 | 
33 | OBJECT_MASK_COLORS = {
34 |     cls: [random.randint(0, 255) for _ in range(3)]
35 |     for i, cls in enumerate(OBJECT_MASK_CLASSES)
36 | }
37 | 
38 | # colors for segment masks
39 | MASK_COLORS = np.array([(255, 56, 56), (255, 157, 151), (255, 112, 31),
40 |                         (255, 178, 29), (207, 210, 49), (72, 249, 10),
41 |                         (146, 204, 23), (61, 219, 134), (26, 147, 52),
42 |                         (0, 212, 187), (44, 153, 168), (0, 194, 255),
43 |                         (52, 69, 147), (100, 115, 255), (0, 24, 236),
44 |                         (132, 56, 255), (82, 0, 133), (203, 56, 255),
45 |                         (255, 149, 200), (255, 55, 199)],
46 |                        dtype=np.float32) / 255.
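# pose-model drawing settings below: keypoint colors, skeleton limb pairs, and
# limb colors (assumed to follow the standard COCO 17-keypoint layout used by YOLOv8-pose)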
47 | 48 | KPS_COLORS = [[0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], 49 | [255, 128, 0], [255, 128, 0], [255, 128, 0], [255, 128, 0], 50 | [255, 128, 0], [255, 128, 0], [51, 153, 255], [51, 153, 255], 51 | [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255]] 52 | 53 | SKELETON = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], 54 | [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], 55 | [2, 4], [3, 5], [4, 6], [5, 7]] 56 | 57 | LIMB_COLORS = [[51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255], 58 | [255, 51, 255], [255, 51, 255], [255, 51, 255], [255, 128, 0], 59 | [255, 128, 0], [255, 128, 0], [255, 128, 0], [255, 128, 0], 60 | [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], [0, 255, 0], 61 | [0, 255, 0], [0, 255, 0]] 62 | 63 | # alpha for segment masks 64 | ALPHA = 0.5 65 | -------------------------------------------------------------------------------- /icon/pause.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyuanfan/YOLOv8-TensorRT-MaskDetection/84976cf367261f769eae67e8a0db35e9d67f32ee/icon/pause.png -------------------------------------------------------------------------------- /icon/play.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyuanfan/YOLOv8-TensorRT-MaskDetection/84976cf367261f769eae67e8a0db35e9d67f32ee/icon/play.png -------------------------------------------------------------------------------- /inference/pexels-thirdman-8482541.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyuanfan/YOLOv8-TensorRT-MaskDetection/84976cf367261f769eae67e8a0db35e9d67f32ee/inference/pexels-thirdman-8482541.jpg -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .engine import EngineBuilder, TRTModule, TRTProfilerV0, TRTProfilerV1 # isort:skip # noqa: E501 2 | import warnings 3 | 4 | import torch 5 | 6 | warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning) 7 | warnings.filterwarnings(action='ignore', category=torch.jit.ScriptWarning) 8 | warnings.filterwarnings(action='ignore', category=UserWarning) 9 | warnings.filterwarnings(action='ignore', category=FutureWarning) 10 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 11 | __all__ = ['EngineBuilder', 'TRTModule', 'TRTProfilerV0', 'TRTProfilerV1'] 12 | -------------------------------------------------------------------------------- /models/api.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import List, OrderedDict, Tuple, Union 3 | 4 | import numpy as np 5 | import tensorrt as trt 6 | 7 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 8 | 9 | 10 | def trtweight(weights: np.ndarray) -> trt.Weights: 11 | weights = weights.astype(weights.dtype.name) 12 | return trt.Weights(weights) 13 | 14 | 15 | def get_width(x: int, gw: float, divisor: int = 8) -> int: 16 | return int(np.ceil(x * gw / divisor) * divisor) 17 | 18 | 19 | def get_depth(x: int, gd: float) -> int: 20 | return max(int(round(x * gd)), 1) 21 | 22 | 23 | def Conv2d(network: trt.INetworkDefinition, weights: OrderedDict, 24 | input: trt.ITensor, out_channel: int, ksize: int, stride: int, 25 | group: int, 
layer_name: str) -> trt.ILayer: 26 | padding = ksize // 2 27 | conv_w = trtweight(weights[layer_name + '.weight']) 28 | conv_b = trtweight(weights[layer_name + '.bias']) 29 | conv = network.add_convolution_nd(input, 30 | num_output_maps=out_channel, 31 | kernel_shape=trt.DimsHW(ksize, ksize), 32 | kernel=conv_w, 33 | bias=conv_b) 34 | assert conv, 'Add convolution_nd layer failed' 35 | conv.stride_nd = trt.DimsHW(stride, stride) 36 | conv.padding_nd = trt.DimsHW(padding, padding) 37 | conv.num_groups = group 38 | return conv 39 | 40 | 41 | def Conv(network: trt.INetworkDefinition, weights: OrderedDict, 42 | input: trt.ITensor, out_channel: int, ksize: int, stride: int, 43 | group: int, layer_name: str) -> trt.ILayer: 44 | padding = ksize // 2 45 | if ksize > 3: 46 | padding -= 1 47 | conv_w = trtweight(weights[layer_name + '.conv.weight']) 48 | conv_b = trtweight(weights[layer_name + '.conv.bias']) 49 | 50 | conv = network.add_convolution_nd(input, 51 | num_output_maps=out_channel, 52 | kernel_shape=trt.DimsHW(ksize, ksize), 53 | kernel=conv_w, 54 | bias=conv_b) 55 | assert conv, 'Add convolution_nd layer failed' 56 | conv.stride_nd = trt.DimsHW(stride, stride) 57 | conv.padding_nd = trt.DimsHW(padding, padding) 58 | conv.num_groups = group 59 | 60 | sigmoid = network.add_activation(conv.get_output(0), 61 | trt.ActivationType.SIGMOID) 62 | assert sigmoid, 'Add activation layer failed' 63 | dot_product = network.add_elementwise(conv.get_output(0), 64 | sigmoid.get_output(0), 65 | trt.ElementWiseOperation.PROD) 66 | assert dot_product, 'Add elementwise layer failed' 67 | return dot_product 68 | 69 | 70 | def Bottleneck(network: trt.INetworkDefinition, weights: OrderedDict, 71 | input: trt.ITensor, c1: int, c2: int, shortcut: bool, 72 | group: int, scale: float, layer_name: str) -> trt.ILayer: 73 | c_ = int(c2 * scale) 74 | conv1 = Conv(network, weights, input, c_, 3, 1, 1, layer_name + '.cv1') 75 | conv2 = Conv(network, weights, conv1.get_output(0), c2, 3, 1, group, 76 | layer_name + '.cv2') 77 | if shortcut and c1 == c2: 78 | ew = network.add_elementwise(input, 79 | conv2.get_output(0), 80 | op=trt.ElementWiseOperation.SUM) 81 | assert ew, 'Add elementwise layer failed' 82 | return ew 83 | return conv2 84 | 85 | 86 | def C2f(network: trt.INetworkDefinition, weights: OrderedDict, 87 | input: trt.ITensor, cout: int, n: int, shortcut: bool, group: int, 88 | scale: float, layer_name: str) -> trt.ILayer: 89 | c_ = int(cout * scale) # e:expand param 90 | conv1 = Conv(network, weights, input, 2 * c_, 1, 1, 1, layer_name + '.cv1') 91 | y1 = conv1.get_output(0) 92 | 93 | b, _, h, w = y1.shape 94 | slice = network.add_slice(y1, (0, c_, 0, 0), (b, c_, h, w), (1, 1, 1, 1)) 95 | assert slice, 'Add slice layer failed' 96 | y2 = slice.get_output(0) 97 | 98 | input_tensors = [y1] 99 | for i in range(n): 100 | b = Bottleneck(network, weights, y2, c_, c_, shortcut, group, 1.0, 101 | layer_name + '.m.' 
+ str(i)) 102 | y2 = b.get_output(0) 103 | input_tensors.append(y2) 104 | 105 | cat = network.add_concatenation(input_tensors) 106 | assert cat, 'Add concatenation layer failed' 107 | 108 | conv2 = Conv(network, weights, cat.get_output(0), cout, 1, 1, 1, 109 | layer_name + '.cv2') 110 | return conv2 111 | 112 | 113 | def SPPF(network: trt.INetworkDefinition, weights: OrderedDict, 114 | input: trt.ITensor, c1: int, c2: int, ksize: int, 115 | layer_name: str) -> trt.ILayer: 116 | c_ = c1 // 2 117 | conv1 = Conv(network, weights, input, c_, 1, 1, 1, layer_name + '.cv1') 118 | 119 | pool1 = network.add_pooling_nd(conv1.get_output(0), trt.PoolingType.MAX, 120 | trt.DimsHW(ksize, ksize)) 121 | assert pool1, 'Add pooling_nd layer failed' 122 | pool1.padding_nd = trt.DimsHW(ksize // 2, ksize // 2) 123 | pool1.stride_nd = trt.DimsHW(1, 1) 124 | 125 | pool2 = network.add_pooling_nd(pool1.get_output(0), trt.PoolingType.MAX, 126 | trt.DimsHW(ksize, ksize)) 127 | assert pool2, 'Add pooling_nd layer failed' 128 | pool2.padding_nd = trt.DimsHW(ksize // 2, ksize // 2) 129 | pool2.stride_nd = trt.DimsHW(1, 1) 130 | 131 | pool3 = network.add_pooling_nd(pool2.get_output(0), trt.PoolingType.MAX, 132 | trt.DimsHW(ksize, ksize)) 133 | assert pool3, 'Add pooling_nd layer failed' 134 | pool3.padding_nd = trt.DimsHW(ksize // 2, ksize // 2) 135 | pool3.stride_nd = trt.DimsHW(1, 1) 136 | 137 | input_tensors = [ 138 | conv1.get_output(0), 139 | pool1.get_output(0), 140 | pool2.get_output(0), 141 | pool3.get_output(0) 142 | ] 143 | cat = network.add_concatenation(input_tensors) 144 | assert cat, 'Add concatenation layer failed' 145 | conv2 = Conv(network, weights, cat.get_output(0), c2, 1, 1, 1, 146 | layer_name + '.cv2') 147 | return conv2 148 | 149 | 150 | def Detect( 151 | network: trt.INetworkDefinition, 152 | weights: OrderedDict, 153 | input: Union[List, Tuple], 154 | s: Union[List, Tuple], 155 | layer_name: str, 156 | reg_max: int = 16, 157 | fp16: bool = True, 158 | iou: float = 0.65, 159 | conf: float = 0.25, 160 | topk: int = 100, 161 | ) -> trt.ILayer: 162 | bboxes_branch = [] 163 | scores_branch = [] 164 | anchors = [] 165 | strides = [] 166 | for i, (inp, stride) in enumerate(zip(input, s)): 167 | h, w = inp.shape[2:] 168 | sx = np.arange(0, w).astype(np.float16 if fp16 else np.float32) + 0.5 169 | sy = np.arange(0, h).astype(np.float16 if fp16 else np.float32) + 0.5 170 | sy, sx = np.meshgrid(sy, sx) 171 | a = np.ascontiguousarray(np.stack((sy, sx), -1).reshape(-1, 2)) 172 | anchors.append(a) 173 | strides.append( 174 | np.full((1, h * w), 175 | stride, 176 | dtype=np.float16 if fp16 else np.float32)) 177 | c2 = weights[f'{layer_name}.cv2.{i}.0.conv.weight'].shape[0] 178 | c3 = weights[f'{layer_name}.cv3.{i}.0.conv.weight'].shape[0] 179 | nc = weights[f'{layer_name}.cv3.0.2.weight'].shape[0] 180 | reg_max_x4 = weights[layer_name + f'.cv2.{i}.2.weight'].shape[0] 181 | assert reg_max_x4 == reg_max * 4 182 | b_Conv_0 = Conv(network, weights, inp, c2, 3, 1, 1, 183 | layer_name + f'.cv2.{i}.0') 184 | b_Conv_1 = Conv(network, weights, b_Conv_0.get_output(0), c2, 3, 1, 1, 185 | layer_name + f'.cv2.{i}.1') 186 | b_Conv_2 = Conv2d(network, weights, b_Conv_1.get_output(0), reg_max_x4, 187 | 1, 1, 1, layer_name + f'.cv2.{i}.2') 188 | 189 | b_out = b_Conv_2.get_output(0) 190 | b_shape = network.add_constant([ 191 | 4, 192 | ], np.array(b_out.shape[0:1] + (4, reg_max, -1), dtype=np.int32)) 193 | assert b_shape, 'Add constant layer failed' 194 | b_shuffle = network.add_shuffle(b_out) 195 | assert b_shuffle, 'Add 
shuffle layer failed' 196 | b_shuffle.set_input(1, b_shape.get_output(0)) 197 | b_shuffle.second_transpose = (0, 3, 1, 2) 198 | 199 | bboxes_branch.append(b_shuffle.get_output(0)) 200 | 201 | s_Conv_0 = Conv(network, weights, inp, c3, 3, 1, 1, 202 | layer_name + f'.cv3.{i}.0') 203 | s_Conv_1 = Conv(network, weights, s_Conv_0.get_output(0), c3, 3, 1, 1, 204 | layer_name + f'.cv3.{i}.1') 205 | s_Conv_2 = Conv2d(network, weights, s_Conv_1.get_output(0), nc, 1, 1, 206 | 1, layer_name + f'.cv3.{i}.2') 207 | s_out = s_Conv_2.get_output(0) 208 | s_shape = network.add_constant([ 209 | 3, 210 | ], np.array(s_out.shape[0:2] + (-1, ), dtype=np.int32)) 211 | assert s_shape, 'Add constant layer failed' 212 | s_shuffle = network.add_shuffle(s_out) 213 | assert s_shuffle, 'Add shuffle layer failed' 214 | s_shuffle.set_input(1, s_shape.get_output(0)) 215 | s_shuffle.second_transpose = (0, 2, 1) 216 | 217 | scores_branch.append(s_shuffle.get_output(0)) 218 | 219 | Cat_bboxes = network.add_concatenation(bboxes_branch) 220 | assert Cat_bboxes, 'Add concatenation layer failed' 221 | Cat_scores = network.add_concatenation(scores_branch) 222 | assert Cat_scores, 'Add concatenation layer failed' 223 | Cat_scores.axis = 1 224 | 225 | Softmax = network.add_softmax(Cat_bboxes.get_output(0)) 226 | assert Softmax, 'Add softmax layer failed' 227 | Softmax.axes = 1 << 3 228 | 229 | SCORES = network.add_activation(Cat_scores.get_output(0), 230 | trt.ActivationType.SIGMOID) 231 | assert SCORES, 'Add activation layer failed' 232 | 233 | reg_max = np.arange( 234 | 0, reg_max).astype(np.float16 if fp16 else np.float32).reshape( 235 | (1, 1, -1, 1)) 236 | constant = network.add_constant(reg_max.shape, reg_max) 237 | assert constant, 'Add constant layer failed' 238 | Matmul = network.add_matrix_multiply(Softmax.get_output(0), 239 | trt.MatrixOperation.NONE, 240 | constant.get_output(0), 241 | trt.MatrixOperation.NONE) 242 | assert Matmul, 'Add matrix_multiply layer failed' 243 | pre_bboxes = network.add_gather( 244 | Matmul.get_output(0), 245 | network.add_constant([ 246 | 1, 247 | ], np.array([0], dtype=np.int32)).get_output(0), 3) 248 | assert pre_bboxes, 'Add gather layer failed' 249 | pre_bboxes.num_elementwise_dims = 1 250 | 251 | pre_bboxes_tensor = pre_bboxes.get_output(0) 252 | b, c, _ = pre_bboxes_tensor.shape 253 | slice_x1y1 = network.add_slice(pre_bboxes_tensor, (0, 0, 0), (b, c, 2), 254 | (1, 1, 1)) 255 | assert slice_x1y1, 'Add slice layer failed' 256 | slice_x2y2 = network.add_slice(pre_bboxes_tensor, (0, 0, 2), (b, c, 2), 257 | (1, 1, 1)) 258 | assert slice_x2y2, 'Add slice layer failed' 259 | anchors = np.concatenate(anchors, 0)[np.newaxis] 260 | anchors = network.add_constant(anchors.shape, anchors) 261 | assert anchors, 'Add constant layer failed' 262 | strides = np.concatenate(strides, 1)[..., np.newaxis] 263 | strides = network.add_constant(strides.shape, strides) 264 | assert strides, 'Add constant layer failed' 265 | 266 | Sub = network.add_elementwise(anchors.get_output(0), 267 | slice_x1y1.get_output(0), 268 | trt.ElementWiseOperation.SUB) 269 | assert Sub, 'Add elementwise layer failed' 270 | Add = network.add_elementwise(anchors.get_output(0), 271 | slice_x2y2.get_output(0), 272 | trt.ElementWiseOperation.SUM) 273 | assert Add, 'Add elementwise layer failed' 274 | x1y1 = Sub.get_output(0) 275 | x2y2 = Add.get_output(0) 276 | 277 | Cat_bboxes_ = network.add_concatenation([x1y1, x2y2]) 278 | assert Cat_bboxes_, 'Add concatenation layer failed' 279 | Cat_bboxes_.axis = 2 280 | 281 | BBOXES = 
network.add_elementwise(Cat_bboxes_.get_output(0),
282 |                                      strides.get_output(0),
283 |                                      trt.ElementWiseOperation.PROD)
284 |     assert BBOXES, 'Add elementwise layer failed'
285 |     plugin_creator = trt.get_plugin_registry().get_plugin_creator(
286 |         'EfficientNMS_TRT', '1')
287 |     assert plugin_creator, 'Plugin EfficientNMS_TRT is not registered'
288 | 
289 |     background_class = trt.PluginField('background_class',
290 |                                        np.array(-1, np.int32),
291 |                                        trt.PluginFieldType.INT32)
292 |     box_coding = trt.PluginField('box_coding', np.array(0, np.int32),
293 |                                  trt.PluginFieldType.INT32)
294 |     iou_threshold = trt.PluginField('iou_threshold',
295 |                                     np.array(iou, dtype=np.float32),
296 |                                     trt.PluginFieldType.FLOAT32)
297 |     max_output_boxes = trt.PluginField('max_output_boxes',
298 |                                        np.array(topk, np.int32),
299 |                                        trt.PluginFieldType.INT32)
300 |     plugin_version = trt.PluginField('plugin_version', np.array('1'),
301 |                                      trt.PluginFieldType.CHAR)
302 |     score_activation = trt.PluginField('score_activation',
303 |                                        np.array(0, np.int32),
304 |                                        trt.PluginFieldType.INT32)
305 |     score_threshold = trt.PluginField('score_threshold',
306 |                                       np.array(conf, dtype=np.float32),
307 |                                       trt.PluginFieldType.FLOAT32)
308 | 
309 |     batched_nms_op = plugin_creator.create_plugin(
310 |         name='batched_nms',
311 |         field_collection=trt.PluginFieldCollection([
312 |             background_class, box_coding, iou_threshold, max_output_boxes,
313 |             plugin_version, score_activation, score_threshold
314 |         ]))
315 | 
316 |     batched_nms = network.add_plugin_v2(
317 |         inputs=[BBOXES.get_output(0),
318 |                 SCORES.get_output(0)],
319 |         plugin=batched_nms_op)
320 | 
321 |     batched_nms.get_output(0).name = 'num_dets'
322 |     batched_nms.get_output(1).name = 'bboxes'
323 |     batched_nms.get_output(2).name = 'scores'
324 |     batched_nms.get_output(3).name = 'labels'
325 | 
326 |     return batched_nms
--------------------------------------------------------------------------------
/models/common.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 | 
3 | import torch
4 | import torch.nn as nn
5 | from torch import Graph, Tensor, Value
6 | 
7 | 
8 | def make_anchors(feats: Tensor,
9 |                  strides: Tensor,
10 |                  grid_cell_offset: float = 0.5) -> Tuple[Tensor, Tensor]:
11 |     anchor_points, stride_tensor = [], []
12 |     assert feats is not None
13 |     dtype, device = feats[0].dtype, feats[0].device
14 |     for i, stride in enumerate(strides):
15 |         _, _, h, w = feats[i].shape
16 |         sx = torch.arange(end=w, device=device,
17 |                           dtype=dtype) + grid_cell_offset  # shift x
18 |         sy = torch.arange(end=h, device=device,
19 |                           dtype=dtype) + grid_cell_offset  # shift y
20 |         sy, sx = torch.meshgrid(sy, sx)
21 |         anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2))
22 |         stride_tensor.append(
23 |             torch.full((h * w, 1), stride, dtype=dtype, device=device))
24 |     return torch.cat(anchor_points), torch.cat(stride_tensor)
25 | 
26 | 
27 | class TRT_NMS(torch.autograd.Function):
28 | 
29 |     @staticmethod
30 |     def forward(
31 |             ctx: Graph,
32 |             boxes: Tensor,
33 |             scores: Tensor,
34 |             iou_threshold: float = 0.65,
35 |             score_threshold: float = 0.25,
36 |             max_output_boxes: int = 100,
37 |             background_class: int = -1,
38 |             box_coding: int = 0,
39 |             plugin_version: str = '1',
40 |             score_activation: int = 0
41 |     ) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
42 |         batch_size, num_boxes, num_classes = scores.shape
43 |         num_dets = torch.randint(0,
44 |                                  max_output_boxes, (batch_size, 1),
45 |                                  dtype=torch.int32)
46 |         boxes = torch.randn(batch_size, max_output_boxes, 4)
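        # these random tensors are shape/dtype placeholders for ONNX tracing only;
        # the real NMS is performed by the EfficientNMS_TRT plugin that symbolic() emits at export time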
47 | scores = torch.randn(batch_size, max_output_boxes) 48 | labels = torch.randint(0, 49 | num_classes, (batch_size, max_output_boxes), 50 | dtype=torch.int32) 51 | 52 | return num_dets, boxes, scores, labels 53 | 54 | @staticmethod 55 | def symbolic( 56 | g, 57 | boxes: Value, 58 | scores: Value, 59 | iou_threshold: float = 0.45, 60 | score_threshold: float = 0.25, 61 | max_output_boxes: int = 100, 62 | background_class: int = -1, 63 | box_coding: int = 0, 64 | score_activation: int = 0, 65 | plugin_version: str = '1') -> Tuple[Value, Value, Value, Value]: 66 | out = g.op('TRT::EfficientNMS_TRT', 67 | boxes, 68 | scores, 69 | iou_threshold_f=iou_threshold, 70 | score_threshold_f=score_threshold, 71 | max_output_boxes_i=max_output_boxes, 72 | background_class_i=background_class, 73 | box_coding_i=box_coding, 74 | plugin_version_s=plugin_version, 75 | score_activation_i=score_activation, 76 | outputs=4) 77 | nums_dets, boxes, scores, classes = out 78 | return nums_dets, boxes, scores, classes 79 | 80 | 81 | class C2f(nn.Module): 82 | 83 | def __init__(self, *args, **kwargs): 84 | super().__init__() 85 | 86 | def forward(self, x): 87 | x = self.cv1(x) 88 | x = [x, x[:, self.c:, ...]] 89 | x.extend(m(x[-1]) for m in self.m) 90 | x.pop(1) 91 | return self.cv2(torch.cat(x, 1)) 92 | 93 | 94 | class PostDetect(nn.Module): 95 | export = True 96 | shape = None 97 | dynamic = False 98 | iou_thres = 0.65 99 | conf_thres = 0.25 100 | topk = 100 101 | 102 | def __init__(self, *args, **kwargs): 103 | super().__init__() 104 | 105 | def forward(self, x): 106 | shape = x[0].shape 107 | b, res, b_reg_num = shape[0], [], self.reg_max * 4 108 | for i in range(self.nl): 109 | res.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) 110 | if self.dynamic or self.shape != shape: 111 | self.anchors, self.strides = (x.transpose( 112 | 0, 1) for x in make_anchors(x, self.stride, 0.5)) 113 | self.shape = shape 114 | x = [i.view(b, self.no, -1) for i in res] 115 | y = torch.cat(x, 2) 116 | boxes, scores = y[:, :b_reg_num, ...], y[:, b_reg_num:, ...].sigmoid() 117 | boxes = boxes.view(b, 4, self.reg_max, -1).permute(0, 1, 3, 2) 118 | boxes = boxes.softmax(-1) @ torch.arange(self.reg_max).to(boxes) 119 | boxes0, boxes1 = -boxes[:, :2, ...], boxes[:, 2:, ...] 
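        # decode DFL distances to corners: x1y1 = anchor - (left, top), x2y2 = anchor + (right, bottom)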
120 | boxes = self.anchors.repeat(b, 2, 1) + torch.cat([boxes0, boxes1], 1) 121 | boxes = boxes * self.strides 122 | 123 | return TRT_NMS.apply(boxes.transpose(1, 2), scores.transpose(1, 2), 124 | self.iou_thres, self.conf_thres, self.topk) 125 | 126 | 127 | class PostSeg(nn.Module): 128 | export = True 129 | shape = None 130 | dynamic = False 131 | 132 | def __init__(self, *args, **kwargs): 133 | super().__init__() 134 | 135 | def forward(self, x): 136 | p = self.proto(x[0]) # mask protos 137 | bs = p.shape[0] # batch size 138 | mc = torch.cat( 139 | [self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 140 | 2) # mask coefficients 141 | boxes, scores, labels = self.forward_det(x) 142 | out = torch.cat([boxes, scores, labels.float(), mc.transpose(1, 2)], 2) 143 | return out, p.flatten(2) 144 | 145 | def forward_det(self, x): 146 | shape = x[0].shape 147 | b, res, b_reg_num = shape[0], [], self.reg_max * 4 148 | for i in range(self.nl): 149 | res.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) 150 | if self.dynamic or self.shape != shape: 151 | self.anchors, self.strides = \ 152 | (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5)) 153 | self.shape = shape 154 | x = [i.view(b, self.no, -1) for i in res] 155 | y = torch.cat(x, 2) 156 | boxes, scores = y[:, :b_reg_num, ...], y[:, b_reg_num:, ...].sigmoid() 157 | boxes = boxes.view(b, 4, self.reg_max, -1).permute(0, 1, 3, 2) 158 | boxes = boxes.softmax(-1) @ torch.arange(self.reg_max).to(boxes) 159 | boxes0, boxes1 = -boxes[:, :2, ...], boxes[:, 2:, ...] 160 | boxes = self.anchors.repeat(b, 2, 1) + torch.cat([boxes0, boxes1], 1) 161 | boxes = boxes * self.strides 162 | scores, labels = scores.transpose(1, 2).max(dim=-1, keepdim=True) 163 | return boxes.transpose(1, 2), scores, labels 164 | 165 | 166 | def optim(module: nn.Module): 167 | s = str(type(module))[6:-2].split('.')[-1] 168 | if s == 'Detect': 169 | setattr(module, '__class__', PostDetect) 170 | elif s == 'Segment': 171 | setattr(module, '__class__', PostSeg) 172 | elif s == 'C2f': 173 | setattr(module, '__class__', C2f) 174 | -------------------------------------------------------------------------------- /models/cudart_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | from typing import List, Optional, Tuple, Union 6 | 7 | import numpy as np 8 | import tensorrt as trt 9 | from cuda import cudart 10 | from numpy import ndarray 11 | 12 | os.environ['CUDA_MODULE_LOADING'] = 'LAZY' 13 | warnings.filterwarnings(action='ignore', category=DeprecationWarning) 14 | 15 | 16 | @dataclass 17 | class Tensor: 18 | name: str 19 | dtype: np.dtype 20 | shape: Tuple 21 | cpu: ndarray 22 | gpu: int 23 | 24 | 25 | class TRTEngine: 26 | 27 | def __init__(self, weight: Union[str, Path]) -> None: 28 | self.weight = Path(weight) if isinstance(weight, str) else weight 29 | status, self.stream = cudart.cudaStreamCreate() 30 | assert status.value == 0 31 | self.__init_engine() 32 | self.__init_bindings() 33 | self.__warm_up() 34 | 35 | def __init_engine(self) -> None: 36 | logger = trt.Logger(trt.Logger.WARNING) 37 | trt.init_libnvinfer_plugins(logger, namespace='') 38 | with trt.Runtime(logger) as runtime: 39 | model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) 40 | 41 | context = model.create_execution_context() 42 | 43 | names = [model.get_binding_name(i) for i in range(model.num_bindings)] 44 | self.num_bindings 
= model.num_bindings
45 |         self.bindings: List[int] = [0] * self.num_bindings
46 |         num_inputs, num_outputs = 0, 0
47 | 
48 |         for i in range(model.num_bindings):
49 |             if model.binding_is_input(i):
50 |                 num_inputs += 1
51 |             else:
52 |                 num_outputs += 1
53 | 
54 |         self.num_inputs = num_inputs
55 |         self.num_outputs = num_outputs
56 |         self.model = model
57 |         self.context = context
58 |         self.input_names = names[:num_inputs]
59 |         self.output_names = names[num_inputs:]
60 | 
61 |     def __init_bindings(self) -> None:
62 |         dynamic = False
63 |         inp_info = []
64 |         out_info = []
65 |         out_ptrs = []
66 |         for i, name in enumerate(self.input_names):
67 |             assert self.model.get_binding_name(i) == name
68 |             dtype = trt.nptype(self.model.get_binding_dtype(i))
69 |             shape = tuple(self.model.get_binding_shape(i))
70 |             if -1 in shape:
71 |                 dynamic |= True
72 |             if not dynamic:
73 |                 cpu = np.empty(shape, dtype)
74 |                 status, gpu = cudart.cudaMallocAsync(cpu.nbytes, self.stream)
75 |                 assert status.value == 0
76 |                 cudart.cudaMemcpyAsync(
77 |                     gpu, cpu.ctypes.data, cpu.nbytes,
78 |                     cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream)
79 |             else:
80 |                 cpu, gpu = np.empty(0), 0
81 |             inp_info.append(Tensor(name, dtype, shape, cpu, gpu))
82 |         for i, name in enumerate(self.output_names):
83 |             i += self.num_inputs
84 |             assert self.model.get_binding_name(i) == name
85 |             dtype = trt.nptype(self.model.get_binding_dtype(i))
86 |             shape = tuple(self.model.get_binding_shape(i))
87 |             if not dynamic:
88 |                 cpu = np.empty(shape, dtype=dtype)
89 |                 status, gpu = cudart.cudaMallocAsync(cpu.nbytes, self.stream)
90 |                 assert status.value == 0
91 |                 cudart.cudaMemcpyAsync(
92 |                     gpu, cpu.ctypes.data, cpu.nbytes,
93 |                     cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream)
94 |                 out_ptrs.append(gpu)
95 |             else:
96 |                 cpu, gpu = np.empty(0), 0
97 |             out_info.append(Tensor(name, dtype, shape, cpu, gpu))
98 | 
99 |         self.is_dynamic = dynamic
100 |         self.inp_info = inp_info
101 |         self.out_info = out_info
102 |         self.out_ptrs = out_ptrs
103 | 
104 |     def __warm_up(self) -> None:
105 |         if self.is_dynamic:
106 |             print('Your engine has dynamic axes, please warm it up yourself!')
107 |             return
108 |         for _ in range(10):
109 |             inputs = []
110 |             for i in self.inp_info:
111 |                 inputs.append(i.cpu)
112 |             self.__call__(*inputs)  # unpack so each array maps to one input binding
113 | 
114 |     def set_profiler(self, profiler: Optional[trt.IProfiler]) -> None:
115 |         self.context.profiler = profiler \
116 |             if profiler is not None else trt.Profiler()
117 | 
118 |     def __call__(self, *inputs) -> Union[Tuple, ndarray]:
119 | 
120 |         assert len(inputs) == self.num_inputs
121 |         contiguous_inputs: List[ndarray] = [
122 |             np.ascontiguousarray(i) for i in inputs
123 |         ]
124 | 
125 |         for i in range(self.num_inputs):
126 | 
127 |             if self.is_dynamic:
128 |                 self.context.set_binding_shape(
129 |                     i, tuple(contiguous_inputs[i].shape))
130 |                 status, self.inp_info[i].gpu = cudart.cudaMallocAsync(
131 |                     contiguous_inputs[i].nbytes, self.stream)
132 |                 assert status.value == 0
133 |             cudart.cudaMemcpyAsync(
134 |                 self.inp_info[i].gpu, contiguous_inputs[i].ctypes.data,
135 |                 contiguous_inputs[i].nbytes,
136 |                 cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream)
137 |             self.bindings[i] = self.inp_info[i].gpu
138 | 
139 |         output_gpu_ptrs: List[int] = []
140 |         outputs: List[ndarray] = []
141 | 
142 |         for i in range(self.num_outputs):
143 |             j = i + self.num_inputs
144 |             if self.is_dynamic:
145 |                 shape = tuple(self.context.get_binding_shape(j))
146 |                 dtype = self.out_info[i].dtype
147 |                 cpu = np.empty(shape, dtype=dtype)
148 |                 status, gpu = 
cudart.cudaMallocAsync(cpu.nbytes, self.stream) 149 | assert status.value == 0 150 | cudart.cudaMemcpyAsync( 151 | gpu, cpu.ctypes.data, cpu.nbytes, 152 | cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) 153 | else: 154 | cpu = self.out_info[i].cpu 155 | gpu = self.out_info[i].gpu 156 | outputs.append(cpu) 157 | output_gpu_ptrs.append(gpu) 158 | self.bindings[j] = gpu 159 | 160 | self.context.execute_async_v2(self.bindings, self.stream) 161 | cudart.cudaStreamSynchronize(self.stream) 162 | 163 | for i, o in enumerate(output_gpu_ptrs): 164 | cudart.cudaMemcpyAsync( 165 | outputs[i].ctypes.data, o, outputs[i].nbytes, 166 | cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream) 167 | 168 | return tuple(outputs) if len(outputs) > 1 else outputs[0] 169 | -------------------------------------------------------------------------------- /models/engine.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from collections import defaultdict, namedtuple 4 | from pathlib import Path 5 | from typing import List, Optional, Tuple, Union 6 | 7 | import onnx 8 | import tensorrt as trt 9 | import torch 10 | 11 | os.environ['CUDA_MODULE_LOADING'] = 'LAZY' 12 | 13 | 14 | class EngineBuilder: 15 | seg = False 16 | 17 | def __init__( 18 | self, 19 | checkpoint: Union[str, Path], 20 | device: Optional[Union[str, int, torch.device]] = None) -> None: 21 | checkpoint = Path(checkpoint) if isinstance(checkpoint, 22 | str) else checkpoint 23 | assert checkpoint.exists() and checkpoint.suffix in ('.onnx', '.pkl') 24 | self.api = checkpoint.suffix == '.pkl' 25 | if isinstance(device, str): 26 | device = torch.device(device) 27 | elif isinstance(device, int): 28 | device = torch.device(f'cuda:{device}') 29 | 30 | self.checkpoint = checkpoint 31 | self.device = device 32 | 33 | def __build_engine(self, 34 | fp16: bool = True, 35 | input_shape: Union[List, Tuple] = (1, 3, 640, 640), 36 | iou_thres: float = 0.65, 37 | conf_thres: float = 0.25, 38 | topk: int = 100, 39 | with_profiling: bool = True) -> None: 40 | logger = trt.Logger(trt.Logger.WARNING) 41 | trt.init_libnvinfer_plugins(logger, namespace='') 42 | builder = trt.Builder(logger) 43 | config = builder.create_builder_config() 44 | config.max_workspace_size = torch.cuda.get_device_properties( 45 | self.device).total_memory 46 | flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) 47 | network = builder.create_network(flag) 48 | 49 | self.logger = logger 50 | self.builder = builder 51 | self.network = network 52 | if self.api: 53 | self.build_from_api(fp16, input_shape, iou_thres, conf_thres, topk) 54 | else: 55 | self.build_from_onnx(iou_thres, conf_thres, topk) 56 | if fp16 and self.builder.platform_has_fast_fp16: 57 | config.set_flag(trt.BuilderFlag.FP16) 58 | self.weight = self.checkpoint.with_suffix('.engine') 59 | 60 | if with_profiling: 61 | config.profiling_verbosity = trt.ProfilingVerbosity.DETAILED 62 | with self.builder.build_engine(self.network, config) as engine: 63 | self.weight.write_bytes(engine.serialize()) 64 | self.logger.log( 65 | trt.Logger.WARNING, f'Build tensorrt engine finish.\n' 66 | f'Save in {str(self.weight.absolute())}') 67 | 68 | def build(self, 69 | fp16: bool = True, 70 | input_shape: Union[List, Tuple] = (1, 3, 640, 640), 71 | iou_thres: float = 0.65, 72 | conf_thres: float = 0.25, 73 | topk: int = 100, 74 | with_profiling=True) -> None: 75 | self.__build_engine(fp16, input_shape, iou_thres, conf_thres, topk, 76 | with_profiling) 77 | 78 
| def build_from_onnx(self, 79 | iou_thres: float = 0.65, 80 | conf_thres: float = 0.25, 81 | topk: int = 100): 82 | parser = trt.OnnxParser(self.network, self.logger) 83 | onnx_model = onnx.load(str(self.checkpoint)) 84 | if not self.seg: 85 | onnx_model.graph.node[-1].attribute[2].i = topk 86 | onnx_model.graph.node[-1].attribute[3].f = conf_thres 87 | onnx_model.graph.node[-1].attribute[4].f = iou_thres 88 | 89 | if not parser.parse(onnx_model.SerializeToString()): 90 | raise RuntimeError( 91 | f'failed to load ONNX file: {str(self.checkpoint)}') 92 | inputs = [ 93 | self.network.get_input(i) for i in range(self.network.num_inputs) 94 | ] 95 | outputs = [ 96 | self.network.get_output(i) for i in range(self.network.num_outputs) 97 | ] 98 | 99 | for inp in inputs: 100 | self.logger.log( 101 | trt.Logger.WARNING, 102 | f'input "{inp.name}" with shape: {inp.shape} ' 103 | f'dtype: {inp.dtype}') 104 | for out in outputs: 105 | self.logger.log( 106 | trt.Logger.WARNING, 107 | f'output "{out.name}" with shape: {out.shape} ' 108 | f'dtype: {out.dtype}') 109 | 110 | def build_from_api( 111 | self, 112 | fp16: bool = True, 113 | input_shape: Union[List, Tuple] = (1, 3, 640, 640), 114 | iou_thres: float = 0.65, 115 | conf_thres: float = 0.25, 116 | topk: int = 100, 117 | ): 118 | assert not self.seg 119 | from .api import SPPF, C2f, Conv, Detect, get_depth, get_width 120 | 121 | with open(self.checkpoint, 'rb') as f: 122 | state_dict = pickle.load(f) 123 | mapping = {0.25: 1024, 0.5: 1024, 0.75: 768, 1.0: 512, 1.25: 512} 124 | 125 | GW = state_dict['GW'] 126 | GD = state_dict['GD'] 127 | width_64 = get_width(64, GW) 128 | width_128 = get_width(128, GW) 129 | width_256 = get_width(256, GW) 130 | width_512 = get_width(512, GW) 131 | width_1024 = get_width(mapping[GW], GW) 132 | depth_3 = get_depth(3, GD) 133 | depth_6 = get_depth(6, GD) 134 | strides = state_dict['strides'] 135 | reg_max = state_dict['reg_max'] 136 | images = self.network.add_input(name='images', 137 | dtype=trt.float32, 138 | shape=trt.Dims4(input_shape)) 139 | assert images, 'Add input failed' 140 | 141 | Conv_0 = Conv(self.network, state_dict, images, width_64, 3, 2, 1, 142 | 'Conv.0') 143 | Conv_1 = Conv(self.network, state_dict, Conv_0.get_output(0), 144 | width_128, 3, 2, 1, 'Conv.1') 145 | C2f_2 = C2f(self.network, state_dict, Conv_1.get_output(0), width_128, 146 | depth_3, True, 1, 0.5, 'C2f.2') 147 | Conv_3 = Conv(self.network, state_dict, C2f_2.get_output(0), width_256, 148 | 3, 2, 1, 'Conv.3') 149 | C2f_4 = C2f(self.network, state_dict, Conv_3.get_output(0), width_256, 150 | depth_6, True, 1, 0.5, 'C2f.4') 151 | Conv_5 = Conv(self.network, state_dict, C2f_4.get_output(0), width_512, 152 | 3, 2, 1, 'Conv.5') 153 | C2f_6 = C2f(self.network, state_dict, Conv_5.get_output(0), width_512, 154 | depth_6, True, 1, 0.5, 'C2f.6') 155 | Conv_7 = Conv(self.network, state_dict, C2f_6.get_output(0), 156 | width_1024, 3, 2, 1, 'Conv.7') 157 | C2f_8 = C2f(self.network, state_dict, Conv_7.get_output(0), width_1024, 158 | depth_3, True, 1, 0.5, 'C2f.8') 159 | SPPF_9 = SPPF(self.network, state_dict, C2f_8.get_output(0), 160 | width_1024, width_1024, 5, 'SPPF.9') 161 | Upsample_10 = self.network.add_resize(SPPF_9.get_output(0)) 162 | assert Upsample_10, 'Add Upsample_10 failed' 163 | Upsample_10.resize_mode = trt.ResizeMode.NEAREST 164 | Upsample_10.shape = Upsample_10.get_output( 165 | 0).shape[:2] + C2f_6.get_output(0).shape[2:] 166 | input_tensors11 = [Upsample_10.get_output(0), C2f_6.get_output(0)] 167 | Cat_11 = 
self.network.add_concatenation(input_tensors11) 168 | C2f_12 = C2f(self.network, state_dict, Cat_11.get_output(0), width_512, 169 | depth_3, False, 1, 0.5, 'C2f.12') 170 | Upsample13 = self.network.add_resize(C2f_12.get_output(0)) 171 | assert Upsample13, 'Add Upsample13 failed' 172 | Upsample13.resize_mode = trt.ResizeMode.NEAREST 173 | Upsample13.shape = Upsample13.get_output( 174 | 0).shape[:2] + C2f_4.get_output(0).shape[2:] 175 | input_tensors14 = [Upsample13.get_output(0), C2f_4.get_output(0)] 176 | Cat_14 = self.network.add_concatenation(input_tensors14) 177 | C2f_15 = C2f(self.network, state_dict, Cat_14.get_output(0), width_256, 178 | depth_3, False, 1, 0.5, 'C2f.15') 179 | Conv_16 = Conv(self.network, state_dict, C2f_15.get_output(0), 180 | width_256, 3, 2, 1, 'Conv.16') 181 | input_tensors17 = [Conv_16.get_output(0), C2f_12.get_output(0)] 182 | Cat_17 = self.network.add_concatenation(input_tensors17) 183 | C2f_18 = C2f(self.network, state_dict, Cat_17.get_output(0), width_512, 184 | depth_3, False, 1, 0.5, 'C2f.18') 185 | Conv_19 = Conv(self.network, state_dict, C2f_18.get_output(0), 186 | width_512, 3, 2, 1, 'Conv.19') 187 | input_tensors20 = [Conv_19.get_output(0), SPPF_9.get_output(0)] 188 | Cat_20 = self.network.add_concatenation(input_tensors20) 189 | C2f_21 = C2f(self.network, state_dict, Cat_20.get_output(0), 190 | width_1024, depth_3, False, 1, 0.5, 'C2f.21') 191 | input_tensors22 = [ 192 | C2f_15.get_output(0), 193 | C2f_18.get_output(0), 194 | C2f_21.get_output(0) 195 | ] 196 | batched_nms = Detect(self.network, state_dict, input_tensors22, 197 | strides, 'Detect.22', reg_max, fp16, iou_thres, 198 | conf_thres, topk) 199 | for o in range(batched_nms.num_outputs): 200 | self.network.mark_output(batched_nms.get_output(o)) 201 | 202 | 203 | class TRTModule(torch.nn.Module): 204 | dtypeMapping = { 205 | trt.bool: torch.bool, 206 | trt.int8: torch.int8, 207 | trt.int32: torch.int32, 208 | trt.float16: torch.float16, 209 | trt.float32: torch.float32 210 | } 211 | 212 | def __init__(self, weight: Union[str, Path], 213 | device: Optional[torch.device]) -> None: 214 | super(TRTModule, self).__init__() 215 | self.weight = Path(weight) if isinstance(weight, str) else weight 216 | self.device = device if device is not None else torch.device('cuda:0') 217 | self.stream = torch.cuda.Stream(device=device) 218 | self.__init_engine() 219 | self.__init_bindings() 220 | 221 | def __init_engine(self) -> None: 222 | logger = trt.Logger(trt.Logger.WARNING) 223 | trt.init_libnvinfer_plugins(logger, namespace='') 224 | with trt.Runtime(logger) as runtime: 225 | model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) 226 | 227 | context = model.create_execution_context() 228 | num_bindings = model.num_bindings 229 | names = [model.get_binding_name(i) for i in range(num_bindings)] 230 | 231 | self.bindings: List[int] = [0] * num_bindings 232 | num_inputs, num_outputs = 0, 0 233 | 234 | for i in range(num_bindings): 235 | if model.binding_is_input(i): 236 | num_inputs += 1 237 | else: 238 | num_outputs += 1 239 | 240 | self.num_bindings = num_bindings 241 | self.num_inputs = num_inputs 242 | self.num_outputs = num_outputs 243 | self.model = model 244 | self.context = context 245 | self.input_names = names[:num_inputs] 246 | self.output_names = names[num_inputs:] 247 | self.idx = list(range(self.num_outputs)) 248 | 249 | def __init_bindings(self) -> None: 250 | idynamic = odynamic = False 251 | Tensor = namedtuple('Tensor', ('name', 'dtype', 'shape')) 252 | inp_info = [] 253 | out_info 
= [] 254 | for i, name in enumerate(self.input_names): 255 | assert self.model.get_binding_name(i) == name 256 | dtype = self.dtypeMapping[self.model.get_binding_dtype(i)] 257 | shape = tuple(self.model.get_binding_shape(i)) 258 | if -1 in shape: 259 | idynamic |= True 260 | inp_info.append(Tensor(name, dtype, shape)) 261 | for i, name in enumerate(self.output_names): 262 | i += self.num_inputs 263 | assert self.model.get_binding_name(i) == name 264 | dtype = self.dtypeMapping[self.model.get_binding_dtype(i)] 265 | shape = tuple(self.model.get_binding_shape(i)) 266 | if -1 in shape: 267 | odynamic |= True 268 | out_info.append(Tensor(name, dtype, shape)) 269 | 270 | if not odynamic: 271 | self.output_tensor = [ 272 | torch.empty(info.shape, dtype=info.dtype, device=self.device) 273 | for info in out_info 274 | ] 275 | self.idynamic = idynamic 276 | self.odynamic = odynamic 277 | self.inp_info = inp_info 278 | self.out_info = out_info 279 | 280 | def set_profiler(self, profiler: Optional[trt.IProfiler]): 281 | self.context.profiler = profiler \ 282 | if profiler is not None else trt.Profiler() 283 | 284 | def set_desired(self, desired: Optional[Union[List, Tuple]]): 285 | if isinstance(desired, 286 | (list, tuple)) and len(desired) == self.num_outputs: 287 | self.idx = [self.output_names.index(i) for i in desired] 288 | 289 | def forward(self, *inputs) -> Union[Tuple, torch.Tensor]: 290 | 291 | assert len(inputs) == self.num_inputs 292 | contiguous_inputs: List[torch.Tensor] = [ 293 | i.contiguous() for i in inputs 294 | ] 295 | 296 | for i in range(self.num_inputs): 297 | self.bindings[i] = contiguous_inputs[i].data_ptr() 298 | if self.idynamic: 299 | self.context.set_binding_shape( 300 | i, tuple(contiguous_inputs[i].shape)) 301 | 302 | outputs: List[torch.Tensor] = [] 303 | 304 | for i in range(self.num_outputs): 305 | j = i + self.num_inputs 306 | if self.odynamic: 307 | shape = tuple(self.context.get_binding_shape(j)) 308 | output = torch.empty(size=shape, 309 | dtype=self.out_info[i].dtype, 310 | device=self.device) 311 | else: 312 | output = self.output_tensor[i] 313 | self.bindings[j] = output.data_ptr() 314 | outputs.append(output) 315 | 316 | self.context.execute_async_v2(self.bindings, self.stream.cuda_stream) 317 | self.stream.synchronize() 318 | 319 | return tuple(outputs[i] 320 | for i in self.idx) if len(outputs) > 1 else outputs[0] 321 | 322 | 323 | class TRTProfilerV1(trt.IProfiler): 324 | 325 | def __init__(self): 326 | trt.IProfiler.__init__(self) 327 | self.total_runtime = 0.0 328 | self.recorder = defaultdict(float) 329 | 330 | def report_layer_time(self, layer_name: str, ms: float): 331 | self.total_runtime += ms * 1000 332 | self.recorder[layer_name] += ms * 1000 333 | 334 | def report(self): 335 | f = '\t%40s\t\t\t\t%10.4f' 336 | print('\t%40s\t\t\t\t%10s' % ('layername', 'cost(us)')) 337 | for name, cost in sorted(self.recorder.items(), key=lambda x: -x[1]): 338 | print( 339 | f % 340 | (name if len(name) < 40 else name[:35] + ' ' + '*' * 4, cost)) 341 | print(f'\nTotal Inference Time: {self.total_runtime:.4f}(us)') 342 | 343 | 344 | class TRTProfilerV0(trt.IProfiler): 345 | 346 | def __init__(self): 347 | trt.IProfiler.__init__(self) 348 | 349 | def report_layer_time(self, layer_name: str, ms: float): 350 | f = '\t%40s\t\t\t\t%10.4fms' 351 | print(f % (layer_name if len(layer_name) < 40 else layer_name[:35] + 352 | ' ' + '*' * 4, ms)) 353 | -------------------------------------------------------------------------------- /models/pycuda_api.py: 
--------------------------------------------------------------------------------
  1 | import os
  2 | import warnings
  3 | from dataclasses import dataclass
  4 | from pathlib import Path
  5 | from typing import List, Optional, Tuple, Union
  6 | 
  7 | import numpy as np
  8 | import pycuda.autoinit  # noqa F401  (importing creates the CUDA context)
  9 | import pycuda.driver as cuda
 10 | import tensorrt as trt
 11 | from numpy import ndarray
 12 | 
 13 | os.environ['CUDA_MODULE_LOADING'] = 'LAZY'
 14 | warnings.filterwarnings(action='ignore', category=DeprecationWarning)
 15 | 
 16 | 
 17 | @dataclass
 18 | class Tensor:
 19 |     name: str
 20 |     dtype: np.dtype
 21 |     shape: Tuple
 22 |     cpu: ndarray
 23 |     gpu: int
 24 | 
 25 | 
 26 | class TRTEngine:
 27 | 
 28 |     def __init__(self, weight: Union[str, Path]) -> None:
 29 |         self.weight = Path(weight) if isinstance(weight, str) else weight
 30 |         self.stream = cuda.Stream(0)
 31 |         self.__init_engine()
 32 |         self.__init_bindings()
 33 |         self.__warm_up()
 34 | 
 35 |     def __init_engine(self) -> None:
 36 |         logger = trt.Logger(trt.Logger.WARNING)
 37 |         trt.init_libnvinfer_plugins(logger, namespace='')
 38 |         with trt.Runtime(logger) as runtime:
 39 |             model = runtime.deserialize_cuda_engine(self.weight.read_bytes())
 40 | 
 41 |         context = model.create_execution_context()
 42 | 
 43 |         names = [model.get_binding_name(i) for i in range(model.num_bindings)]
 44 |         self.num_bindings = model.num_bindings
 45 |         self.bindings: List[int] = [0] * self.num_bindings
 46 |         num_inputs, num_outputs = 0, 0
 47 | 
 48 |         for i in range(model.num_bindings):
 49 |             if model.binding_is_input(i):
 50 |                 num_inputs += 1
 51 |             else:
 52 |                 num_outputs += 1
 53 | 
 54 |         self.num_inputs = num_inputs
 55 |         self.num_outputs = num_outputs
 56 |         self.model = model
 57 |         self.context = context
 58 |         self.input_names = names[:num_inputs]
 59 |         self.output_names = names[num_inputs:]
 60 | 
 61 |     def __init_bindings(self) -> None:
 62 |         dynamic = False
 63 |         inp_info = []
 64 |         out_info = []
 65 |         out_ptrs = []
 66 |         for i, name in enumerate(self.input_names):
 67 |             assert self.model.get_binding_name(i) == name
 68 |             dtype = trt.nptype(self.model.get_binding_dtype(i))
 69 |             shape = tuple(self.model.get_binding_shape(i))
 70 |             if -1 in shape:
 71 |                 dynamic |= True
 72 |             if not dynamic:
 73 |                 cpu = np.empty(shape, dtype)
 74 |                 gpu = cuda.mem_alloc(cpu.nbytes)
 75 |                 cuda.memcpy_htod_async(gpu, cpu, self.stream)
 76 |             else:
 77 |                 cpu, gpu = np.empty(0), 0
 78 |             inp_info.append(Tensor(name, dtype, shape, cpu, gpu))
 79 |         for i, name in enumerate(self.output_names):
 80 |             i += self.num_inputs
 81 |             assert self.model.get_binding_name(i) == name
 82 |             dtype = trt.nptype(self.model.get_binding_dtype(i))
 83 |             shape = tuple(self.model.get_binding_shape(i))
 84 |             if not dynamic:
 85 |                 cpu = np.empty(shape, dtype=dtype)
 86 |                 gpu = cuda.mem_alloc(cpu.nbytes)
 87 |                 cuda.memcpy_htod_async(gpu, cpu, self.stream)
 88 |                 out_ptrs.append(gpu)
 89 |             else:
 90 |                 cpu, gpu = np.empty(0), 0
 91 |             out_info.append(Tensor(name, dtype, shape, cpu, gpu))
 92 | 
 93 |         self.is_dynamic = dynamic
 94 |         self.inp_info = inp_info
 95 |         self.out_info = out_info
 96 |         self.out_ptrs = out_ptrs
 97 | 
 98 |     def __warm_up(self) -> None:
 99 |         if self.is_dynamic:
100 |             print('Your engine has dynamic axes, please warm it up yourself!')
101 |             return
102 |         for _ in range(10):
103 |             inputs = []
104 |             for i in self.inp_info:
105 |                 inputs.append(i.cpu)
106 |             self.__call__(*inputs)  # unpack: one array per input binding
107 | 
108 |     def set_profiler(self, profiler: Optional[trt.IProfiler]) -> None:
109 |         self.context.profiler = profiler \
110 |             if profiler is not None else trt.Profiler()
111 | 
112 |     def __call__(self, *inputs) ->
Union[Tuple, ndarray]: 113 | 114 | assert len(inputs) == self.num_inputs 115 | contiguous_inputs: List[ndarray] = [ 116 | np.ascontiguousarray(i) for i in inputs 117 | ] 118 | 119 | for i in range(self.num_inputs): 120 | 121 | if self.is_dynamic: 122 | self.context.set_binding_shape( 123 | i, tuple(contiguous_inputs[i].shape)) 124 | self.inp_info[i].gpu = cuda.mem_alloc( 125 | contiguous_inputs[i].nbytes) 126 | 127 | cuda.memcpy_htod_async(self.inp_info[i].gpu, contiguous_inputs[i], 128 | self.stream) 129 | self.bindings[i] = int(self.inp_info[i].gpu) 130 | 131 | output_gpu_ptrs: List[int] = [] 132 | outputs: List[ndarray] = [] 133 | 134 | for i in range(self.num_outputs): 135 | j = i + self.num_inputs 136 | if self.is_dynamic: 137 | shape = tuple(self.context.get_binding_shape(j)) 138 | dtype = self.out_info[i].dtype 139 | cpu = np.empty(shape, dtype=dtype) 140 | gpu = cuda.mem_alloc(cpu.nbytes) 141 | cuda.memcpy_htod_async(gpu, cpu, self.stream) 142 | else: 143 | cpu = self.out_info[i].cpu 144 | gpu = self.out_info[i].gpu 145 | outputs.append(cpu) 146 | output_gpu_ptrs.append(gpu) 147 | self.bindings[j] = int(gpu) 148 | 149 | self.context.execute_async_v2(self.bindings, self.stream.handle) 150 | self.stream.synchronize() 151 | 152 | for i, o in enumerate(output_gpu_ptrs): 153 | cuda.memcpy_dtoh_async(outputs[i], o, self.stream) 154 | 155 | return tuple(outputs) if len(outputs) > 1 else outputs[0] 156 | -------------------------------------------------------------------------------- /models/torch_utils.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple, Union 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import Tensor 6 | from torchvision.ops import batched_nms, nms 7 | 8 | 9 | def seg_postprocess( 10 | data: Tuple[Tensor], 11 | shape: Union[Tuple, List], 12 | conf_thres: float = 0.25, 13 | iou_thres: float = 0.65) \ 14 | -> Tuple[Tensor, Tensor, Tensor, Tensor]: 15 | assert len(data) == 2 16 | h, w = shape[0] // 4, shape[1] // 4 # 4x downsampling 17 | outputs, proto = data[0][0], data[1][0] 18 | bboxes, scores, labels, maskconf = outputs.split([4, 1, 1, 32], 1) 19 | scores, labels = scores.squeeze(), labels.squeeze() 20 | idx = scores > conf_thres 21 | if not idx.any(): # no bounding boxes or seg were created 22 | return bboxes.new_zeros((0, 4)), scores.new_zeros( 23 | (0, )), labels.new_zeros((0, )), bboxes.new_zeros((0, 0, 0, 0)) 24 | bboxes, scores, labels, maskconf = \ 25 | bboxes[idx], scores[idx], labels[idx], maskconf[idx] 26 | idx = batched_nms(bboxes, scores, labels, iou_thres) 27 | bboxes, scores, labels, maskconf = \ 28 | bboxes[idx], scores[idx], labels[idx].int(), maskconf[idx] 29 | masks = (maskconf @ proto).sigmoid().view(-1, h, w) 30 | masks = crop_mask(masks, bboxes / 4.) 
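    # [editor's note] the mask prototypes in proto live at 1/4 of the network
    # input resolution (hence h, w = shape[0] // 4, shape[1] // 4 above), so
    # the boxes are divided by 4 to crop in prototype space; the interpolate
    # call below rescales the cropped masks back to the full input shape.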
31 | masks = F.interpolate(masks[None], 32 | shape, 33 | mode='bilinear', 34 | align_corners=False)[0] 35 | masks = masks.gt_(0.5)[..., None] 36 | return bboxes, scores, labels, masks 37 | 38 | 39 | def pose_postprocess( 40 | data: Union[Tuple, Tensor], 41 | conf_thres: float = 0.25, 42 | iou_thres: float = 0.65) \ 43 | -> Tuple[Tensor, Tensor, Tensor]: 44 | if isinstance(data, tuple): 45 | assert len(data) == 1 46 | data = data[0] 47 | outputs = torch.transpose(data[0], 0, 1).contiguous() 48 | bboxes, scores, kpts = outputs.split([4, 1, 51], 1) 49 | scores, kpts = scores.squeeze(), kpts.squeeze() 50 | idx = scores > conf_thres 51 | if not idx.any(): # no bounding boxes or seg were created 52 | return bboxes.new_zeros((0, 4)), scores.new_zeros( 53 | (0, )), bboxes.new_zeros((0, 0, 0)) 54 | bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx] 55 | xycenter, wh = bboxes.chunk(2, -1) 56 | bboxes = torch.cat([xycenter - 0.5 * wh, xycenter + 0.5 * wh], -1) 57 | idx = nms(bboxes, scores, iou_thres) 58 | bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx] 59 | return bboxes, scores, kpts.reshape(idx.shape[0], -1, 3) 60 | 61 | 62 | def det_postprocess(data: Tuple[Tensor, Tensor, Tensor, Tensor]): 63 | assert len(data) == 4 64 | iou_thres: float = 0.65 65 | num_dets, bboxes, scores, labels = data[0][0], data[1][0], data[2][ 66 | 0], data[3][0] 67 | nums = num_dets.item() 68 | #print(nums) 69 | if nums == 0: 70 | return bboxes.new_zeros((0, 4)), scores.new_zeros( 71 | (0, )), labels.new_zeros((0, )) 72 | # check score negative 73 | scores[scores < 0] = 1 + scores[scores < 0] 74 | # add nms 75 | idx = nms(bboxes, scores, iou_thres) 76 | #print(idx) 77 | bboxes, scores, labels = bboxes[idx], scores[idx], labels[idx] 78 | #print(scores) 79 | #print(labels) 80 | bboxes = bboxes[:nums] 81 | scores = scores[:nums] 82 | labels = labels[:nums] 83 | 84 | return bboxes, scores, labels 85 | 86 | 87 | def crop_mask(masks: Tensor, bboxes: Tensor) -> Tensor: 88 | n, h, w = masks.shape 89 | x1, y1, x2, y2 = torch.chunk(bboxes[:, :, None], 4, 1) # x1 shape(1,1,n) 90 | r = torch.arange(w, device=masks.device, 91 | dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) 92 | c = torch.arange(h, device=masks.device, 93 | dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) 94 | 95 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 96 | -------------------------------------------------------------------------------- /models/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List, Tuple, Union 3 | 4 | import cv2 5 | import numpy as np 6 | from numpy import ndarray 7 | 8 | # image suffixs 9 | SUFFIXS = ('.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.png', '.tif', '.tiff', 10 | '.webp', '.pfm') 11 | 12 | 13 | def letterbox(im: ndarray, 14 | new_shape: Union[Tuple, List] = (640, 640), 15 | color: Union[Tuple, List] = (114, 114, 114)) \ 16 | -> Tuple[ndarray, float, Tuple[float, float]]: 17 | # Resize and pad image while meeting stride-multiple constraints 18 | shape = im.shape[:2] # current shape [height, width] 19 | if isinstance(new_shape, int): 20 | new_shape = (new_shape, new_shape) 21 | # new_shape: [width, height] 22 | 23 | # Scale ratio (new / old) 24 | r = min(new_shape[0] / shape[1], new_shape[1] / shape[0]) 25 | # Compute padding [width, height] 26 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 27 | dw, dh = new_shape[0] - new_unpad[0], new_shape[1] - new_unpad[ 28 | 1] # wh padding 29 | 30 
| dw /= 2 # divide padding into 2 sides 31 | dh /= 2 32 | 33 | if shape[::-1] != new_unpad: # resize 34 | im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) 35 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) 36 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) 37 | im = cv2.copyMakeBorder(im, 38 | top, 39 | bottom, 40 | left, 41 | right, 42 | cv2.BORDER_CONSTANT, 43 | value=color) # add border 44 | return im, r, (dw, dh) 45 | 46 | 47 | def blob(im: ndarray, return_seg: bool = False) -> Union[ndarray, Tuple]: 48 | seg = None 49 | if return_seg: 50 | seg = im.astype(np.float32) / 255 51 | im = im.transpose([2, 0, 1]) 52 | im = im[np.newaxis, ...] 53 | im = np.ascontiguousarray(im).astype(np.float32) / 255 54 | if return_seg: 55 | return im, seg 56 | else: 57 | return im 58 | 59 | 60 | def sigmoid(x: ndarray) -> ndarray: 61 | return 1. / (1. + np.exp(-x)) 62 | 63 | 64 | def bbox_iou(boxes1: ndarray, boxes2: ndarray) -> ndarray: 65 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * \ 66 | (boxes1[..., 3] - boxes1[..., 1]) 67 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * \ 68 | (boxes2[..., 3] - boxes2[..., 1]) 69 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) 70 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) 71 | inter_section = np.maximum(right_down - left_up, 0.0) 72 | inter_area = inter_section[..., 0] * inter_section[..., 1] 73 | union_area = boxes1_area + boxes2_area - inter_area 74 | ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps) 75 | 76 | return ious 77 | 78 | 79 | def batched_nms(boxes: ndarray, 80 | scores: ndarray, 81 | iou_thres: float = 0.65, 82 | conf_thres: float = 0.25): 83 | labels = np.argmax(scores, axis=-1) 84 | scores = np.max(scores, axis=-1) 85 | 86 | cand = scores > conf_thres 87 | boxes = boxes[cand] 88 | scores = scores[cand] 89 | labels = labels[cand] 90 | 91 | keep_boxes = [] 92 | keep_scores = [] 93 | keep_labels = [] 94 | 95 | for cls in np.unique(labels): 96 | cls_mask = labels == cls 97 | cls_boxes = boxes[cls_mask] 98 | cls_scores = scores[cls_mask] 99 | 100 | while cls_boxes.shape[0] > 0: 101 | max_idx = np.argmax(cls_scores) 102 | max_box = cls_boxes[max_idx:max_idx + 1] 103 | max_score = cls_scores[max_idx:max_idx + 1] 104 | max_label = np.array([cls], dtype=np.int32) 105 | keep_boxes.append(max_box) 106 | keep_scores.append(max_score) 107 | keep_labels.append(max_label) 108 | other_boxes = np.delete(cls_boxes, max_idx, axis=0) 109 | other_scores = np.delete(cls_scores, max_idx, axis=0) 110 | ious = bbox_iou(max_box, other_boxes) 111 | iou_mask = ious < iou_thres 112 | if not iou_mask.any(): 113 | break 114 | cls_boxes = other_boxes[iou_mask] 115 | cls_scores = other_scores[iou_mask] 116 | 117 | if len(keep_boxes) == 0: 118 | keep_boxes = np.empty((0, 4), dtype=np.float32) 119 | keep_scores = np.empty((0, ), dtype=np.float32) 120 | keep_labels = np.empty((0, ), dtype=np.float32) 121 | 122 | else: 123 | keep_boxes = np.concatenate(keep_boxes, axis=0) 124 | keep_scores = np.concatenate(keep_scores, axis=0) 125 | keep_labels = np.concatenate(keep_labels, axis=0) 126 | 127 | return keep_boxes, keep_scores, keep_labels 128 | 129 | 130 | def nms(boxes: ndarray, 131 | scores: ndarray, 132 | labels: ndarray, 133 | iou_thres: float = 0.65, 134 | conf_thres: float = 0.5): 135 | #labels = np.argmax(scores, axis=-1) 136 | #scores = np.max(scores, axis=-1) 137 | 138 | cand = scores > conf_thres 139 | #print(f"cand: {cand}") 140 | boxes = boxes[cand] 141 | scores = scores[cand] 142 | 
labels = labels[cand] 143 | 144 | keep_boxes = [] 145 | keep_scores = [] 146 | keep_labels = [] 147 | 148 | idxs = scores.argsort() 149 | #print(f"idxs size:{idxs.size}") 150 | while idxs.size > 0: 151 | max_score_index = idxs[-1] 152 | max_box = boxes[max_score_index:max_score_index + 1] 153 | #print("max_box:", max_box) 154 | max_score = scores[max_score_index:max_score_index + 1] 155 | max_label = np.array([labels[max_score_index]], dtype=np.int32) 156 | keep_boxes.append(max_box) 157 | keep_scores.append(max_score) 158 | keep_labels.append(max_label) 159 | if idxs.size == 1: 160 | break 161 | idxs = idxs[:-1] 162 | other_boxes = boxes[idxs] 163 | ious = bbox_iou(max_box, other_boxes) 164 | iou_mask = ious < iou_thres 165 | idxs = idxs[iou_mask] 166 | 167 | if len(keep_boxes) == 0: 168 | keep_boxes = np.empty((0, 4), dtype=np.float32) 169 | keep_scores = np.empty((0, ), dtype=np.float32) 170 | keep_labels = np.empty((0, ), dtype=np.float32) 171 | 172 | else: 173 | keep_boxes = np.concatenate(keep_boxes, axis=0) 174 | keep_scores = np.concatenate(keep_scores, axis=0) 175 | keep_labels = np.concatenate(keep_labels, axis=0) 176 | 177 | return keep_boxes, keep_scores, keep_labels 178 | 179 | 180 | def path_to_list(images_path: Union[str, Path]) -> List: 181 | if isinstance(images_path, str): 182 | images_path = Path(images_path) 183 | assert images_path.exists() 184 | if images_path.is_dir(): 185 | images = [ 186 | i.absolute() for i in images_path.iterdir() if i.suffix in SUFFIXS 187 | ] 188 | else: 189 | assert images_path.suffix in SUFFIXS 190 | images = [images_path.absolute()] 191 | return images 192 | 193 | 194 | def crop_mask(masks: ndarray, bboxes: ndarray) -> ndarray: 195 | n, h, w = masks.shape 196 | x1, y1, x2, y2 = np.split(bboxes[:, :, None], [1, 2, 3], 197 | 1) # x1 shape(1,1,n) 198 | r = np.arange(w, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) 199 | c = np.arange(h, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) 200 | 201 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 202 | 203 | 204 | def det_postprocess(data: Tuple[ndarray, ndarray, ndarray, ndarray]): 205 | assert len(data) == 4 206 | iou_thres: float = 0.65 207 | num_dets, bboxes, scores, labels = (i[0] for i in data) 208 | #print(bboxes) 209 | #print(scores) 210 | #print(labels) 211 | nums = num_dets.item() 212 | #print(nums) 213 | if nums == 0: 214 | return np.empty((0, 4), dtype=np.float32), np.empty( 215 | (0, ), dtype=np.float32), np.empty((0, ), dtype=np.int32) 216 | # check score negative 217 | scores[scores < 0] = 1 + scores[scores < 0] 218 | # add nms 219 | ''' 220 | idx = nms(bboxes, scores, labels, iou_thres) 221 | print(idx) 222 | bboxes, scores, labels = bboxes[idx], scores[idx], labels[idx] 223 | ''' 224 | bboxes, scores, labels = nms(bboxes, scores, labels, iou_thres) 225 | 226 | bboxes = bboxes[:nums] 227 | scores = scores[:nums] 228 | labels = labels[:nums] 229 | return bboxes, scores, labels 230 | 231 | 232 | def seg_postprocess( 233 | data: Tuple[ndarray], 234 | shape: Union[Tuple, List], 235 | conf_thres: float = 0.25, 236 | iou_thres: float = 0.65) \ 237 | -> Tuple[ndarray, ndarray, ndarray, ndarray]: 238 | assert len(data) == 2 239 | h, w = shape[0] // 4, shape[1] // 4 # 4x downsampling 240 | outputs, proto = (i[0] for i in data) 241 | bboxes, scores, labels, maskconf = np.split(outputs, [4, 5, 6], 1) 242 | scores, labels = scores.squeeze(), labels.squeeze() 243 | idx = scores > conf_thres 244 | if not idx.any(): # no bounding boxes or seg were created 245 | return 
np.empty((0, 4), dtype=np.float32), \ 246 | np.empty((0,), dtype=np.float32), \ 247 | np.empty((0,), dtype=np.int32), \ 248 | np.empty((0, 0, 0, 0), dtype=np.int32) 249 | 250 | bboxes, scores, labels, maskconf = \ 251 | bboxes[idx], scores[idx], labels[idx], maskconf[idx] 252 | cvbboxes = np.concatenate([bboxes[:, :2], bboxes[:, 2:] - bboxes[:, :2]], 253 | 1) 254 | labels = labels.astype(np.int32) 255 | v0, v1 = map(int, (cv2.__version__).split('.')[:2]) 256 | assert v0 == 4, 'OpenCV version is wrong' 257 | if v1 > 6: 258 | idx = cv2.dnn.NMSBoxesBatched(cvbboxes, scores, labels, conf_thres, 259 | iou_thres) 260 | else: 261 | idx = cv2.dnn.NMSBoxes(cvbboxes, scores, conf_thres, iou_thres) 262 | bboxes, scores, labels, maskconf = \ 263 | bboxes[idx], scores[idx], labels[idx], maskconf[idx] 264 | masks = sigmoid(maskconf @ proto).reshape(-1, h, w) 265 | masks = crop_mask(masks, bboxes / 4.) 266 | masks = masks.transpose([1, 2, 0]) 267 | masks = cv2.resize(masks, (shape[1], shape[0]), 268 | interpolation=cv2.INTER_LINEAR) 269 | masks = masks.transpose(2, 0, 1) 270 | masks = np.ascontiguousarray((masks > 0.5)[..., None], dtype=np.float32) 271 | return bboxes, scores, labels, masks 272 | 273 | 274 | def pose_postprocess( 275 | data: Union[Tuple, ndarray], 276 | conf_thres: float = 0.25, 277 | iou_thres: float = 0.65) \ 278 | -> Tuple[ndarray, ndarray, ndarray]: 279 | if isinstance(data, tuple): 280 | assert len(data) == 1 281 | data = data[0] 282 | outputs = np.transpose(data[0], (1, 0)) 283 | bboxes, scores, kpts = np.split(outputs, [4, 5], 1) 284 | scores, kpts = scores.squeeze(), kpts.squeeze() 285 | idx = scores > conf_thres 286 | if not idx.any(): # no bounding boxes or seg were created 287 | return np.empty((0, 4), dtype=np.float32), np.empty( 288 | (0, ), dtype=np.float32), np.empty((0, 0, 0), dtype=np.float32) 289 | bboxes, scores, kpts = bboxes[idx], scores[idx], kpts[idx] 290 | xycenter, wh = np.split(bboxes, [ 291 | 2, 292 | ], -1) 293 | cvbboxes = np.concatenate([xycenter - 0.5 * wh, wh], -1) 294 | idx = cv2.dnn.NMSBoxes(cvbboxes, scores, conf_thres, iou_thres) 295 | cvbboxes, scores, kpts = cvbboxes[idx], scores[idx], kpts[idx] 296 | cvbboxes[:, 2:] += cvbboxes[:, :2] 297 | return cvbboxes, scores, kpts.reshape(idx.shape[0], -1, 3) 298 | -------------------------------------------------------------------------------- /tensorrt_infer_det.py: -------------------------------------------------------------------------------- 1 | from models import TRTModule # isort:skip 2 | import argparse 3 | from pathlib import Path 4 | 5 | import cv2 6 | import torch 7 | 8 | from config import OBJECT_CLASSES, OBJECT_MASK_CLASSES, OBJECT_COLORS, OBJECT_MASK_COLORS 9 | from models.torch_utils import det_postprocess 10 | from models.utils import blob, letterbox 11 | 12 | 13 | def inference(engine, img, mask_detect_mode, cuda_device): 14 | device = torch.device(cuda_device) 15 | Engine = TRTModule(engine, device) 16 | H, W = Engine.inp_info[0].shape[-2:] 17 | 18 | # set desired output names order 19 | Engine.set_desired(['num_dets', 'bboxes', 'scores', 'labels']) 20 | 21 | draw = img.copy() 22 | bgr, ratio, dwdh = letterbox(img, (W, H)) 23 | rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB) 24 | tensor = blob(rgb, return_seg=False) 25 | dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device) 26 | tensor = torch.asarray(tensor, device=device) 27 | # inference 28 | data = Engine(tensor) 29 | 30 | bboxes, scores, labels = det_postprocess(data) 31 | bboxes -= dwdh 32 | bboxes /= ratio 33 | 34 | for (bbox, 
score, label) in zip(bboxes, scores, labels):
 35 |         bbox = bbox.round().int().tolist()
 36 |         cls_id = int(label)
 37 |         if mask_detect_mode:
 38 |             cls = OBJECT_MASK_CLASSES[cls_id]
 39 |             color = OBJECT_MASK_COLORS[cls]
 40 |         else:
 41 |             cls = OBJECT_CLASSES[cls_id]
 42 |             color = OBJECT_COLORS[cls]
 43 |         cv2.rectangle(draw, bbox[:2], bbox[2:], color, 2)
 44 |         cv2.putText(draw,
 45 |                     f'{cls}:{score:.3f}', (bbox[0], bbox[1] - 2),
 46 |                     cv2.FONT_HERSHEY_SIMPLEX,
 47 |                     0.75, [225, 255, 255],
 48 |                     thickness=2)
 49 | 
 50 |     return draw
 51 | 
--------------------------------------------------------------------------------
/tensorrt_infer_det_without_torch.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | from pathlib import Path
  3 | 
  4 | import cv2
  5 | import numpy as np
  6 | 
  7 | from config import OBJECT_CLASSES, OBJECT_MASK_CLASSES, OBJECT_COLORS, OBJECT_MASK_COLORS
  8 | from models.utils import blob, det_postprocess, letterbox
  9 | 
 10 | 
 11 | def inference(engine, img, mask_detect_mode):
 12 |     H, W = engine.inp_info[0].shape[-2:]
 13 | 
 14 |     draw = img.copy()
 15 |     bgr, ratio, dwdh = letterbox(img, (W, H))
 16 |     rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
 17 |     tensor = blob(rgb, return_seg=False)
 18 |     dwdh = np.array(dwdh * 2, dtype=np.float32)
 19 |     tensor = np.ascontiguousarray(tensor)
 20 |     # inference
 21 |     data = engine(tensor)
 22 | 
 23 |     bboxes, scores, labels = det_postprocess(data)
 24 |     bboxes -= dwdh
 25 |     bboxes /= ratio
 26 | 
 27 |     for (bbox, score, label) in zip(bboxes, scores, labels):
 28 |         bbox = bbox.round().astype(np.int32).tolist()
 29 |         cls_id = int(label)
 30 |         if mask_detect_mode:
 31 |             cls = OBJECT_MASK_CLASSES[cls_id]
 32 |             color = OBJECT_MASK_COLORS[cls]
 33 |         else:
 34 |             cls = OBJECT_CLASSES[cls_id]
 35 |             color = OBJECT_COLORS[cls]
 36 |         cv2.rectangle(draw, bbox[:2], bbox[2:], color, 2)
 37 |         cv2.putText(draw,
 38 |                     f'{cls}:{score:.3f}', (bbox[0], bbox[1] - 2),
 39 |                     cv2.FONT_HERSHEY_SIMPLEX,
 40 |                     0.75, [225, 255, 255],
 41 |                     thickness=2)
 42 | 
 43 |     return draw
 44 | 
--------------------------------------------------------------------------------
/yolov8_test.py:
--------------------------------------------------------------------------------
  1 | from ultralytics import YOLO
  2 | import cv2, time
  3 | import numpy as np
  4 | from models.pycuda_api import TRTEngine
  5 | from tensorrt_infer_det_without_torch import inference
  6 | 
  7 | tensorrt_mode = 1
  8 | mask_detect_mode = 1
  9 | webcam_mode = 1
 10 | 
 11 | if mask_detect_mode:
 12 |     model = YOLO("model/mask_detect/best.pt")
 13 |     engine = TRTEngine("model/mask_detect/best.engine")
 14 |     target = "inference/face-mask-video.mp4"
 15 | else:
 16 |     model = YOLO("model/yolov8l.pt")
 17 |     engine = TRTEngine("model/yolov8l.engine")
 18 |     #names=model.names
 19 |     target = "inference/City.mp4"
 20 |     #target = "https://trafficvideo2.tainan.gov.tw/82520774"
 21 | 
 22 | # Run batched inference on a list of images
 23 | #results = model.predict(img, stream=True)  # return a list of Results objects
 24 | if webcam_mode:
 25 |     cap = cv2.VideoCapture(0)
 26 | else:
 27 |     cap = cv2.VideoCapture(target)
 28 | while True:
 29 |     try:
 30 |         r, img = cap.read()
 31 |         if not r:  # stream ended or the frame grab failed
 32 |             break
 33 |         st = time.time()
 34 |         #img = cv2.resize(img, (800, 600))
 35 |         if not tensorrt_mode:
 36 |             results = model(source=img)
 37 |             img = results[0].plot()  # annotated_frame
 38 |         else:
 39 |             img = inference(engine, img, mask_detect_mode)
 40 | 
 41 |         et = time.time()
 42 | 
 43 |         FPS = round(1 / (et - st))
 44 |         cv2.putText(img, 'FPS=' + str(FPS), (20, 150),
 45 |                     cv2.FONT_HERSHEY_PLAIN, 5, (0, 0, 255), 2, cv2.LINE_AA)
 46 |         cv2.imshow("YOLOv8", img)
 47 |         if cv2.waitKey(1) & 0xFF == ord('q'):
 48 |             break
 49 | 
 50 |     except Exception as e:
 51 |         print(e)
 52 |         break  # don't spin forever on a persistent error
 53 | 
 54 | cap.release()
 55 | cv2.destroyAllWindows()
--------------------------------------------------------------------------------
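Editor's notes: worked examples
--------------------------------------------------------------------------------

Both inference helpers undo the letterbox transform with bboxes -= dwdh
followed by bboxes /= ratio. The sketch below walks one box through that
mapping for a hypothetical 1280x720 frame: letterbox scales by
r = min(640/1280, 640/720) = 0.5 and pads 140 px on the top and bottom.

import numpy as np
from models.utils import letterbox

frame = np.zeros((720, 1280, 3), dtype=np.uint8)  # stand-in for a webcam frame
padded, ratio, dwdh = letterbox(frame, (640, 640))
print(padded.shape, ratio, dwdh)  # (640, 640, 3) 0.5 (0.0, 140.0)

# A box at (100, 200, 300, 400) in the letterboxed image maps back to the
# original frame exactly the way tensorrt_infer_det*.py does it:
bbox = np.array([100., 200., 300., 400.])
bbox -= np.array(dwdh * 2, dtype=np.float32)  # subtract (dw, dh, dw, dh)
bbox /= ratio
print(bbox)  # [200. 120. 600. 520.]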
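blob then turns the letterboxed RGB image into the NCHW float tensor the
engine bindings expect; a quick check of its contract:

import numpy as np
from models.utils import blob

rgb = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
tensor = blob(rgb, return_seg=False)
print(tensor.shape, tensor.dtype)  # (1, 3, 640, 640) float32, scaled to [0, 1]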
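For the pycuda path, TRTEngine is called with one ndarray per input binding
and returns host-side outputs, already synchronized; for the detection
engines used here these arrive in the binding order that det_postprocess
assumes, i.e. (num_dets, bboxes, scores, labels). A minimal sketch, reusing
the mask-detect engine path from yolov8_test.py:

import numpy as np
from models.pycuda_api import TRTEngine

engine = TRTEngine('model/mask_detect/best.engine')  # path from yolov8_test.py
H, W = engine.inp_info[0].shape[-2:]
dummy = np.zeros((1, 3, H, W), dtype=np.float32)     # a blank 'image'
num_dets, bboxes, scores, labels = engine(dummy)
print(num_dets.shape, bboxes.shape, scores.shape, labels.shape)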
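On the postprocessing side, note that nms in models/utils.py is
class-agnostic: the labels ride along with the kept boxes but do not
partition the suppression, unlike batched_nms in the same file, which
suppresses per class. That appears to be why det_postprocess routes the
already-labelled boxes through it - overlapping boxes are collapsed even
when their labels differ. A small demonstration:

import numpy as np
from models.utils import nms

boxes = np.array([[0., 0., 10., 10.],     # A: score 0.9, label 0
                  [1., 1., 10., 10.],     # B: score 0.8, label 1, IoU(A, B) = 0.81
                  [20., 20., 30., 30.]])  # C: score 0.7, label 1, no overlap
scores = np.array([0.9, 0.8, 0.7])
labels = np.array([0, 1, 1])

keep_boxes, keep_scores, keep_labels = nms(boxes, scores, labels, iou_thres=0.65)
print(keep_scores, keep_labels)  # [0.9 0.7] [0 1] - B is suppressed by A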
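Both crop_mask implementations rely on the same broadcasting trick: row and
column index ramps are compared against the box corners, yielding an
inside-the-box indicator that zeroes the mask everywhere else. In miniature:

import numpy as np
from models.utils import crop_mask

masks = np.ones((1, 4, 4), dtype=np.float32)  # one 4x4 mask, all foreground
boxes = np.array([[1., 1., 3., 3.]])          # keep only the 2x2 centre
print(crop_mask(masks, boxes)[0])
# [[0. 0. 0. 0.]
#  [0. 1. 1. 0.]
#  [0. 1. 1. 0.]
#  [0. 0. 0. 0.]]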
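Finally, both runners expose set_profiler for per-layer timing, and the
profiler classes defined next to TRTModule can be attached directly. A
minimal sketch for the torch path, assuming a static detection engine at the
example path below (any exported .engine file works):

import torch

from models import TRTModule
from models.engine import TRTProfilerV1

device = torch.device('cuda:0')
Engine = TRTModule('model/yolov8l.engine', device)  # example path
profiler = TRTProfilerV1()
Engine.set_profiler(profiler)  # TensorRT reports per-layer times on each call

dummy = torch.zeros(Engine.inp_info[0].shape,
                    dtype=Engine.inp_info[0].dtype,
                    device=device)
Engine(dummy)
profiler.report()  # per-layer cost, sorted, plus the accumulated total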