├── .github └── workflows │ └── docker-image.yml ├── .gitignore ├── .vscode ├── c_cpp_properties.json ├── launch.json ├── settings.json └── tasks.json ├── CMakeLists.txt ├── Dockerfile ├── IMG_4057.JPG ├── README.md ├── assets ├── cpp │ ├── bus.jpg │ └── zidane.jpg └── yolo │ ├── bus.jpg │ └── zidane.jpg ├── download_model.py ├── requirements.txt └── src ├── ia ├── inference.cpp └── inference.h ├── main.cpp └── video.cpp /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Build the Docker image 18 | run: docker build . --file Dockerfile --tag my-image-name:$(date +%s) 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | 163 | # Prerequisites 164 | *.d 165 | 166 | # Object files 167 | *.o 168 | *.ko 169 | *.obj 170 | *.elf 171 | 172 | # Linker output 173 | *.ilk 174 | *.map 175 | *.exp 176 | 177 | # Precompiled Headers 178 | *.gch 179 | *.pch 180 | 181 | # Libraries 182 | *.lib 183 | *.a 184 | *.la 185 | *.lo 186 | 187 | # Shared objects (inc. Windows DLLs) 188 | *.dll 189 | *.so 190 | *.so.* 191 | *.dylib 192 | 193 | # Executables 194 | *.exe 195 | *.out 196 | *.app 197 | *.i*86 198 | *.x86_64 199 | *.hex 200 | 201 | # Debug files 202 | *.dSYM/ 203 | *.su 204 | *.idb 205 | *.pdb 206 | 207 | # Kernel Module Compile Results 208 | *.mod* 209 | *.cmd 210 | .tmp_versions/ 211 | modules.order 212 | Module.symvers 213 | Mkfile.old 214 | dkms.conf 215 | 216 | *.onnx 217 | 218 | *.pt 219 | 220 | *.DS_Store -------------------------------------------------------------------------------- /.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "Mac", 5 | "includePath": [ 6 | "${workspaceFolder}/**", 7 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4/opencv2", 8 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4", 9 | "/opt/homebrew/Cellar/onnxruntime/1.17.1/include/onnxruntime" 10 | ], 11 | "defines": [], 12 | "macFrameworkPath": [], 13 | "compilerPath": "/usr/bin/g++", 14 | "cStandard": "c17", 15 | "cppStandard": "c++17", 16 | "intelliSenseMode": "clang-x64", 17 | "browse": { 18 | "path": [ 19 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4", 20 | "/opt/homebrew/Cellar/onnxruntime/1.17.1/include/onnxruntime" 21 | ], 22 | "limitSymbolsToIncludedHeaders": true, 23 | "databaseFilename": "" 24 | } 25 | } 26 | ], 27 | "version": 4 28 | } -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "(lldb) Launch", 6 | "type": "cppdbg", 7 | "request": "launch", 8 | "program": "${fileDirname}/${fileBasenameNoExtension}.out", 9 | 
"args": [], 10 | "stopAtEntry": true, 11 | "cwd": "${workspaceFolder}", 12 | "environment": [], 13 | "externalConsole": true, 14 | "MIMode": "lldb", 15 | "preLaunchTask": "Build" 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | "__availability": "cpp", 4 | "charconv": "cpp", 5 | "string": "cpp", 6 | "vector": "cpp", 7 | "__config": "cpp", 8 | "__split_buffer": "cpp", 9 | "deque": "cpp", 10 | "list": "cpp", 11 | "__bit_reference": "cpp", 12 | "__debug": "cpp", 13 | "__errc": "cpp", 14 | "__hash_table": "cpp", 15 | "__locale": "cpp", 16 | "__mutex_base": "cpp", 17 | "__node_handle": "cpp", 18 | "__threading_support": "cpp", 19 | "__tree": "cpp", 20 | "__verbose_abort": "cpp", 21 | "array": "cpp", 22 | "atomic": "cpp", 23 | "bitset": "cpp", 24 | "cctype": "cpp", 25 | "clocale": "cpp", 26 | "cmath": "cpp", 27 | "complex": "cpp", 28 | "cstdarg": "cpp", 29 | "cstddef": "cpp", 30 | "cstdint": "cpp", 31 | "cstdio": "cpp", 32 | "cstdlib": "cpp", 33 | "cstring": "cpp", 34 | "ctime": "cpp", 35 | "cwchar": "cpp", 36 | "cwctype": "cpp", 37 | "exception": "cpp", 38 | "fstream": "cpp", 39 | "initializer_list": "cpp", 40 | "iomanip": "cpp", 41 | "ios": "cpp", 42 | "iosfwd": "cpp", 43 | "iostream": "cpp", 44 | "istream": "cpp", 45 | "limits": "cpp", 46 | "locale": "cpp", 47 | "map": "cpp", 48 | "mutex": "cpp", 49 | "new": "cpp", 50 | "optional": "cpp", 51 | "ostream": "cpp", 52 | "queue": "cpp", 53 | "ratio": "cpp", 54 | "set": "cpp", 55 | "sstream": "cpp", 56 | "stdexcept": "cpp", 57 | "streambuf": "cpp", 58 | "string_view": "cpp", 59 | "system_error": "cpp", 60 | "tuple": "cpp", 61 | "typeinfo": "cpp", 62 | "unordered_map": "cpp", 63 | "variant": "cpp", 64 | "algorithm": "cpp" 65 | } 66 | } -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "label": "Build", 6 | "type": "shell", 7 | "command": "clang++", 8 | "args": [ 9 | "-std=c++17", 10 | "${file}", 11 | "/Users/josedanielsarmientoblanco/Desktop/hobby/yolov10cpp/src/ia/inference.h", 12 | "-o", 13 | "${fileDirname}/${fileBasenameNoExtension}.out", 14 | "-I", 15 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4/opencv2", 16 | "-I", 17 | "/opt/homebrew/Cellar/onnxruntime/1.17.1/include/onnxruntime", 18 | "-I", 19 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4", 20 | "-L", 21 | "/opt/homebrew/Cellar/opencv/4.10.0_9/lib", 22 | "-L", 23 | "/opt/homebrew/Cellar/onnxruntime/1.17.1/lib", 24 | "-l", 25 | "onnxruntime", 26 | "-l", 27 | "opencv_stitching", 28 | "-l", 29 | "opencv_superres", 30 | "-l", 31 | "opencv_videostab", 32 | "-l", 33 | "opencv_aruco", 34 | "-l", 35 | "opencv_bgsegm", 36 | "-l", 37 | "opencv_bioinspired", 38 | "-l", 39 | "opencv_ccalib", 40 | "-l", 41 | "opencv_dnn_objdetect", 42 | "-l", 43 | "opencv_dpm", 44 | "-l", 45 | "opencv_face", 46 | "-l", 47 | "opencv_fuzzy", 48 | "-l", 49 | "opencv_hfs", 50 | "-l", 51 | "opencv_img_hash", 52 | "-l", 53 | "opencv_line_descriptor", 54 | "-l", 55 | "opencv_optflow", 56 | "-l", 57 | "opencv_reg", 58 | "-l", 59 | "opencv_rgbd", 60 | "-l", 61 | "opencv_saliency", 62 | "-l", 63 | "opencv_stereo", 64 | "-l", 65 | "opencv_structured_light", 66 | "-l", 67 | "opencv_phase_unwrapping", 68 | "-l", 69 | 
"opencv_surface_matching", 70 | "-l", 71 | "opencv_tracking", 72 | "-l", 73 | "opencv_datasets", 74 | "-l", 75 | "opencv_dnn", 76 | "-l", 77 | "opencv_plot", 78 | "-l", 79 | "opencv_xfeatures2d", 80 | "-l", 81 | "opencv_shape", 82 | "-l", 83 | "opencv_video", 84 | "-l", 85 | "opencv_ml", 86 | "-l", 87 | "opencv_ximgproc", 88 | "-l", 89 | "opencv_xobjdetect", 90 | "-l", 91 | "opencv_objdetect", 92 | "-l", 93 | "opencv_calib3d", 94 | "-l", 95 | "opencv_features2d", 96 | "-l", 97 | "opencv_highgui", 98 | "-l", 99 | "opencv_videoio", 100 | "-l", 101 | "opencv_imgcodecs", 102 | "-l", 103 | "opencv_flann", 104 | "-l", 105 | "opencv_xphoto", 106 | "-l", 107 | "opencv_photo", 108 | "-l", 109 | "opencv_imgproc", 110 | "-l", 111 | "opencv_core", 112 | "-g" 113 | ], 114 | "group": { 115 | "kind": "build", 116 | "isDefault": true 117 | }, 118 | "problemMatcher": [ 119 | "$gcc" 120 | ] 121 | } 122 | ] 123 | } -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | # Set the project name in a variable 4 | set(project_name yolov10_cpp) 5 | project(${project_name}) 6 | set(CMAKE_CXX_STANDARD 17) 7 | 8 | find_package(OpenCV REQUIRED) 9 | 10 | # Find ONNX Runtime package 11 | find_path(ONNXRUNTIME_INCLUDE_DIR onnxruntime_c_api.h 12 | HINTS /opt/homebrew/Cellar/onnxruntime/1.17.1/include/onnxruntime 13 | ) 14 | find_library(ONNXRUNTIME_LIBRARY onnxruntime 15 | HINTS /opt/homebrew/Cellar/onnxruntime/1.17.1/lib 16 | ) 17 | 18 | if(NOT ONNXRUNTIME_INCLUDE_DIR) 19 | message(FATAL_ERROR "ONNX Runtime include directory not found") 20 | endif() 21 | if(NOT ONNXRUNTIME_LIBRARY) 22 | message(FATAL_ERROR "ONNX Runtime library not found") 23 | endif() 24 | 25 | add_library(${project_name}-lib 26 | src/ia/inference.cpp 27 | src/ia/inference.h 28 | ) 29 | 30 | target_include_directories(${project_name}-lib PUBLIC src) 31 | target_include_directories(${project_name}-lib PUBLIC ${ONNXRUNTIME_INCLUDE_DIR}) 32 | 33 | target_link_libraries(${project_name}-lib 34 | PUBLIC ${OpenCV_LIBS} 35 | PUBLIC ${ONNXRUNTIME_LIBRARY} 36 | ) 37 | 38 | # Add the main executable 39 | add_executable(${project_name} 40 | ./src/main.cpp 41 | ) 42 | target_include_directories(${project_name} PUBLIC ${ONNXRUNTIME_INCLUDE_DIR}) 43 | target_link_libraries(${project_name} ${project_name}-lib) 44 | 45 | # Add the video executable 46 | add_executable(${project_name}_video 47 | ./src/video.cpp 48 | ) 49 | target_include_directories(${project_name}_video PUBLIC ${ONNXRUNTIME_INCLUDE_DIR}) 50 | target_link_libraries(${project_name}_video ${project_name}-lib) 51 | 52 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use an official image as a parent image 2 | FROM ubuntu:20.04 3 | 4 | # Set environment variables to avoid interactive prompts during installation 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | 7 | # Install necessary dependencies 8 | RUN apt-get update && apt-get install -y \ 9 | build-essential \ 10 | cmake \ 11 | libopencv-dev \ 12 | wget \ 13 | git \ 14 | && rm -rf /var/lib/apt/lists/* 15 | 16 | # Install ONNX Runtime 17 | RUN wget https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/onnxruntime-linux-x64-1.10.0.tgz && \ 18 | tar -xzf onnxruntime-linux-x64-1.10.0.tgz && \ 19 | rm onnxruntime-linux-x64-1.10.0.tgz 20 | 21 | # Set 
ONNX Runtime library path 22 | ENV LD_LIBRARY_PATH="/onnxruntime-linux-x64-1.10.0/lib:$LD_LIBRARY_PATH" 23 | 24 | # Create a directory for your application 25 | WORKDIR /app 26 | 27 | # Copy your source code into the container 28 | COPY . . 29 | 30 | # Build your C++ application 31 | RUN mkdir build && cd build && \ 32 | cmake .. && \ 33 | make 34 | 35 | # # Run the application 36 | # CMD ["./build/object_detection", "model.onnx", "test_image.jpg"] 37 | -------------------------------------------------------------------------------- /IMG_4057.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/IMG_4057.JPG -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Yolo V10 cpp


Jose Sarmiento | josedanielsarmiento219@gmail.com

## Summary

This repository provides a basic C++ script, written against the C++17 standard, for running YOLOv10. With speed in mind, the code uses OpenCV 4.9.0_8 to manipulate images and ONNX Runtime 1.17.1 to run the model. Note that OpenCV does not support YOLOv10 natively, because the architecture integrates a TopK layer.

## Prepare the code

1. Download the model you want:

   - yolov10n
   - yolov10s
   - yolov10m
   - yolov10b
   - yolov10l
   - yolov10x

```bash
python download_model.py --model {MODEL_SELECTED}
```

## Install packages

```
conda create -n yolov10 python=3.9
conda activate yolov10

git clone https://github.com/THU-MIG/yolov10
cd yolov10

pip install -r requirements.txt
pip install -e .

cd ..
```

## Convert model

```
yolo export model=yolov10n.pt format=onnx
```

## Dependencies

1. ffmpeg
2. OpenCV
3. onnxruntime

- macOS

```
brew install ffmpeg
brew install opencv
brew install onnxruntime
```

- Ubuntu: unfortunately, ONNX Runtime is not available as a native apt-get package.

  You can install it with Python:

```
sudo apt-get update
sudo apt-get install python3-pip
pip3 install onnxruntime
```

  or with dotnet:

```
dotnet add package Microsoft.ML.OnnxRuntime
```

## How to run this code

1. Build with CMake (recommended):

```
mkdir build
cd build
cmake ..
make
```

2. Run the corresponding executable:

> static images

```
./yolov10_cpp [MODEL_PATH] [IMAGE_PATH]
```

> realtime

```
./yolov10_cpp_video [MODEL_PATH] [SOURCE]
```

(A minimal sketch of embedding the engine in your own program appears after the Reference section below.)

## Results

our cpp binding | python binding

Image 1 | Image 2


Image 1 | Image 2

> source = Apple M3 PRO

| Command Line Execution | Resource Utilization |
|---------------------------------------------------------------------|------------------------------------------------------|
| `./yolov10_cpp ../yolov10n.onnx ../bus.jpg` | **0.46s** user, **0.10s** system, **94%** CPU, **0.595s** total |
| `yolo detect predict model=yolov10n.onnx source=bus.jpg` | **1.69s** user, **2.44s** system, **291%** CPU, **1.413s** total |

## Future plans

1. Modularize the components. ✅
2. Make an example for real-time video. ✅
3. Support CUDA. ?

## Inspiration

[Ultraopxt](https://github.com/Ultraopxt/yolov10cpp)

## Reference

[1] Wang, A., Chen, H., Liu, L., Chen, K., Lin, Z., Han, J., & Ding, G. (2024). YOLOv10: Real-Time End-to-End Object Detection. arXiv [Cs.CV]. Retrieved from http://arxiv.org/abs/2405.14458

-------------------------------------------------------------------------------- /assets/cpp/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/assets/cpp/bus.jpg
-------------------------------------------------------------------------------- /assets/cpp/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/assets/cpp/zidane.jpg
-------------------------------------------------------------------------------- /assets/yolo/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/assets/yolo/bus.jpg
-------------------------------------------------------------------------------- /assets/yolo/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/assets/yolo/zidane.jpg
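For readers who want to embed the detector in their own program rather than use the bundled executables, the following is a minimal sketch of the API declared in `src/ia/inference.h`, mirroring what `src/main.cpp` does; the model and image paths are placeholders.

```cpp
#include "ia/inference.h"
#include <opencv2/opencv.hpp>
#include <vector>

int main()
{
    InferenceEngine engine("yolov10n.onnx"); // placeholder model path

    cv::Mat image = cv::imread("bus.jpg");   // placeholder image path
    std::vector<float> input = engine.preprocessImage(image);
    std::vector<float> raw = engine.runInference(input);

    // Map network-space boxes back to the original image size.
    std::vector<Detection> detections = engine.filterDetections(
        raw, /*confidence_threshold=*/0.3f,
        engine.input_shape[2], engine.input_shape[3],
        image.cols, image.rows);

    cv::imwrite("result.jpg", engine.draw_labels(image, detections));
    return 0;
}
```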
30 | print("Downloading the model...") 31 | urlretrieve(url, model + ".pt") 32 | print("Model downloaded successfully!") 33 | 34 | 35 | # Call the function to download the model 36 | download_model(args.model) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2024.2.2 2 | charset-normalizer==3.3.2 3 | coloredlogs==15.0.1 4 | contourpy==1.2.1 5 | cycler==0.12.1 6 | filelock==3.14.0 7 | flatbuffers==24.3.25 8 | fonttools==4.52.1 9 | humanfriendly==10.0 10 | idna==3.7 11 | Jinja2==3.1.4 12 | kiwisolver==1.4.5 13 | markdown-it-py==3.0.0 14 | MarkupSafe==2.1.5 15 | matplotlib==3.9.0 16 | mdurl==0.1.2 17 | mpmath==1.3.0 18 | networkx==3.3 19 | numpy==1.26.4 20 | onnx==1.14.0 21 | onnxruntime==1.15.1 22 | onnxsim==0.4.36 23 | opencv-python==4.9.0.80 24 | packaging==24.0 25 | pandas==2.2.2 26 | pillow==10.3.0 27 | protobuf==5.27.0 28 | psutil==5.9.8 29 | py-cpuinfo==9.0.0 30 | pycocotools==2.0.7 31 | Pygments==2.18.0 32 | pyparsing==3.1.2 33 | python-dateutil==2.9.0.post0 34 | pytz==2024.1 35 | PyYAML==6.0.1 36 | requests==2.32.2 37 | rich==13.7.1 38 | scipy==1.13.0 39 | seaborn==0.13.2 40 | six==1.16.0 41 | sympy==1.12 42 | thop==0.1.1.post2209072238 43 | torch==2.0.1 44 | torchvision==0.15.2 45 | tqdm==4.66.4 46 | typing_extensions==4.12.0 47 | tzdata==2024.1 48 | -e git+https://github.com/THU-MIG/yolov10@4197c99fbfc2da7a23ff5282519dbee40ff97207#egg=ultralytics 49 | urllib3==2.2.1 50 | -------------------------------------------------------------------------------- /src/ia/inference.cpp: -------------------------------------------------------------------------------- 1 | #include "inference.h" 2 | #include 3 | #include 4 | #include // For exp function 5 | 6 | const std::vector InferenceEngine::CLASS_NAMES = { 7 | "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", 8 | "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", 9 | "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", 10 | "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", 11 | "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", 12 | "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", 13 | "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", 14 | "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", 15 | "scissors", "teddy bear", "hair drier", "toothbrush"}; 16 | 17 | 18 | /** 19 | * @brief Letterbox an image to fit into the target size without changing its aspect ratio. 20 | * Adds padding to the shorter side to match the target dimensions. 21 | * 22 | * @param src Image to be letterboxed. 23 | * @param target_size Desired output size (width and height should be the same). 24 | * @param color Color of the padding (default is black). 25 | * @return Letterboxed image with padding. 
/**
 * @brief Apply Histogram Equalization to an image.
 *
 * @param src Input image in BGR format.
 * @return Image with enhanced contrast.
 */
cv::Mat applyHistogramEqualization(const cv::Mat &src)
{
    cv::Mat ycrcb_image;
    cv::cvtColor(src, ycrcb_image, cv::COLOR_BGR2YCrCb); // Convert to YCrCb color space

    std::vector<cv::Mat> channels;
    cv::split(ycrcb_image, channels);

    // Apply histogram equalization to the Y channel (intensity)
    cv::equalizeHist(channels[0], channels[0]);

    // Merge back the channels and convert to BGR
    cv::merge(channels, ycrcb_image);
    cv::Mat result;
    cv::cvtColor(ycrcb_image, result, cv::COLOR_YCrCb2BGR);

    return result;
}

/**
 * @brief Apply CLAHE to an image for adaptive contrast enhancement.
 *
 * @param src Input image in BGR format.
 * @return Image with enhanced local contrast.
 */
cv::Mat applyCLAHE(const cv::Mat &src)
{
    cv::Mat lab_image;
    cv::cvtColor(src, lab_image, cv::COLOR_BGR2Lab); // Convert to LAB color space

    std::vector<cv::Mat> lab_planes;
    cv::split(lab_image, lab_planes);

    // Apply CLAHE to the L channel (lightness)
    cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE();
    clahe->setClipLimit(4.0); // Set the clip limit for contrast enhancement
    clahe->apply(lab_planes[0], lab_planes[0]);

    // Merge the planes back and convert to BGR
    cv::merge(lab_planes, lab_image);
    cv::Mat result;
    cv::cvtColor(lab_image, result, cv::COLOR_Lab2BGR);

    return result;
}


/**
 * @brief Apply Gamma Correction to an image.
 *
 * @param src Input image in BGR format.
 * @param gamma Gamma value for correction. Values < 1 will lighten the image, values > 1 will darken it.
 * @return Image with gamma correction applied.
 */
cv::Mat applyGammaCorrection(const cv::Mat &src, float gamma)
{
    cv::Mat lut(1, 256, CV_8UC1);
    uchar *p = lut.ptr();
    for (int i = 0; i < 256; ++i)
    {
        p[i] = cv::saturate_cast<uchar>(std::pow(i / 255.0, gamma) * 255.0);
    }

    cv::Mat result;
    cv::LUT(src, lut, result); // Apply the gamma lookup table to the image

    return result;
}
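The lookup table built in `applyGammaCorrection` implements the standard power-law mapping

$$
\mathrm{LUT}(i) = \left\lfloor 255 \cdot \left(\frac{i}{255}\right)^{\gamma} + 0.5 \right\rfloor, \qquad i = 0, \dots, 255.
$$

For example, with $\gamma = 0.5$ a midtone value of 64 maps to $255 \cdot (64/255)^{0.5} \approx 128$, which is why values of $\gamma$ below 1 lighten the image.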
namespace
{
    // Session options must be configured before the Ort::Session that uses
    // them is constructed in the initializer list below; setting them after
    // construction would have no effect on the session.
    Ort::SessionOptions &configureSessionOptions(Ort::SessionOptions &options)
    {
        options.SetIntraOpNumThreads(1);
        options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
        return options;
    }
}

InferenceEngine::InferenceEngine(const std::string &model_path)
    : env(ORT_LOGGING_LEVEL_WARNING, "ONNXRuntime"),
      session_options(),
      session(env, model_path.c_str(), configureSessionOptions(session_options)),
      input_shape{1, 3, 640, 640}
{
    // Check if the session was created successfully
    if (!session)
    {
        throw std::runtime_error("Failed to create ONNX Runtime session.");
    }
}

InferenceEngine::~InferenceEngine() {}

/*
 * Function to preprocess the image
 *
 * @param image: input image as cv::Mat
 * @return: vector of floats representing the preprocessed image
 */
std::vector<float> InferenceEngine::preprocessImage(const cv::Mat &image)
{
    if (image.empty())
    {
        throw std::runtime_error("Could not read the image");
    }

    // Step 1: Apply image enhancement techniques
    cv::Mat enhanced_image = applyCLAHE(image); // Use CLAHE as an example
    // cv::Mat enhanced_image = applyHistogramEqualization(image); // Or use Histogram Equalization
    // cv::Mat enhanced_image = applyGammaCorrection(image, 1.2); // Or use Gamma Correction

    // Step 2: Apply letterbox to the enhanced image
    cv::Mat letterboxed_image = letterbox(enhanced_image, cv::Size(input_shape[2], input_shape[3]));

    // Step 3: Convert image to float and normalize
    letterboxed_image.convertTo(letterboxed_image, CV_32F, 1.0 / 255);

    // Step 4: Convert from BGR to RGB
    cv::cvtColor(letterboxed_image, letterboxed_image, cv::COLOR_BGR2RGB);

    // Step 5: Prepare the input tensor values as a 1D vector
    std::vector<float> input_tensor_values;
    input_tensor_values.reserve(input_shape[1] * input_shape[2] * input_shape[3]);

    // Convert Mat to vector of floats (HWC to CHW)
    std::vector<cv::Mat> channels(3);
    cv::split(letterboxed_image, channels);

    for (int c = 0; c < 3; ++c)
    {
        input_tensor_values.insert(input_tensor_values.end(), (float *)channels[c].data, (float *)channels[c].data + input_shape[2] * input_shape[3]);
    }

    return input_tensor_values;
}
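The HWC-to-CHW flattening at the end of `preprocessImage` places pixel (x, y) of channel c at index c·H·W + y·W + x, which is the layout ONNX Runtime expects for an NCHW float tensor. A tiny self-contained check (the helper name is ours, purely illustrative):

```cpp
#include <cassert>
#include <cstddef>

// Index of pixel (x, y) of channel c in a flattened CHW tensor of size H x W.
inline std::size_t chwIndex(std::size_t c, std::size_t y, std::size_t x,
                            std::size_t H, std::size_t W)
{
    return c * H * W + y * W + x;
}

int main()
{
    // For the 640x640 input used here, channel 1 starts right after channel 0,
    // and row 1 of channel 0 starts one full row (W floats) in.
    assert(chwIndex(1, 0, 0, 640, 640) == 640 * 640);
    assert(chwIndex(0, 1, 0, 640, 640) == 640);
    return 0;
}
```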
/*
 * Function to filter the detections based on the confidence threshold
 *
 * @param results: vector of floats representing the output tensor
 * @param confidence_threshold: minimum confidence threshold
 * @param img_width: width of the network input
 * @param img_height: height of the network input
 * @param orig_width: original width of the image
 * @param orig_height: original height of the image
 * @return: vector of Detection objects
 */
std::vector<Detection> InferenceEngine::filterDetections(const std::vector<float> &results, float confidence_threshold, int img_width, int img_height, int orig_width, int orig_height)
{
    std::vector<Detection> detections;
    // Each detection occupies six floats: left, top, right, bottom, confidence, class id
    const int num_detections = results.size() / 6;

    // Calculate scale and padding factors (the inverse of the letterbox transform)
    float scale = std::min(img_width / (float)orig_width, img_height / (float)orig_height);
    int new_width = static_cast<int>(orig_width * scale);
    int new_height = static_cast<int>(orig_height * scale);
    int pad_x = (img_width - new_width) / 2;
    int pad_y = (img_height - new_height) / 2;

    detections.reserve(num_detections);

    for (int i = 0; i < num_detections; ++i)
    {
        float left = results[i * 6 + 0];
        float top = results[i * 6 + 1];
        float right = results[i * 6 + 2];
        float bottom = results[i * 6 + 3];
        float confidence = results[i * 6 + 4];
        int class_id = static_cast<int>(results[i * 6 + 5]);

        if (confidence >= confidence_threshold)
        {
            // Remove padding and rescale to original image dimensions
            left = (left - pad_x) / scale;
            top = (top - pad_y) / scale;
            right = (right - pad_x) / scale;
            bottom = (bottom - pad_y) / scale;

            int x = static_cast<int>(left);
            int y = static_cast<int>(top);
            int width = static_cast<int>(right - left);
            int height = static_cast<int>(bottom - top);

            detections.push_back(
                {confidence,
                 cv::Rect(x, y, width, height),
                 class_id,
                 CLASS_NAMES[class_id]});
        }
    }

    // Apply Soft-NMS to refine detections
    applySoftNMS(detections, 0.5, 0.3); // Tweak the sigma and IoU threshold values as needed

    return detections;
}
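Explicitly, the un-padding arithmetic above inverts the letterbox mapping. With network input $W_{\text{net}} \times H_{\text{net}}$ and original image size $W_o \times H_o$:

$$
s = \min\!\left(\frac{W_{\text{net}}}{W_o}, \frac{H_{\text{net}}}{H_o}\right), \qquad
p_x = \frac{W_{\text{net}} - \lfloor s W_o \rfloor}{2}, \qquad
p_y = \frac{H_{\text{net}} - \lfloor s H_o \rfloor}{2},
$$

$$
x_o = \frac{x_{\text{net}} - p_x}{s}, \qquad y_o = \frac{y_{\text{net}} - p_y}{s}.
$$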
/*
 * Function to run inference
 *
 * @param input_tensor_values: vector of floats representing the input tensor
 * @return: vector of floats representing the output tensor
 */
std::vector<float> InferenceEngine::runInference(const std::vector<float> &input_tensor_values)
{
    Ort::AllocatorWithDefaultOptions allocator;

    std::string input_name = getInputName();
    std::string output_name = getOutputName();

    const char *input_name_ptr = input_name.c_str();
    const char *output_name_ptr = output_name.c_str();

    Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, const_cast<float *>(input_tensor_values.data()), input_tensor_values.size(), input_shape.data(), input_shape.size());

    auto output_tensors = session.Run(Ort::RunOptions{nullptr}, &input_name_ptr, &input_tensor, 1, &output_name_ptr, 1);

    float *floatarr = output_tensors[0].GetTensorMutableData<float>();
    size_t output_tensor_size = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();

    return std::vector<float>(floatarr, floatarr + output_tensor_size);
}

/*
 * Function to draw the labels on the image
 *
 * @param image: input image
 * @param detections: vector of Detection objects
 * @return: image with labels drawn
 */
cv::Mat InferenceEngine::draw_labels(const cv::Mat &image, const std::vector<Detection> &detections)
{
    cv::Mat result = image.clone();

    for (const auto &detection : detections)
    {
        cv::rectangle(result, detection.bbox, cv::Scalar(0, 255, 0), 2);
        std::string label = detection.class_name + ": " + std::to_string(detection.confidence);

        int baseLine;
        cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        cv::rectangle(
            result,
            cv::Point(detection.bbox.x, detection.bbox.y - labelSize.height),
            cv::Point(detection.bbox.x + labelSize.width, detection.bbox.y + baseLine),
            cv::Scalar(255, 255, 255),
            cv::FILLED);

        cv::putText(
            result,
            label,
            cv::Point(detection.bbox.x, detection.bbox.y),
            cv::FONT_HERSHEY_SIMPLEX,
            0.5,
            cv::Scalar(0, 0, 0),
            1);
    }

    return result;
}

/*
 * Function to get the input name
 *
 * @return: name of the input tensor
 */
std::string InferenceEngine::getInputName()
{
    Ort::AllocatorWithDefaultOptions allocator;
    Ort::AllocatedStringPtr name_allocator = session.GetInputNameAllocated(0, allocator);
    return std::string(name_allocator.get());
}

/*
 * Function to get the output name
 *
 * @return: name of the output tensor
 */
std::string InferenceEngine::getOutputName()
{
    Ort::AllocatorWithDefaultOptions allocator;
    Ort::AllocatedStringPtr name_allocator = session.GetOutputNameAllocated(0, allocator);
    return std::string(name_allocator.get());
}
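`filterDetections` assumes the exported YOLOv10 graph emits a single output read in strides of six floats per detection. A defensive check one could add inside `runInference` before returning (a sketch; `output_tensors` refers to the local variable there) would verify this before indexing:

```cpp
// Sketch: verify the output tensor is [1, N, 6] before reading it
// six floats at a time (as filterDetections does).
std::vector<int64_t> out_shape =
    output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
if (out_shape.size() != 3 || out_shape[2] != 6)
{
    throw std::runtime_error("Unexpected YOLOv10 output shape");
}
```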
-------------------------------------------------------------------------------- /src/ia/inference.h: --------------------------------------------------------------------------------
#ifndef INFERENCE_H
#define INFERENCE_H

#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
#include <string>
#include <vector>
#include <cmath> // For exp function

struct Detection
{
    float confidence;
    cv::Rect bbox;
    int class_id;
    std::string class_name;
};


class InferenceEngine
{
public:
    InferenceEngine(const std::string &model_path);
    ~InferenceEngine();

    std::vector<float> preprocessImage(const cv::Mat &image);
    std::vector<Detection> filterDetections(const std::vector<float> &results, float confidence_threshold, int img_width, int img_height, int orig_width, int orig_height);
    std::vector<float> runInference(const std::vector<float> &input_tensor_values);

    cv::Mat draw_labels(const cv::Mat &image, const std::vector<Detection> &detections);

    std::vector<int64_t> input_shape;

private:
    Ort::Env env;
    Ort::SessionOptions session_options;
    Ort::Session session;

    std::string getInputName();
    std::string getOutputName();

    static const std::vector<std::string> CLASS_NAMES;
};


#endif // INFERENCE_H

-------------------------------------------------------------------------------- /src/main.cpp: --------------------------------------------------------------------------------
#include "./ia/inference.h"
#include <iostream>
#include <string>



int main(int argc, char *argv[])
{
    if (argc != 3)
    {
        std::cerr << "Usage: " << argv[0] << " <model_path> <image_path>" << std::endl;
        return 1;
    }

    std::string model_path = argv[1];
    std::string image_path = argv[2];

    try
    {
        InferenceEngine engine(model_path);

        cv::Mat image = cv::imread(image_path);
        int orig_width = image.cols;
        int orig_height = image.rows;
        std::vector<float> input_tensor_values = engine.preprocessImage(image);

        std::vector<float> results = engine.runInference(input_tensor_values);

        float confidence_threshold = 0.3;

        std::vector<Detection> detections = engine.filterDetections(results, confidence_threshold, engine.input_shape[2], engine.input_shape[3], orig_width, orig_height);

        cv::Mat output = engine.draw_labels(image, detections);

        cv::imwrite("result.jpg", output);
    }
std::cerr << "Error: " << e.what() << std::endl; 40 | return 1; 41 | } 42 | 43 | return 0; 44 | } 45 | -------------------------------------------------------------------------------- /src/video.cpp: -------------------------------------------------------------------------------- 1 | #include "./ia/inference.h" 2 | #include 3 | #include 4 | 5 | int main(int argc, char const *argv[]) 6 | { 7 | if (argc != 3) 8 | { 9 | std::cerr << "Usage: " << argv[0] << " " << std::endl; 10 | return 1; 11 | } 12 | std::string model_path = argv[1]; 13 | 14 | auto source = atoi(argv[1]); // 0 for webcam, 1 for video file 15 | int apiID = cv::CAP_ANY; // 0 = autodetect default API 16 | 17 | cv::namedWindow("yolov10", cv::WINDOW_AUTOSIZE); 18 | 19 | InferenceEngine engine(model_path); 20 | 21 | cv::VideoCapture cap; 22 | 23 | cap.open(source, apiID); 24 | 25 | if (!cap.isOpened()) 26 | { 27 | std::cerr << "ERROR! Unable to open camera\n"; 28 | return -1; 29 | } 30 | 31 | cv::Mat frame; 32 | 33 | std::cout << "Start grabbing" << std::endl 34 | << "Press any key to terminate" << std::endl; 35 | 36 | for (;;) 37 | { 38 | cap.read(frame); 39 | 40 | if (frame.empty()) 41 | { 42 | std::cerr << "ERROR! blank frame grabbed\n"; 43 | break; 44 | } 45 | 46 | int orig_width = frame.cols; 47 | int orig_height = frame.rows; 48 | auto timer = cv::getTickCount(); 49 | 50 | std::vector input_tensor_values = engine.preprocessImage(frame); 51 | 52 | std::vector results = engine.runInference(input_tensor_values); 53 | 54 | float confidence_threshold = 0.3; 55 | 56 | std::vector detections = engine.filterDetections(results, confidence_threshold, engine.input_shape[2], engine.input_shape[3], orig_width, orig_height); 57 | 58 | double fps = cv::getTickFrequency() / ((double)cv::getTickCount() - timer); 59 | 60 | cv::putText(frame, "FPS: " + std::to_string(fps), cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2, 8); 61 | 62 | cv::Mat output = engine.draw_labels(frame, detections); 63 | 64 | cv::imshow("test", output); 65 | 66 | if (cv::waitKey(5) >= 0) 67 | break; 68 | } 69 | 70 | return 0; 71 | } 72 | --------------------------------------------------------------------------------