├── .github
│   └── workflows
│       └── docker-image.yml
├── .gitignore
├── .vscode
│   ├── c_cpp_properties.json
│   ├── launch.json
│   ├── settings.json
│   └── tasks.json
├── CMakeLists.txt
├── Dockerfile
├── IMG_4057.JPG
├── README.md
├── assets
│   ├── cpp
│   │   ├── bus.jpg
│   │   └── zidane.jpg
│   └── yolo
│       ├── bus.jpg
│       └── zidane.jpg
├── download_model.py
├── requirements.txt
└── src
    ├── ia
    │   ├── inference.cpp
    │   └── inference.h
    ├── main.cpp
    └── video.cpp
/.github/workflows/docker-image.yml:
--------------------------------------------------------------------------------
1 | name: Docker Image CI
2 |
3 | on:
4 | push:
5 | branches: [ "main" ]
6 | pull_request:
7 | branches: [ "main" ]
8 |
9 | jobs:
10 |
11 | build:
12 |
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v4
17 | - name: Build the Docker image
18 | run: docker build . --file Dockerfile --tag my-image-name:$(date +%s)
19 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 |
163 | # Prerequisites
164 | *.d
165 |
166 | # Object files
167 | *.o
168 | *.ko
169 | *.obj
170 | *.elf
171 |
172 | # Linker output
173 | *.ilk
174 | *.map
175 | *.exp
176 |
177 | # Precompiled Headers
178 | *.gch
179 | *.pch
180 |
181 | # Libraries
182 | *.lib
183 | *.a
184 | *.la
185 | *.lo
186 |
187 | # Shared objects (inc. Windows DLLs)
188 | *.dll
189 | *.so
190 | *.so.*
191 | *.dylib
192 |
193 | # Executables
194 | *.exe
195 | *.out
196 | *.app
197 | *.i*86
198 | *.x86_64
199 | *.hex
200 |
201 | # Debug files
202 | *.dSYM/
203 | *.su
204 | *.idb
205 | *.pdb
206 |
207 | # Kernel Module Compile Results
208 | *.mod*
209 | *.cmd
210 | .tmp_versions/
211 | modules.order
212 | Module.symvers
213 | Mkfile.old
214 | dkms.conf
215 |
216 | *.onnx
217 |
218 | *.pt
219 |
220 | *.DS_Store
--------------------------------------------------------------------------------
/.vscode/c_cpp_properties.json:
--------------------------------------------------------------------------------
1 | {
2 | "configurations": [
3 | {
4 | "name": "Mac",
5 | "includePath": [
6 | "${workspaceFolder}/**",
7 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4/opencv2",
8 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4",
9 | "/opt/homebrew/Cellar/onnxruntime/1.17.1/include/onnxruntime"
10 | ],
11 | "defines": [],
12 | "macFrameworkPath": [],
13 | "compilerPath": "/usr/bin/g++",
14 | "cStandard": "c17",
15 | "cppStandard": "c++17",
16 | "intelliSenseMode": "clang-x64",
17 | "browse": {
18 | "path": [
19 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4",
20 | "/opt/homebrew/Cellar/onnxruntime/1.17.1/include/onnxruntime"
21 | ],
22 | "limitSymbolsToIncludedHeaders": true,
23 | "databaseFilename": ""
24 | }
25 | }
26 | ],
27 | "version": 4
28 | }
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "0.2.0",
3 | "configurations": [
4 | {
5 | "name": "(lldb) Launch",
6 | "type": "cppdbg",
7 | "request": "launch",
8 | "program": "${fileDirname}/${fileBasenameNoExtension}.out",
9 | "args": [],
10 | "stopAtEntry": true,
11 | "cwd": "${workspaceFolder}",
12 | "environment": [],
13 | "externalConsole": true,
14 | "MIMode": "lldb",
15 | "preLaunchTask": "Build"
16 | }
17 | ]
18 | }
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "files.associations": {
3 | "__availability": "cpp",
4 | "charconv": "cpp",
5 | "string": "cpp",
6 | "vector": "cpp",
7 | "__config": "cpp",
8 | "__split_buffer": "cpp",
9 | "deque": "cpp",
10 | "list": "cpp",
11 | "__bit_reference": "cpp",
12 | "__debug": "cpp",
13 | "__errc": "cpp",
14 | "__hash_table": "cpp",
15 | "__locale": "cpp",
16 | "__mutex_base": "cpp",
17 | "__node_handle": "cpp",
18 | "__threading_support": "cpp",
19 | "__tree": "cpp",
20 | "__verbose_abort": "cpp",
21 | "array": "cpp",
22 | "atomic": "cpp",
23 | "bitset": "cpp",
24 | "cctype": "cpp",
25 | "clocale": "cpp",
26 | "cmath": "cpp",
27 | "complex": "cpp",
28 | "cstdarg": "cpp",
29 | "cstddef": "cpp",
30 | "cstdint": "cpp",
31 | "cstdio": "cpp",
32 | "cstdlib": "cpp",
33 | "cstring": "cpp",
34 | "ctime": "cpp",
35 | "cwchar": "cpp",
36 | "cwctype": "cpp",
37 | "exception": "cpp",
38 | "fstream": "cpp",
39 | "initializer_list": "cpp",
40 | "iomanip": "cpp",
41 | "ios": "cpp",
42 | "iosfwd": "cpp",
43 | "iostream": "cpp",
44 | "istream": "cpp",
45 | "limits": "cpp",
46 | "locale": "cpp",
47 | "map": "cpp",
48 | "mutex": "cpp",
49 | "new": "cpp",
50 | "optional": "cpp",
51 | "ostream": "cpp",
52 | "queue": "cpp",
53 | "ratio": "cpp",
54 | "set": "cpp",
55 | "sstream": "cpp",
56 | "stdexcept": "cpp",
57 | "streambuf": "cpp",
58 | "string_view": "cpp",
59 | "system_error": "cpp",
60 | "tuple": "cpp",
61 | "typeinfo": "cpp",
62 | "unordered_map": "cpp",
63 | "variant": "cpp",
64 | "algorithm": "cpp"
65 | }
66 | }
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "2.0.0",
3 | "tasks": [
4 | {
5 | "label": "Build",
6 | "type": "shell",
7 | "command": "clang++",
8 | "args": [
9 | "-std=c++17",
10 | "${file}",
11 | "/Users/josedanielsarmientoblanco/Desktop/hobby/yolov10cpp/src/ia/inference.h",
12 | "-o",
13 | "${fileDirname}/${fileBasenameNoExtension}.out",
14 | "-I",
15 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4/opencv2",
16 | "-I",
17 | "/opt/homebrew/Cellar/onnxruntime/1.17.1/include/onnxruntime",
18 | "-I",
19 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4",
20 | "-L",
21 | "/opt/homebrew/Cellar/opencv/4.10.0_9/lib",
22 | "-L",
23 | "/opt/homebrew/Cellar/onnxruntime/1.17.1/lib",
24 | "-l",
25 | "onnxruntime",
26 | "-l",
27 | "opencv_stitching",
28 | "-l",
29 | "opencv_superres",
30 | "-l",
31 | "opencv_videostab",
32 | "-l",
33 | "opencv_aruco",
34 | "-l",
35 | "opencv_bgsegm",
36 | "-l",
37 | "opencv_bioinspired",
38 | "-l",
39 | "opencv_ccalib",
40 | "-l",
41 | "opencv_dnn_objdetect",
42 | "-l",
43 | "opencv_dpm",
44 | "-l",
45 | "opencv_face",
46 | "-l",
47 | "opencv_fuzzy",
48 | "-l",
49 | "opencv_hfs",
50 | "-l",
51 | "opencv_img_hash",
52 | "-l",
53 | "opencv_line_descriptor",
54 | "-l",
55 | "opencv_optflow",
56 | "-l",
57 | "opencv_reg",
58 | "-l",
59 | "opencv_rgbd",
60 | "-l",
61 | "opencv_saliency",
62 | "-l",
63 | "opencv_stereo",
64 | "-l",
65 | "opencv_structured_light",
66 | "-l",
67 | "opencv_phase_unwrapping",
68 | "-l",
69 | "opencv_surface_matching",
70 | "-l",
71 | "opencv_tracking",
72 | "-l",
73 | "opencv_datasets",
74 | "-l",
75 | "opencv_dnn",
76 | "-l",
77 | "opencv_plot",
78 | "-l",
79 | "opencv_xfeatures2d",
80 | "-l",
81 | "opencv_shape",
82 | "-l",
83 | "opencv_video",
84 | "-l",
85 | "opencv_ml",
86 | "-l",
87 | "opencv_ximgproc",
88 | "-l",
89 | "opencv_xobjdetect",
90 | "-l",
91 | "opencv_objdetect",
92 | "-l",
93 | "opencv_calib3d",
94 | "-l",
95 | "opencv_features2d",
96 | "-l",
97 | "opencv_highgui",
98 | "-l",
99 | "opencv_videoio",
100 | "-l",
101 | "opencv_imgcodecs",
102 | "-l",
103 | "opencv_flann",
104 | "-l",
105 | "opencv_xphoto",
106 | "-l",
107 | "opencv_photo",
108 | "-l",
109 | "opencv_imgproc",
110 | "-l",
111 | "opencv_core",
112 | "-g"
113 | ],
114 | "group": {
115 | "kind": "build",
116 | "isDefault": true
117 | },
118 | "problemMatcher": [
119 | "$gcc"
120 | ]
121 | }
122 | ]
123 | }
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | # Set the project name in a variable
4 | set(project_name yolov10_cpp)
5 | project(${project_name})
6 | set(CMAKE_CXX_STANDARD 17)
7 |
8 | find_package(OpenCV REQUIRED)
9 |
10 | # Find ONNX Runtime package
11 | find_path(ONNXRUNTIME_INCLUDE_DIR onnxruntime_c_api.h
12 | HINTS /opt/homebrew/Cellar/onnxruntime/1.17.1/include/onnxruntime
13 | )
14 | find_library(ONNXRUNTIME_LIBRARY onnxruntime
15 | HINTS /opt/homebrew/Cellar/onnxruntime/1.17.1/lib
16 | )
17 |
18 | if(NOT ONNXRUNTIME_INCLUDE_DIR)
19 | message(FATAL_ERROR "ONNX Runtime include directory not found")
20 | endif()
21 | if(NOT ONNXRUNTIME_LIBRARY)
22 | message(FATAL_ERROR "ONNX Runtime library not found")
23 | endif()
24 |
25 | add_library(${project_name}-lib
26 | src/ia/inference.cpp
27 | src/ia/inference.h
28 | )
29 |
30 | target_include_directories(${project_name}-lib PUBLIC src)
31 | target_include_directories(${project_name}-lib PUBLIC ${ONNXRUNTIME_INCLUDE_DIR})
32 |
33 | target_link_libraries(${project_name}-lib
34 | PUBLIC ${OpenCV_LIBS}
35 | PUBLIC ${ONNXRUNTIME_LIBRARY}
36 | )
37 |
38 | # Add the main executable
39 | add_executable(${project_name}
40 | ./src/main.cpp
41 | )
42 | target_include_directories(${project_name} PUBLIC ${ONNXRUNTIME_INCLUDE_DIR})
43 | target_link_libraries(${project_name} ${project_name}-lib)
44 |
45 | # Add the video executable
46 | add_executable(${project_name}_video
47 | ./src/video.cpp
48 | )
49 | target_include_directories(${project_name}_video PUBLIC ${ONNXRUNTIME_INCLUDE_DIR})
50 | target_link_libraries(${project_name}_video ${project_name}-lib)
51 |
52 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use an official image as a parent image
2 | FROM ubuntu:20.04
3 |
4 | # Set environment variables to avoid interactive prompts during installation
5 | ENV DEBIAN_FRONTEND=noninteractive
6 |
7 | # Install necessary dependencies
8 | RUN apt-get update && apt-get install -y \
9 | build-essential \
10 | cmake \
11 | libopencv-dev \
12 | wget \
13 | git \
14 | && rm -rf /var/lib/apt/lists/*
15 |
16 | # Install ONNX Runtime
17 | RUN wget https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/onnxruntime-linux-x64-1.10.0.tgz && \
18 | tar -xzf onnxruntime-linux-x64-1.10.0.tgz && \
19 | rm onnxruntime-linux-x64-1.10.0.tgz
20 |
21 | # Set ONNX Runtime library path
22 | ENV LD_LIBRARY_PATH="/onnxruntime-linux-x64-1.10.0/lib:$LD_LIBRARY_PATH"
23 |
24 | # Create a directory for your application
25 | WORKDIR /app
26 |
27 | # Copy your source code into the container
28 | COPY . .
29 |
30 | # Build your C++ application
31 | RUN mkdir build && cd build && \
32 | cmake .. && \
33 | make
34 |
35 | # # Run the application
36 | # CMD ["./build/object_detection", "model.onnx", "test_image.jpg"]
37 |
--------------------------------------------------------------------------------
/IMG_4057.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/IMG_4057.JPG
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Yolo V10 cpp
2 |
3 | Jose Sarmiento | josedanielsarmiento219@gmail.com
4 |
5 |
6 | ## Summary
7 |
8 | This repository provides a basic C++17 inference script for YOLOv10, built with speed in mind. The code uses OpenCV 4.9.0_8 to manipulate images and ONNX Runtime 1.17.1 to run the model. Note that OpenCV does not support YOLOv10 natively, because the architecture embeds a TopK layer.
9 |
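Since OpenCV cannot load the graph, all inference goes through ONNX Runtime. A minimal sketch of that choice (assuming the onnxruntime headers are installed and a `yolov10n.onnx` file is present; the full engine lives in `src/ia/inference.cpp`):

```cpp
#include <onnxruntime_cxx_api.h>

int main() {
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "yolov10");
    Ort::SessionOptions options;
    // ONNX Runtime accepts the YOLOv10 graph, TopK layer and all,
    // where cv::dnn::readNetFromONNX would reject it.
    Ort::Session session(env, "yolov10n.onnx", options);
    return 0;
}
```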
10 |
11 |
12 | ## Prepare the code
13 |
14 |
15 | 1. Download the model you want
16 |
17 |
18 | - yolov10n
19 | - yolov10s
20 | - yolov10m
21 | - yolov10b
22 | - yolov10l
23 | - yolov10x
24 |
25 |
26 | ```bash
27 | python download_model.py --model {MODEL_SELECTED}
28 | ```
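
For example, to fetch the smallest (nano) checkpoint:

```bash
python download_model.py --model yolov10n
```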
29 |
30 | ## Install packages
31 |
32 | ```
33 | conda create -n yolov10 python=3.9
34 | conda activate yolov10
35 |
36 | git clone https://github.com/THU-MIG/yolov10
37 | cd yolov10
38 |
39 | pip install -r requirements.txt
40 | pip install -e .
41 |
42 | cd ..
43 | ```
44 |
45 | ## Convert model
46 |
47 | ```
48 | yolo export model=yolov10n.pt format=onnx
49 | ```
50 | ## Dependencies
51 |
52 | 1. ffmpeg
53 | 2. OpenCV
54 | 3. onnxruntime
55 |
56 |
57 | - MacOs
58 | ```
59 | brew install ffmpeg
60 | brew install opencv
61 | brew install onnxruntime
62 | ```
63 |
64 | - Ubuntu: unfortunately, ONNX Runtime is not available as a native apt-get package
65 |
66 | You can install it with Python:
67 | ```
68 | sudo apt-get update
69 | sudo apt-get install python3-pip
70 | pip3 install onnxruntime
71 | ```
72 |
73 | or with .NET:
74 | ```
75 | dotnet add package Microsoft.ML.OnnxRuntime
76 |
77 | ```
78 |
79 |
80 | ## How to run this code
81 |
82 |
83 | 1. Build with CMake (recommended)
84 |
85 | ```
86 | mkdir build
87 | cd build
88 | cmake ..
89 | make
90 | ```
91 |
92 |
93 | 2. Run one of the following commands
94 |
95 | > static images
96 |
97 | ```
98 | ./yolov10_cpp [MODEL_PATH] [IMAGE_PATH]
99 | ```
100 |
101 | > realtime
102 |
103 | ```
104 | ./yolov10_cpp_video [MODEL_PATH] [SOURCE]
105 | ```
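
For example (assuming `yolov10n.onnx` sits one directory up and the default webcam is index 0):

```bash
./yolov10_cpp ../yolov10n.onnx ../bus.jpg
./yolov10_cpp_video ../yolov10n.onnx 0
```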
106 |
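The repository also ships a `Dockerfile` (the same one the CI workflow builds). A sketch of using it, with a hypothetical image tag:

```bash
docker build -t yolov10cpp .
docker run --rm -it yolov10cpp bash
```
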
107 | ## Results
108 |
109 | Output images from our C++ binding and the Python binding (`assets/cpp` vs. `assets/yolo`):
110 |
111 | | our cpp binding | python binding |
112 | |-----------------|----------------|
113 | | ![bus (cpp)](assets/cpp/bus.jpg) | ![bus (yolo)](assets/yolo/bus.jpg) |
114 | | ![zidane (cpp)](assets/cpp/zidane.jpg) | ![zidane (yolo)](assets/yolo/zidane.jpg) |
115 |
121 | > Measured on an Apple M3 Pro
122 |
123 | | Command Line Execution | Resource Utilization |
124 | |---------------------------------------------------------------------|------------------------------------------------------|
125 | | `./yolov10_cpp ../yolov10n.onnx ../bus.jpg` | **0.46s** user, **0.10s** system, **94%** CPU, **0.595s** total |
126 | | `yolo detect predict model=yolov10n.onnx source=bus.jpg` | **1.69s** user, **2.44s** system, **291%** CPU, **1.413s** total |
127 |
128 |
129 | ## Future plans
130 |
131 | 1. Modularize the components. ✅
132 | 2. Make an example for real-time video. ✅
133 | 3. Support CUDA. ?
134 |
135 | ## Inspiration
136 |
137 | [Ultraopxt](https://github.com/Ultraopxt/yolov10cpp)
138 |
139 |
140 | ## Reference
141 |
142 | [1] Wang, A., Chen, H., Liu, L., Chen, K., Lin, Z., Han, J., & Ding, G. (2024). YOLOv10: Real-Time End-to-End Object Detection. arXiv [Cs.CV]. Retrieved from http://arxiv.org/abs/2405.14458
--------------------------------------------------------------------------------
/assets/cpp/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/assets/cpp/bus.jpg
--------------------------------------------------------------------------------
/assets/cpp/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/assets/cpp/zidane.jpg
--------------------------------------------------------------------------------
/assets/yolo/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/assets/yolo/bus.jpg
--------------------------------------------------------------------------------
/assets/yolo/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/assets/yolo/zidane.jpg
--------------------------------------------------------------------------------
/download_model.py:
--------------------------------------------------------------------------------
1 | # Import libraries
2 | import argparse
3 |
4 | from urllib.request import urlretrieve
10 |
11 |
12 | # Parse the command-line arguments
13 | parser = argparse.ArgumentParser(description='Download a YOLOv10 model checkpoint.')
14 | parser.add_argument(
15 | '--model',
16 | choices=['yolov10n', 'yolov10s', 'yolov10m', 'yolov10b', 'yolov10l', 'yolov10x'],
17 | default='yolov10n',
18 | help='Model to download'
19 | )
20 |
21 | args = parser.parse_args()
22 |
23 | def download_model(model):
24 | '''
25 | Function to download the model from the github release page
26 | '''
27 |
28 | url = "https://github.com/THU-MIG/yolov10/releases/download/v1.1/" + model + ".pt"
29 | # Download the checkpoint file using the urllib package.
30 | print("Downloading the model...")
31 | urlretrieve(url, model + ".pt")
32 | print("Model downloaded successfully!")
33 |
34 |
35 | # Call the function to download the model
36 | download_model(args.model)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | certifi==2024.2.2
2 | charset-normalizer==3.3.2
3 | coloredlogs==15.0.1
4 | contourpy==1.2.1
5 | cycler==0.12.1
6 | filelock==3.14.0
7 | flatbuffers==24.3.25
8 | fonttools==4.52.1
9 | humanfriendly==10.0
10 | idna==3.7
11 | Jinja2==3.1.4
12 | kiwisolver==1.4.5
13 | markdown-it-py==3.0.0
14 | MarkupSafe==2.1.5
15 | matplotlib==3.9.0
16 | mdurl==0.1.2
17 | mpmath==1.3.0
18 | networkx==3.3
19 | numpy==1.26.4
20 | onnx==1.14.0
21 | onnxruntime==1.15.1
22 | onnxsim==0.4.36
23 | opencv-python==4.9.0.80
24 | packaging==24.0
25 | pandas==2.2.2
26 | pillow==10.3.0
27 | protobuf==5.27.0
28 | psutil==5.9.8
29 | py-cpuinfo==9.0.0
30 | pycocotools==2.0.7
31 | Pygments==2.18.0
32 | pyparsing==3.1.2
33 | python-dateutil==2.9.0.post0
34 | pytz==2024.1
35 | PyYAML==6.0.1
36 | requests==2.32.2
37 | rich==13.7.1
38 | scipy==1.13.0
39 | seaborn==0.13.2
40 | six==1.16.0
41 | sympy==1.12
42 | thop==0.1.1.post2209072238
43 | torch==2.0.1
44 | torchvision==0.15.2
45 | tqdm==4.66.4
46 | typing_extensions==4.12.0
47 | tzdata==2024.1
48 | -e git+https://github.com/THU-MIG/yolov10@4197c99fbfc2da7a23ff5282519dbee40ff97207#egg=ultralytics
49 | urllib3==2.2.1
50 |
--------------------------------------------------------------------------------
/src/ia/inference.cpp:
--------------------------------------------------------------------------------
1 | #include "inference.h"
2 | #include <algorithm>
3 | #include <stdexcept>
4 | #include <cmath> // For exp function
5 |
6 | const std::vector<std::string> InferenceEngine::CLASS_NAMES = {
7 | "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
8 | "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
9 | "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
10 | "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
11 | "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
12 | "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
13 | "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
14 | "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
15 | "scissors", "teddy bear", "hair drier", "toothbrush"};
16 |
17 |
18 | /**
19 | * @brief Letterbox an image to fit into the target size without changing its aspect ratio.
20 | * Adds padding to the shorter side to match the target dimensions.
21 | *
22 | * @param src Image to be letterboxed.
23 | * @param target_size Desired output size (width and height should be the same).
24 | * @param color Color of the padding (default is black).
25 | * @return Letterboxed image with padding.
26 | */
27 | cv::Mat letterbox(const cv::Mat &src, const cv::Size &target_size, const cv::Scalar &color = cv::Scalar(0, 0, 0))
28 | {
29 | // Calculate scale and padding
30 | float scale = std::min(target_size.width / (float)src.cols, target_size.height / (float)src.rows);
31 | int new_width = static_cast<int>(src.cols * scale);
32 | int new_height = static_cast<int>(src.rows * scale);
33 |
34 | // Resize the image with the computed scale
35 | cv::Mat resized_image;
36 | cv::resize(src, resized_image, cv::Size(new_width, new_height));
37 |
38 | // Create the output image with the target size and fill it with the padding color
39 | cv::Mat dst = cv::Mat::zeros(target_size.height, target_size.width, src.type());
40 | dst.setTo(color);
41 |
42 | // Calculate the top-left corner where the resized image will be placed
43 | int top = (target_size.height - new_height) / 2;
44 | int left = (target_size.width - new_width) / 2;
45 |
46 | // Place the resized image onto the center of the letterboxed image
47 | resized_image.copyTo(dst(cv::Rect(left, top, resized_image.cols, resized_image.rows)));
48 |
49 | return dst;
50 | }
51 |
52 | /**
53 | * @brief Computes the Intersection over Union (IoU) between two bounding boxes.
54 | *
55 | * @param boxA First bounding box.
56 | * @param boxB Second bounding box.
57 | * @return IoU value between 0 and 1.
58 | */
59 | float computeIOU(const cv::Rect &boxA, const cv::Rect &boxB)
60 | {
61 | int xA = std::max(boxA.x, boxB.x);
62 | int yA = std::max(boxA.y, boxB.y);
63 | int xB = std::min(boxA.x + boxA.width, boxB.x + boxB.width);
64 | int yB = std::min(boxA.y + boxA.height, boxB.y + boxB.height);
65 |
66 | int interArea = std::max(0, xB - xA) * std::max(0, yB - yA);
67 |
68 | int boxAArea = boxA.width * boxA.height;
69 | int boxBArea = boxB.width * boxB.height;
70 |
71 | float iou = static_cast<float>(interArea) / (boxAArea + boxBArea - interArea);
72 | return iou;
73 | }
74 |
75 |
76 | /**
77 | * @brief Applies Soft-NMS to a set of detected bounding boxes to reduce overlapping detections.
78 | *
79 | * @param detections Vector of detections to process.
80 | * @param sigma Soft-NMS parameter controlling the Gaussian function's width. Default is 0.5.
81 | * @param iou_threshold IoU threshold for suppression. Default is 0.3.
82 | */
83 | void applySoftNMS(std::vector<Detection> &detections, float sigma = 0.5, float iou_threshold = 0.3)
84 | {
85 | for (size_t i = 0; i < detections.size(); ++i)
86 | {
87 | for (size_t j = i + 1; j < detections.size(); ++j)
88 | {
89 | float iou = computeIOU(detections[i].bbox, detections[j].bbox);
90 | if (iou > iou_threshold)
91 | {
92 | // Apply the Soft-NMS score decay formula
93 | detections[j].confidence *= std::exp(-iou * iou / sigma);
94 | }
95 | }
96 | }
97 |
98 | // Remove detections with low confidence scores
99 | detections.erase(std::remove_if(detections.begin(), detections.end(),
100 | [](const Detection &det) { return det.confidence < 0.001; }),
101 | detections.end());
102 | }
103 |
104 |
105 | /**
106 | * @brief Apply Histogram Equalization to an image.
107 | *
108 | * @param src Input image in BGR format.
109 | * @return Image with enhanced contrast.
110 | */
111 | cv::Mat applyHistogramEqualization(const cv::Mat &src)
112 | {
113 | cv::Mat ycrcb_image;
114 | cv::cvtColor(src, ycrcb_image, cv::COLOR_BGR2YCrCb); // Convert to YCrCb color space
115 |
116 | std::vector<cv::Mat> channels;
117 | cv::split(ycrcb_image, channels);
118 |
119 | // Apply histogram equalization to the Y channel (intensity)
120 | cv::equalizeHist(channels[0], channels[0]);
121 |
122 | // Merge back the channels and convert to BGR
123 | cv::merge(channels, ycrcb_image);
124 | cv::Mat result;
125 | cv::cvtColor(ycrcb_image, result, cv::COLOR_YCrCb2BGR);
126 |
127 | return result;
128 | }
129 |
130 | /**
131 | * @brief Apply CLAHE to an image for adaptive contrast enhancement.
132 | *
133 | * @param src Input image in BGR format.
134 | * @return Image with enhanced local contrast.
135 | */
136 | cv::Mat applyCLAHE(const cv::Mat &src)
137 | {
138 | cv::Mat lab_image;
139 | cv::cvtColor(src, lab_image, cv::COLOR_BGR2Lab); // Convert to LAB color space
140 |
141 | std::vector<cv::Mat> lab_planes;
142 | cv::split(lab_image, lab_planes);
143 |
144 | // Apply CLAHE to the L channel (lightness)
145 | cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE();
146 | clahe->setClipLimit(4.0); // Set the clip limit for contrast enhancement
147 | clahe->apply(lab_planes[0], lab_planes[0]);
148 |
149 | // Merge the planes back and convert to BGR
150 | cv::merge(lab_planes, lab_image);
151 | cv::Mat result;
152 | cv::cvtColor(lab_image, result, cv::COLOR_Lab2BGR);
153 |
154 | return result;
155 | }
156 |
157 |
158 | /**
159 | * @brief Apply Gamma Correction to an image.
160 | *
161 | * @param src Input image in BGR format.
162 | * @param gamma Gamma value for correction. Values < 1 will lighten the image, values > 1 will darken it.
163 | * @return Image with gamma correction applied.
164 | */
165 | cv::Mat applyGammaCorrection(const cv::Mat &src, float gamma)
166 | {
167 | cv::Mat lut(1, 256, CV_8UC1);
168 | uchar* p = lut.ptr();
169 | for (int i = 0; i < 256; ++i)
170 | {
171 | p[i] = cv::saturate_cast<uchar>(std::pow(i / 255.0, gamma) * 255.0);
172 | }
173 |
174 | cv::Mat result;
175 | cv::LUT(src, lut, result); // Apply the gamma lookup table to the image
176 |
177 | return result;
178 | }
179 |
180 |
181 | InferenceEngine::InferenceEngine(const std::string &model_path)
182 | : env(ORT_LOGGING_LEVEL_WARNING, "ONNXRuntime"),
183 | session_options(),
184 | session(env, model_path.c_str(), session_options),
185 | input_shape{1, 3, 640, 640}
186 | {
187 | session_options.SetIntraOpNumThreads(1);
188 | session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
189 |
190 | // Check if the session was created successfully
191 | if (!session)
192 | {
193 | throw std::runtime_error("Failed to create ONNX Runtime session.");
194 | }
195 | }
196 |
197 | InferenceEngine::~InferenceEngine() {}
198 |
199 | /*
200 | * Function to preprocess the image
201 | *
202 | * @param image: input image as cv::Mat
203 | * @return: vector of floats representing the preprocessed image
204 | */
205 | std::vector<float> InferenceEngine::preprocessImage(const cv::Mat &image)
206 | {
207 | if (image.empty())
208 | {
209 | throw std::runtime_error("Could not read the image");
210 | }
211 |
212 | // Step 1: Apply image enhancement techniques
213 | cv::Mat enhanced_image = applyCLAHE(image); // Use CLAHE as an example
214 | // cv::Mat enhanced_image = applyHistogramEqualization(image); // Or use Histogram Equalization
215 | // cv::Mat enhanced_image = applyGammaCorrection(image, 1.2); // Or use Gamma Correction
216 |
217 | // Step 2: Apply letterbox to the enhanced image
218 | cv::Mat letterboxed_image = letterbox(enhanced_image, cv::Size(input_shape[2], input_shape[3]));
219 |
220 | // Step 3: Convert image to float and normalize
221 | letterboxed_image.convertTo(letterboxed_image, CV_32F, 1.0 / 255);
222 |
223 | // Step 4: Convert from BGR to RGB
224 | cv::cvtColor(letterboxed_image, letterboxed_image, cv::COLOR_BGR2RGB);
225 |
226 | // Step 5: Prepare the input tensor values as a 1D vector
227 | std::vector<float> input_tensor_values;
228 | input_tensor_values.reserve(input_shape[1] * input_shape[2] * input_shape[3]);
229 |
230 | // Convert Mat to vector of floats (HWC to CHW)
231 | std::vector<cv::Mat> channels(3);
232 | cv::split(letterboxed_image, channels);
233 |
234 | for (int c = 0; c < 3; ++c)
235 | {
236 | input_tensor_values.insert(input_tensor_values.end(), (float *)channels[c].data, (float *)channels[c].data + input_shape[2] * input_shape[3]);
237 | }
238 |
239 | return input_tensor_values;
240 | }
241 |
242 |
243 |
244 | /*
245 | * Function to filter the detections based on the confidence threshold
246 | *
247 | * @param results: vector of floats representing the output tensor
248 | * @param confidence_threshold: minimum confidence threshold
249 | * @param img_width: width of the input image
250 | * @param img_height: height of the input image
251 | * @param orig_width: original width of the image
252 | * @param orig_height: original height of the image
253 | * @return: vector of Detection objects
254 | */
255 | std::vector<Detection> InferenceEngine::filterDetections(const std::vector<float> &results, float confidence_threshold, int img_width, int img_height, int orig_width, int orig_height)
256 | {
257 | std::vector<Detection> detections;
258 | const int num_detections = results.size() / 6;
259 |
260 | // Calculate scale and padding factors
261 | float scale = std::min(img_width / (float)orig_width, img_height / (float)orig_height);
262 | int new_width = static_cast<int>(orig_width * scale);
263 | int new_height = static_cast<int>(orig_height * scale);
264 | int pad_x = (img_width - new_width) / 2;
265 | int pad_y = (img_height - new_height) / 2;
266 |
267 | detections.reserve(num_detections);
268 |
269 | for (int i = 0; i < num_detections; ++i)
270 | {
271 | float left = results[i * 6 + 0];
272 | float top = results[i * 6 + 1];
273 | float right = results[i * 6 + 2];
274 | float bottom = results[i * 6 + 3];
275 | float confidence = results[i * 6 + 4];
276 | int class_id = static_cast<int>(results[i * 6 + 5]);
277 |
278 | if (confidence >= confidence_threshold)
279 | {
280 | // Remove padding and rescale to original image dimensions
281 | left = (left - pad_x) / scale;
282 | top = (top - pad_y) / scale;
283 | right = (right - pad_x) / scale;
284 | bottom = (bottom - pad_y) / scale;
285 |
286 | int x = static_cast<int>(left);
287 | int y = static_cast<int>(top);
288 | int width = static_cast<int>(right - left);
289 | int height = static_cast<int>(bottom - top);
290 |
291 | detections.push_back(
292 | {confidence,
293 | cv::Rect(x, y, width, height),
294 | class_id,
295 | CLASS_NAMES[class_id]});
296 | }
297 | }
298 |
299 | // Apply Soft-NMS to refine detections
300 | applySoftNMS(detections, 0.5, 0.3); // You can tweak the sigma and IoU threshold values as needed
301 |
302 | return detections;
303 | }
304 |
305 |
306 | /*
307 | * Function to run inference
308 | *
309 | * @param input_tensor_values: vector of floats representing the input tensor
310 | * @return: vector of floats representing the output tensor
311 | */
312 | std::vector<float> InferenceEngine::runInference(const std::vector<float> &input_tensor_values)
313 | {
314 | Ort::AllocatorWithDefaultOptions allocator;
315 |
316 | std::string input_name = getInputName();
317 | std::string output_name = getOutputName();
318 |
319 | const char *input_name_ptr = input_name.c_str();
320 | const char *output_name_ptr = output_name.c_str();
321 |
322 | Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
323 | Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, const_cast<float *>(input_tensor_values.data()), input_tensor_values.size(), input_shape.data(), input_shape.size());
324 |
325 | auto output_tensors = session.Run(Ort::RunOptions{nullptr}, &input_name_ptr, &input_tensor, 1, &output_name_ptr, 1);
326 |
327 | float *floatarr = output_tensors[0].GetTensorMutableData<float>();
328 | size_t output_tensor_size = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
329 |
330 | return std::vector<float>(floatarr, floatarr + output_tensor_size);
331 | }
332 |
333 | /*
334 | * Function to draw the labels on the image
335 | *
336 | * @param image: input image
337 | * @param detections: vector of Detection objects
338 | * @return: image with labels drawn
339 | */
340 | cv::Mat InferenceEngine::draw_labels(const cv::Mat &image, const std::vector<Detection> &detections)
341 | {
342 | cv::Mat result = image.clone();
343 |
344 | for (const auto &detection : detections)
345 | {
346 | cv::rectangle(result, detection.bbox, cv::Scalar(0, 255, 0), 2);
347 | std::string label = detection.class_name + ": " + std::to_string(detection.confidence);
348 |
349 | int baseLine;
350 | cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
351 |
352 | cv::rectangle(
353 | result,
354 | cv::Point(detection.bbox.x, detection.bbox.y - labelSize.height),
355 | cv::Point(detection.bbox.x + labelSize.width, detection.bbox.y + baseLine),
356 | cv::Scalar(255, 255, 255),
357 | cv::FILLED);
358 |
359 | cv::putText(
360 | result,
361 | label,
362 | cv::Point(detection.bbox.x, detection.bbox.y),
363 | cv::FONT_HERSHEY_SIMPLEX,
364 | 0.5,
365 | cv::Scalar(0, 0, 0),
366 | 1);
367 | }
368 |
369 | return result;
370 | }
371 |
372 | /*
373 | * Function to get the input name
374 | *
375 | * @return: name of the input tensor
376 | */
377 | std::string InferenceEngine::getInputName()
378 | {
379 | Ort::AllocatorWithDefaultOptions allocator;
380 | Ort::AllocatedStringPtr name_allocator = session.GetInputNameAllocated(0, allocator);
381 | return std::string(name_allocator.get());
382 | }
383 |
384 | /*
385 | * Function to get the output name
386 | *
387 | * @return: name of the output tensor
388 | */
389 | std::string InferenceEngine::getOutputName()
390 | {
391 | Ort::AllocatorWithDefaultOptions allocator;
392 | Ort::AllocatedStringPtr name_allocator = session.GetOutputNameAllocated(0, allocator);
393 | return std::string(name_allocator.get());
394 | }
--------------------------------------------------------------------------------
/src/ia/inference.h:
--------------------------------------------------------------------------------
1 | #ifndef INFERENCE_H
2 | #define INFERENCE_H
3 |
4 | #include <opencv2/opencv.hpp>
5 | #include <onnxruntime_cxx_api.h>
6 | #include <string>
7 | #include <vector>
8 | #include <cmath> // For exp function
9 |
10 | struct Detection
11 | {
12 | float confidence;
13 | cv::Rect bbox;
14 | int class_id;
15 | std::string class_name;
16 | };
17 |
18 |
19 | class InferenceEngine
20 | {
21 | public:
22 | InferenceEngine(const std::string &model_path);
23 | ~InferenceEngine();
24 |
25 | std::vector<float> preprocessImage(const cv::Mat &image);
26 | std::vector<Detection> filterDetections(const std::vector<float> &results, float confidence_threshold, int img_width, int img_height, int orig_width, int orig_height);
27 | std::vector<float> runInference(const std::vector<float> &input_tensor_values);
28 |
29 | cv::Mat draw_labels(const cv::Mat &image, const std::vector<Detection> &detections);
30 |
31 | std::vector<int64_t> input_shape;
32 |
33 | private:
34 | Ort::Env env;
35 | Ort::SessionOptions session_options;
36 | Ort::Session session;
37 |
38 | std::string getInputName();
39 | std::string getOutputName();
40 |
41 | static const std::vector<std::string> CLASS_NAMES;
42 | };
43 |
44 |
45 | #endif // INFERENCE_H
46 |
--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
1 | #include "./ia/inference.h"
2 | #include <iostream>
3 | #include <string>
4 |
5 |
6 |
7 | int main(int argc, char *argv[])
8 | {
9 | if (argc != 3)
10 | {
11 | std::cerr << "Usage: " << argv[0] << " " << std::endl;
12 | return 1;
13 | }
14 |
15 | std::string model_path = argv[1];
16 | std::string image_path = argv[2];
17 |
18 | try
19 | {
20 | InferenceEngine engine(model_path);
21 |
22 | cv::Mat image = cv::imread(image_path);
23 | int orig_width = image.cols;
24 | int orig_height = image.rows;
25 | std::vector<float> input_tensor_values = engine.preprocessImage(image);
26 |
27 | std::vector<float> results = engine.runInference(input_tensor_values);
28 |
29 | float confidence_threshold = 0.3;
30 |
31 | std::vector<Detection> detections = engine.filterDetections(results, confidence_threshold, engine.input_shape[2], engine.input_shape[3], orig_width, orig_height);
32 |
33 | cv::Mat output = engine.draw_labels(image, detections);
34 |
35 | cv::imwrite("result.jpg", output);
36 | }
37 | catch (const std::exception &e)
38 | {
39 | std::cerr << "Error: " << e.what() << std::endl;
40 | return 1;
41 | }
42 |
43 | return 0;
44 | }
45 |
--------------------------------------------------------------------------------
/src/video.cpp:
--------------------------------------------------------------------------------
1 | #include "./ia/inference.h"
2 | #include <iostream>
3 | #include <cstdlib> // For atoi
4 |
5 | int main(int argc, char const *argv[])
6 | {
7 | if (argc != 3)
8 | {
9 | std::cerr << "Usage: " << argv[0] << " " << std::endl;
10 | return 1;
11 | }
12 | std::string model_path = argv[1];
13 |
14 | auto source = atoi(argv[2]); // camera index, e.g. 0 for the default webcam
15 | int apiID = cv::CAP_ANY; // 0 = autodetect default API
16 |
17 | cv::namedWindow("yolov10", cv::WINDOW_AUTOSIZE);
18 |
19 | InferenceEngine engine(model_path);
20 |
21 | cv::VideoCapture cap;
22 |
23 | cap.open(source, apiID);
24 |
25 | if (!cap.isOpened())
26 | {
27 | std::cerr << "ERROR! Unable to open camera\n";
28 | return -1;
29 | }
30 |
31 | cv::Mat frame;
32 |
33 | std::cout << "Start grabbing" << std::endl
34 | << "Press any key to terminate" << std::endl;
35 |
36 | for (;;)
37 | {
38 | cap.read(frame);
39 |
40 | if (frame.empty())
41 | {
42 | std::cerr << "ERROR! blank frame grabbed\n";
43 | break;
44 | }
45 |
46 | int orig_width = frame.cols;
47 | int orig_height = frame.rows;
48 | auto timer = cv::getTickCount();
49 |
50 | std::vector<float> input_tensor_values = engine.preprocessImage(frame);
51 |
52 | std::vector<float> results = engine.runInference(input_tensor_values);
53 |
54 | float confidence_threshold = 0.3;
55 |
56 | std::vector<Detection> detections = engine.filterDetections(results, confidence_threshold, engine.input_shape[2], engine.input_shape[3], orig_width, orig_height);
57 |
58 | double fps = cv::getTickFrequency() / ((double)cv::getTickCount() - timer);
59 |
60 | cv::putText(frame, "FPS: " + std::to_string(fps), cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2, 8);
61 |
62 | cv::Mat output = engine.draw_labels(frame, detections);
63 |
64 | cv::imshow("test", output);
65 |
66 | if (cv::waitKey(5) >= 0)
67 | break;
68 | }
69 |
70 | return 0;
71 | }
72 |
--------------------------------------------------------------------------------