├── .github
│   └── workflows
│       └── docker-image.yml
├── .gitignore
├── .vscode
│   ├── c_cpp_properties.json
│   ├── launch.json
│   ├── settings.json
│   └── tasks.json
├── CMakeLists.txt
├── Dockerfile
├── IMG_4057.JPG
├── README.md
├── assets
│   ├── cpp
│   │   ├── bus.jpg
│   │   └── zidane.jpg
│   └── yolo
│       ├── bus.jpg
│       └── zidane.jpg
├── download_model.py
├── requirements.txt
└── src
    ├── ia
    │   ├── inference.cpp
    │   └── inference.h
    ├── main.cpp
    └── video.cpp
/.github/workflows/docker-image.yml:
--------------------------------------------------------------------------------
1 | name: Docker Image CI
2 |
3 | on:
4 | push:
5 | branches: [ "main" ]
6 | pull_request:
7 | branches: [ "main" ]
8 |
9 | jobs:
10 |
11 | build:
12 |
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v4
17 | - name: Build the Docker image
18 | run: docker build . --file Dockerfile --tag my-image-name:$(date +%s)
19 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 |
163 | # Prerequisites
164 | *.d
165 |
166 | # Object files
167 | *.o
168 | *.ko
169 | *.obj
170 | *.elf
171 |
172 | # Linker output
173 | *.ilk
174 | *.map
175 | *.exp
176 |
177 | # Precompiled Headers
178 | *.gch
179 | *.pch
180 |
181 | # Libraries
182 | *.lib
183 | *.a
184 | *.la
185 | *.lo
186 |
187 | # Shared objects (inc. Windows DLLs)
188 | *.dll
189 | *.so
190 | *.so.*
191 | *.dylib
192 |
193 | # Executables
194 | *.exe
195 | *.out
196 | *.app
197 | *.i*86
198 | *.x86_64
199 | *.hex
200 |
201 | # Debug files
202 | *.dSYM/
203 | *.su
204 | *.idb
205 | *.pdb
206 |
207 | # Kernel Module Compile Results
208 | *.mod*
209 | *.cmd
210 | .tmp_versions/
211 | modules.order
212 | Module.symvers
213 | Mkfile.old
214 | dkms.conf
215 |
216 | *.onnx
217 |
218 | *.pt
219 |
220 | *.DS_Store
--------------------------------------------------------------------------------
/.vscode/c_cpp_properties.json:
--------------------------------------------------------------------------------
1 | {
2 | "configurations": [
3 | {
4 | "name": "Mac",
5 | "includePath": [
6 | "${workspaceFolder}/**",
7 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4/opencv2",
8 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4",
9 | "/opt/homebrew/Cellar/onnxruntime/1.17.1/include/onnxruntime"
10 | ],
11 | "defines": [],
12 | "macFrameworkPath": [],
13 | "compilerPath": "/usr/bin/g++",
14 | "cStandard": "c17",
15 | "cppStandard": "c++17",
16 | "intelliSenseMode": "clang-x64",
17 | "browse": {
18 | "path": [
19 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4",
20 | "/opt/homebrew/Cellar/onnxruntime/1.17.1/include/onnxruntime"
21 | ],
22 | "limitSymbolsToIncludedHeaders": true,
23 | "databaseFilename": ""
24 | }
25 | }
26 | ],
27 | "version": 4
28 | }
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "0.2.0",
3 | "configurations": [
4 | {
5 | "name": "(lldb) Launch",
6 | "type": "cppdbg",
7 | "request": "launch",
8 | "program": "${fileDirname}/${fileBasenameNoExtension}.out",
9 | "args": [],
10 | "stopAtEntry": true,
11 | "cwd": "${workspaceFolder}",
12 | "environment": [],
13 | "externalConsole": true,
14 | "MIMode": "lldb",
15 | "preLaunchTask": "Build"
16 | }
17 | ]
18 | }
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "files.associations": {
3 | "__availability": "cpp",
4 | "charconv": "cpp",
5 | "string": "cpp",
6 | "vector": "cpp",
7 | "__config": "cpp",
8 | "__split_buffer": "cpp",
9 | "deque": "cpp",
10 | "list": "cpp",
11 | "__bit_reference": "cpp",
12 | "__debug": "cpp",
13 | "__errc": "cpp",
14 | "__hash_table": "cpp",
15 | "__locale": "cpp",
16 | "__mutex_base": "cpp",
17 | "__node_handle": "cpp",
18 | "__threading_support": "cpp",
19 | "__tree": "cpp",
20 | "__verbose_abort": "cpp",
21 | "array": "cpp",
22 | "atomic": "cpp",
23 | "bitset": "cpp",
24 | "cctype": "cpp",
25 | "clocale": "cpp",
26 | "cmath": "cpp",
27 | "complex": "cpp",
28 | "cstdarg": "cpp",
29 | "cstddef": "cpp",
30 | "cstdint": "cpp",
31 | "cstdio": "cpp",
32 | "cstdlib": "cpp",
33 | "cstring": "cpp",
34 | "ctime": "cpp",
35 | "cwchar": "cpp",
36 | "cwctype": "cpp",
37 | "exception": "cpp",
38 | "fstream": "cpp",
39 | "initializer_list": "cpp",
40 | "iomanip": "cpp",
41 | "ios": "cpp",
42 | "iosfwd": "cpp",
43 | "iostream": "cpp",
44 | "istream": "cpp",
45 | "limits": "cpp",
46 | "locale": "cpp",
47 | "map": "cpp",
48 | "mutex": "cpp",
49 | "new": "cpp",
50 | "optional": "cpp",
51 | "ostream": "cpp",
52 | "queue": "cpp",
53 | "ratio": "cpp",
54 | "set": "cpp",
55 | "sstream": "cpp",
56 | "stdexcept": "cpp",
57 | "streambuf": "cpp",
58 | "string_view": "cpp",
59 | "system_error": "cpp",
60 | "tuple": "cpp",
61 | "typeinfo": "cpp",
62 | "unordered_map": "cpp",
63 | "variant": "cpp",
64 | "algorithm": "cpp"
65 | }
66 | }
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "2.0.0",
3 | "tasks": [
4 | {
5 | "label": "Build",
6 | "type": "shell",
7 | "command": "clang++",
8 | "args": [
9 | "-std=c++17",
10 | "${file}",
11 | "/Users/josedanielsarmientoblanco/Desktop/hobby/yolov10cpp/src/ia/inference.h",
12 | "-o",
13 | "${fileDirname}/${fileBasenameNoExtension}.out",
14 | "-I",
15 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4/opencv2",
16 | "-I",
17 | "/opt/homebrew/Cellar/onnxruntime/1.17.1/include/onnxruntime",
18 | "-I",
19 | "/opt/homebrew/Cellar/opencv/4.10.0_9/include/opencv4",
20 | "-L",
21 | "/opt/homebrew/Cellar/opencv/4.10.0_9/lib",
22 | "-L",
23 | "/opt/homebrew/Cellar/onnxruntime/1.17.1/lib",
24 | "-l",
25 | "onnxruntime",
26 | "-l",
27 | "opencv_stitching",
28 | "-l",
29 | "opencv_superres",
30 | "-l",
31 | "opencv_videostab",
32 | "-l",
33 | "opencv_aruco",
34 | "-l",
35 | "opencv_bgsegm",
36 | "-l",
37 | "opencv_bioinspired",
38 | "-l",
39 | "opencv_ccalib",
40 | "-l",
41 | "opencv_dnn_objdetect",
42 | "-l",
43 | "opencv_dpm",
44 | "-l",
45 | "opencv_face",
46 | "-l",
47 | "opencv_fuzzy",
48 | "-l",
49 | "opencv_hfs",
50 | "-l",
51 | "opencv_img_hash",
52 | "-l",
53 | "opencv_line_descriptor",
54 | "-l",
55 | "opencv_optflow",
56 | "-l",
57 | "opencv_reg",
58 | "-l",
59 | "opencv_rgbd",
60 | "-l",
61 | "opencv_saliency",
62 | "-l",
63 | "opencv_stereo",
64 | "-l",
65 | "opencv_structured_light",
66 | "-l",
67 | "opencv_phase_unwrapping",
68 | "-l",
69 | "opencv_surface_matching",
70 | "-l",
71 | "opencv_tracking",
72 | "-l",
73 | "opencv_datasets",
74 | "-l",
75 | "opencv_dnn",
76 | "-l",
77 | "opencv_plot",
78 | "-l",
79 | "opencv_xfeatures2d",
80 | "-l",
81 | "opencv_shape",
82 | "-l",
83 | "opencv_video",
84 | "-l",
85 | "opencv_ml",
86 | "-l",
87 | "opencv_ximgproc",
88 | "-l",
89 | "opencv_xobjdetect",
90 | "-l",
91 | "opencv_objdetect",
92 | "-l",
93 | "opencv_calib3d",
94 | "-l",
95 | "opencv_features2d",
96 | "-l",
97 | "opencv_highgui",
98 | "-l",
99 | "opencv_videoio",
100 | "-l",
101 | "opencv_imgcodecs",
102 | "-l",
103 | "opencv_flann",
104 | "-l",
105 | "opencv_xphoto",
106 | "-l",
107 | "opencv_photo",
108 | "-l",
109 | "opencv_imgproc",
110 | "-l",
111 | "opencv_core",
112 | "-g"
113 | ],
114 | "group": {
115 | "kind": "build",
116 | "isDefault": true
117 | },
118 | "problemMatcher": [
119 | "$gcc"
120 | ]
121 | }
122 | ]
123 | }
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | # Set the project name in a variable
4 | set(project_name yolov10_cpp)
5 | project(${project_name})
6 | set(CMAKE_CXX_STANDARD 17)
7 |
8 | find_package(OpenCV REQUIRED)
9 |
10 | # Find ONNX Runtime package
11 | find_path(ONNXRUNTIME_INCLUDE_DIR onnxruntime_c_api.h
12 | HINTS /opt/homebrew/Cellar/onnxruntime/1.17.1/include/onnxruntime
13 | )
14 | find_library(ONNXRUNTIME_LIBRARY onnxruntime
15 | HINTS /opt/homebrew/Cellar/onnxruntime/1.17.1/lib
16 | )
17 |
18 | if(NOT ONNXRUNTIME_INCLUDE_DIR)
19 | message(FATAL_ERROR "ONNX Runtime include directory not found")
20 | endif()
21 | if(NOT ONNXRUNTIME_LIBRARY)
22 | message(FATAL_ERROR "ONNX Runtime library not found")
23 | endif()
24 |
25 | add_library(${project_name}-lib
26 | src/ia/inference.cpp
27 | src/ia/inference.h
28 | )
29 |
30 | target_include_directories(${project_name}-lib PUBLIC src)
31 | target_include_directories(${project_name}-lib PUBLIC ${ONNXRUNTIME_INCLUDE_DIR})
32 |
33 | target_link_libraries(${project_name}-lib
34 | PUBLIC ${OpenCV_LIBS}
35 | PUBLIC ${ONNXRUNTIME_LIBRARY}
36 | )
37 |
38 | # Add the main executable
39 | add_executable(${project_name}
40 | ./src/main.cpp
41 | )
42 | target_include_directories(${project_name} PUBLIC ${ONNXRUNTIME_INCLUDE_DIR})
43 | target_link_libraries(${project_name} ${project_name}-lib)
44 |
45 | # Add the video executable
46 | add_executable(${project_name}_video
47 | ./src/video.cpp
48 | )
49 | target_include_directories(${project_name}_video PUBLIC ${ONNXRUNTIME_INCLUDE_DIR})
50 | target_link_libraries(${project_name}_video ${project_name}-lib)
51 |
52 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use an official image as a parent image
2 | FROM ubuntu:20.04
3 |
4 | # Set environment variables to avoid interactive prompts during installation
5 | ENV DEBIAN_FRONTEND=noninteractive
6 |
7 | # Install necessary dependencies
8 | RUN apt-get update && apt-get install -y \
9 | build-essential \
10 | cmake \
11 | libopencv-dev \
12 | wget \
13 | git \
14 | && rm -rf /var/lib/apt/lists/*
15 |
16 | # Install ONNX Runtime
17 | RUN wget https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/onnxruntime-linux-x64-1.10.0.tgz && \
18 | tar -xzf onnxruntime-linux-x64-1.10.0.tgz && \
19 | rm onnxruntime-linux-x64-1.10.0.tgz
20 |
21 | # Set ONNX Runtime library path
22 | ENV LD_LIBRARY_PATH="/onnxruntime-linux-x64-1.10.0/lib:$LD_LIBRARY_PATH"
23 |
24 | # Create a directory for your application
25 | WORKDIR /app
26 |
27 | # Copy your source code into the container
28 | COPY . .
29 |
30 | # Build your C++ application
31 | RUN mkdir build && cd build && \
32 | cmake .. && \
33 | make
34 |
35 | # # Run the application
36 | # CMD ["./build/object_detection", "model.onnx", "test_image.jpg"]
37 |
--------------------------------------------------------------------------------
/IMG_4057.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/IMG_4057.JPG
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Yolo V10 cpp
2 |
3 | Jose Sarmiento | josedanielsarmiento219@gmail.com
4 |
5 |
6 | ## Summary
7 |
8 | This repository provides a basic C++17 inference script for YOLOv10, built with speed in mind. The code uses OpenCV 4.9.0_8 to manipulate images and ONNX Runtime 1.17.1 to run the model. Note that OpenCV does not support YOLOv10 natively, because the architecture embeds a TopK layer.
9 |
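Since OpenCV cannot load the graph, all inference goes through ONNX Runtime. A minimal sketch of that choice (assuming the onnxruntime headers are installed and a `yolov10n.onnx` file is present; the full engine lives in `src/ia/inference.cpp`):

```cpp
#include <onnxruntime_cxx_api.h>

int main() {
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "yolov10");
    Ort::SessionOptions options;
    // ONNX Runtime accepts the YOLOv10 graph, TopK layer and all,
    // where cv::dnn::readNetFromONNX would reject it.
    Ort::Session session(env, "yolov10n.onnx", options);
    return 0;
}
```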
10 |
11 |
12 | ## Prepare the code
13 |
14 |
15 | 1. Download the model you want
16 |
17 |
18 | - yolov10n
19 | - yolov10s
20 | - yolov10m
21 | - yolov10b
22 | - yolov10l
23 | - yolov10x
24 |
25 |
26 | ```bash
27 | python download_model.py --model {MODEL_SELECTED}
28 | ```
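
For example, to fetch the smallest (nano) checkpoint:

```bash
python download_model.py --model yolov10n
```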
29 |
30 | ## Install packages
31 |
32 | ```
33 | conda create -n yolov10 python=3.9
34 | conda activate yolov10
35 |
36 | git clone https://github.com/THU-MIG/yolov10
37 | cd yolov10
38 |
39 | pip install -r requirements.txt
40 | pip install -e .
41 |
42 | cd ..
43 | ```
44 |
45 | ## Convert model
46 |
47 | ```
48 | yolo export model=yolov10n.pt format=onnx
49 | ```
50 | ## Dependencies
51 |
52 | 1. ffmpeg
53 | 2. OpenCV
54 | 3. onnxruntime
55 |
56 |
57 | - MacOs
58 | ```
59 | brew install ffmpeg
60 | brew install opencv
61 | brew install onnxruntime
62 | ```
63 |
64 | - Ubuntu: unfortunately, ONNX Runtime is not available as a native apt-get package
65 |
66 | You can install it with Python:
67 | ```
68 | sudo apt-get update
69 | sudo apt-get install python3-pip
70 | pip3 install onnxruntime
71 | ```
72 |
73 | or with .NET:
74 | ```
75 | dotnet add package Microsoft.ML.OnnxRuntime
76 |
77 | ```
78 |
79 |
80 | ## How to run this code
81 |
82 |
83 | 1. Build with CMake (recommended)
84 |
85 | ```
86 | mkdir build
87 | cd build
88 | cmake ..
89 | make
90 | ```
91 |
92 |
93 | 2. Run one of the following commands
94 |
95 | > static images
96 |
97 | ```
98 | ./yolov10_cpp [MODEL_PATH] [IMAGE_PATH]
99 | ```
100 |
101 | > realtime
102 |
103 | ```
104 | ./yolov10_cpp_video [MODEL_PATH] [SOURCE]
105 | ```
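
For example (assuming `yolov10n.onnx` sits one directory up and the default webcam is index 0):

```bash
./yolov10_cpp ../yolov10n.onnx ../bus.jpg
./yolov10_cpp_video ../yolov10n.onnx 0
```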
106 |
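The repository also ships a `Dockerfile` (the same one the CI workflow builds). A sketch of using it, with a hypothetical image tag:

```bash
docker build -t yolov10cpp .
docker run --rm -it yolov10cpp bash
```
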
107 | ## Results
108 |
109 | Output images from our C++ binding and the Python binding (`assets/cpp` vs. `assets/yolo`):
110 |
111 | | our cpp binding | python binding |
112 | |-----------------|----------------|
113 | | ![bus (cpp)](assets/cpp/bus.jpg) | ![bus (yolo)](assets/yolo/bus.jpg) |
114 | | ![zidane (cpp)](assets/cpp/zidane.jpg) | ![zidane (yolo)](assets/yolo/zidane.jpg) |
115 |
121 | > Measured on an Apple M3 Pro
122 |
123 | | Command Line Execution | Resource Utilization |
124 | |---------------------------------------------------------------------|------------------------------------------------------|
125 | | `./yolov10_cpp ../yolov10n.onnx ../bus.jpg` | **0.46s** user, **0.10s** system, **94%** CPU, **0.595s** total |
126 | | `yolo detect predict model=yolov10n.onnx source=bus.jpg` | **1.69s** user, **2.44s** system, **291%** CPU, **1.413s** total |
127 |
128 |
129 | ## Future plans
130 |
131 | 1. Modularize the components. ✅
132 | 2. Make an example for real-time video. ✅
133 | 3. Support CUDA. ?
134 |
135 | ## Inspiration
136 |
137 | [Ultraopxt](https://github.com/Ultraopxt/yolov10cpp)
138 |
139 |
140 | ## Reference
141 |
142 | [1] Wang, A., Chen, H., Liu, L., Chen, K., Lin, Z., Han, J., & Ding, G. (2024). YOLOv10: Real-Time End-to-End Object Detection. arXiv [Cs.CV]. Retrieved from http://arxiv.org/abs/2405.14458
--------------------------------------------------------------------------------
/assets/cpp/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/assets/cpp/bus.jpg
--------------------------------------------------------------------------------
/assets/cpp/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/assets/cpp/zidane.jpg
--------------------------------------------------------------------------------
/assets/yolo/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/assets/yolo/bus.jpg
--------------------------------------------------------------------------------
/assets/yolo/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielSarmiento04/yolov10cpp/7838e60d8de584f7d36010d9a67e5bbd713ccbca/assets/yolo/zidane.jpg
--------------------------------------------------------------------------------
/download_model.py:
--------------------------------------------------------------------------------
1 | # Import libraries
2 | import argparse
3 |
4 | from urllib.request import urlretrieve
10 |
11 |
12 | # Parse the command-line arguments
13 | parser = argparse.ArgumentParser(description='Download a YOLOv10 model checkpoint.')
14 | parser.add_argument(
15 | '--model',
16 | choices=['yolov10n', 'yolov10s', 'yolov10m', 'yolov10b', 'yolov10l', 'yolov10x'],
17 | default='yolov10n',
18 | help='Model to download'
19 | )
20 |
21 | args = parser.parse_args()
22 |
23 | def download_model(model):
24 | '''
25 | Function to download the model from the github release page
26 | '''
27 |
28 | url = "https://github.com/THU-MIG/yolov10/releases/download/v1.1/" + model + ".pt"
29 | # Download the checkpoint file using the urllib package.
30 | print("Downloading the model...")
31 | urlretrieve(url, model + ".pt")
32 | print("Model downloaded successfully!")
33 |
34 |
35 | # Call the function to download the model
36 | download_model(args.model)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | certifi==2024.2.2
2 | charset-normalizer==3.3.2
3 | coloredlogs==15.0.1
4 | contourpy==1.2.1
5 | cycler==0.12.1
6 | filelock==3.14.0
7 | flatbuffers==24.3.25
8 | fonttools==4.52.1
9 | humanfriendly==10.0
10 | idna==3.7
11 | Jinja2==3.1.4
12 | kiwisolver==1.4.5
13 | markdown-it-py==3.0.0
14 | MarkupSafe==2.1.5
15 | matplotlib==3.9.0
16 | mdurl==0.1.2
17 | mpmath==1.3.0
18 | networkx==3.3
19 | numpy==1.26.4
20 | onnx==1.14.0
21 | onnxruntime==1.15.1
22 | onnxsim==0.4.36
23 | opencv-python==4.9.0.80
24 | packaging==24.0
25 | pandas==2.2.2
26 | pillow==10.3.0
27 | protobuf==5.27.0
28 | psutil==5.9.8
29 | py-cpuinfo==9.0.0
30 | pycocotools==2.0.7
31 | Pygments==2.18.0
32 | pyparsing==3.1.2
33 | python-dateutil==2.9.0.post0
34 | pytz==2024.1
35 | PyYAML==6.0.1
36 | requests==2.32.2
37 | rich==13.7.1
38 | scipy==1.13.0
39 | seaborn==0.13.2
40 | six==1.16.0
41 | sympy==1.12
42 | thop==0.1.1.post2209072238
43 | torch==2.0.1
44 | torchvision==0.15.2
45 | tqdm==4.66.4
46 | typing_extensions==4.12.0
47 | tzdata==2024.1
48 | -e git+https://github.com/THU-MIG/yolov10@4197c99fbfc2da7a23ff5282519dbee40ff97207#egg=ultralytics
49 | urllib3==2.2.1
50 |
--------------------------------------------------------------------------------
/src/ia/inference.cpp:
--------------------------------------------------------------------------------
1 | #include "inference.h"
2 | #include <algorithm>
3 | #include <stdexcept>
4 | #include <cmath> // For exp function
5 |
6 | const std::vector<std::string> InferenceEngine::CLASS_NAMES = {
7 | "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
8 | "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
9 | "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
10 | "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
11 | "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
12 | "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
13 | "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
14 | "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
15 | "scissors", "teddy bear", "hair drier", "toothbrush"};
16 |
17 |
18 | /**
19 | * @brief Letterbox an image to fit into the target size without changing its aspect ratio.
20 | * Adds padding to the shorter side to match the target dimensions.
21 | *
22 | * @param src Image to be letterboxed.
23 | * @param target_size Desired output size (width and height should be the same).
24 | * @param color Color of the padding (default is black).
25 | * @return Letterboxed image with padding.
26 | */
27 | cv::Mat letterbox(const cv::Mat &src, const cv::Size &target_size, const cv::Scalar &color = cv::Scalar(0, 0, 0))
28 | {
29 | // Calculate scale and padding
30 | float scale = std::min(target_size.width / (float)src.cols, target_size.height / (float)src.rows);
31 | int new_width = static_cast<int>(src.cols * scale);
32 | int new_height = static_cast<int>(src.rows * scale);
33 |
34 | // Resize the image with the computed scale
35 | cv::Mat resized_image;
36 | cv::resize(src, resized_image, cv::Size(new_width, new_height));
37 |
38 | // Create the output image with the target size and fill it with the padding color
39 | cv::Mat dst = cv::Mat::zeros(target_size.height, target_size.width, src.type());
40 | dst.setTo(color);
41 |
42 | // Calculate the top-left corner where the resized image will be placed
43 | int top = (target_size.height - new_height) / 2;
44 | int left = (target_size.width - new_width) / 2;
45 |
46 | // Place the resized image onto the center of the letterboxed image
47 | resized_image.copyTo(dst(cv::Rect(left, top, resized_image.cols, resized_image.rows)));
48 |
49 | return dst;
50 | }
51 |
52 | /**
53 | * @brief Computes the Intersection over Union (IoU) between two bounding boxes.
54 | *
55 | * @param boxA First bounding box.
56 | * @param boxB Second bounding box.
57 | * @return IoU value between 0 and 1.
58 | */
59 | float computeIOU(const cv::Rect &boxA, const cv::Rect &boxB)
60 | {
61 | int xA = std::max(boxA.x, boxB.x);
62 | int yA = std::max(boxA.y, boxB.y);
63 | int xB = std::min(boxA.x + boxA.width, boxB.x + boxB.width);
64 | int yB = std::min(boxA.y + boxA.height, boxB.y + boxB.height);
65 |
66 | int interArea = std::max(0, xB - xA) * std::max(0, yB - yA);
67 |
68 | int boxAArea = boxA.width * boxA.height;
69 | int boxBArea = boxB.width * boxB.height;
70 |
71 | float iou = static_cast<float>(interArea) / (boxAArea + boxBArea - interArea);
72 | return iou;
73 | }
74 |
75 |
76 | /**
77 | * @brief Applies Soft-NMS to a set of detected bounding boxes to reduce overlapping detections.
78 | *
79 | * @param detections Vector of detections to process.
80 | * @param sigma Soft-NMS parameter controlling the Gaussian function's width. Default is 0.5.
81 | * @param iou_threshold IoU threshold for suppression. Default is 0.3.
82 | */
83 | void applySoftNMS(std::vector<Detection> &detections, float sigma = 0.5, float iou_threshold = 0.3)
84 | {
85 | for (size_t i = 0; i < detections.size(); ++i)
86 | {
87 | for (size_t j = i + 1; j < detections.size(); ++j)
88 | {
89 | float iou = computeIOU(detections[i].bbox, detections[j].bbox);
90 | if (iou > iou_threshold)
91 | {
92 | // Apply the Soft-NMS score decay formula
93 | detections[j].confidence *= std::exp(-iou * iou / sigma);
94 | }
95 | }
96 | }
97 |
98 | // Remove detections with low confidence scores
99 | detections.erase(std::remove_if(detections.begin(), detections.end(),
100 | [](const Detection &det) { return det.confidence < 0.001; }),
101 | detections.end());
102 | }
103 |
104 |
105 | /**
106 | * @brief Apply Histogram Equalization to an image.
107 | *
108 | * @param src Input image in BGR format.
109 | * @return Image with enhanced contrast.
110 | */
111 | cv::Mat applyHistogramEqualization(const cv::Mat &src)
112 | {
113 | cv::Mat ycrcb_image;
114 | cv::cvtColor(src, ycrcb_image, cv::COLOR_BGR2YCrCb); // Convert to YCrCb color space
115 |
116 | std::vector<cv::Mat> channels;
117 | cv::split(ycrcb_image, channels);
118 |
119 | // Apply histogram equalization to the Y channel (intensity)
120 | cv::equalizeHist(channels[0], channels[0]);
121 |
122 | // Merge back the channels and convert to BGR
123 | cv::merge(channels, ycrcb_image);
124 | cv::Mat result;
125 | cv::cvtColor(ycrcb_image, result, cv::COLOR_YCrCb2BGR);
126 |
127 | return result;
128 | }
129 |
130 | /**
131 | * @brief Apply CLAHE to an image for adaptive contrast enhancement.
132 | *
133 | * @param src Input image in BGR format.
134 | * @return Image with enhanced local contrast.
135 | */
136 | cv::Mat applyCLAHE(const cv::Mat &src)
137 | {
138 | cv::Mat lab_image;
139 | cv::cvtColor(src, lab_image, cv::COLOR_BGR2Lab); // Convert to LAB color space
140 |
141 | std::vector<cv::Mat> lab_planes;
142 | cv::split(lab_image, lab_planes);
143 |
144 | // Apply CLAHE to the L channel (lightness)
145 | cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE();
146 | clahe->setClipLimit(4.0); // Set the clip limit for contrast enhancement
147 | clahe->apply(lab_planes[0], lab_planes[0]);
148 |
149 | // Merge the planes back and convert to BGR
150 | cv::merge(lab_planes, lab_image);
151 | cv::Mat result;
152 | cv::cvtColor(lab_image, result, cv::COLOR_Lab2BGR);
153 |
154 | return result;
155 | }
156 |
157 |
158 | /**
159 | * @brief Apply Gamma Correction to an image.
160 | *
161 | * @param src Input image in BGR format.
162 | * @param gamma Gamma value for correction. Values < 1 will lighten the image, values > 1 will darken it.
163 | * @return Image with gamma correction applied.
164 | */
165 | cv::Mat applyGammaCorrection(const cv::Mat &src, float gamma)
166 | {
167 | cv::Mat lut(1, 256, CV_8UC1);
168 | uchar* p = lut.ptr();
169 | for (int i = 0; i < 256; ++i)
170 | {
171 | p[i] = cv::saturate_cast<uchar>(std::pow(i / 255.0, gamma) * 255.0);
172 | }
173 |
174 | cv::Mat result;
175 | cv::LUT(src, lut, result); // Apply the gamma lookup table to the image
176 |
177 | return result;
178 | }
179 |
180 |
181 | InferenceEngine::InferenceEngine(const std::string &model_path)
182 | : env(ORT_LOGGING_LEVEL_WARNING, "ONNXRuntime"),
183 | session_options(),
184 | session(env, model_path.c_str(), session_options),
185 | input_shape{1, 3, 640, 640}
186 | {
187 | session_options.SetIntraOpNumThreads(1);
188 | session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_BASIC);
189 |
190 | // Check if the session was created successfully
191 | if (!session)
192 | {
193 | throw std::runtime_error("Failed to create ONNX Runtime session.");
194 | }
195 | }
196 |
197 | InferenceEngine::~InferenceEngine() {}
198 |
199 | /*
200 | * Function to preprocess the image
201 | *
202 | * @param image: input image as cv::Mat
203 | * @return: vector of floats representing the preprocessed image
204 | */
205 | std::vector<float> InferenceEngine::preprocessImage(const cv::Mat &image)
206 | {
207 | if (image.empty())
208 | {
209 | throw std::runtime_error("Could not read the image");
210 | }
211 |
212 | // Step 1: Apply image enhancement techniques
213 | cv::Mat enhanced_image = applyCLAHE(image); // Use CLAHE as an example
214 | // cv::Mat enhanced_image = applyHistogramEqualization(image); // Or use Histogram Equalization
215 | // cv::Mat enhanced_image = applyGammaCorrection(image, 1.2); // Or use Gamma Correction
216 |
217 | // Step 2: Apply letterbox to the enhanced image
218 | cv::Mat letterboxed_image = letterbox(enhanced_image, cv::Size(input_shape[2], input_shape[3]));
219 |
220 | // Step 3: Convert image to float and normalize
221 | letterboxed_image.convertTo(letterboxed_image, CV_32F, 1.0 / 255);
222 |
223 | // Step 4: Convert from BGR to RGB
224 | cv::cvtColor(letterboxed_image, letterboxed_image, cv::COLOR_BGR2RGB);
225 |
226 | // Step 5: Prepare the input tensor values as a 1D vector
227 | std::vector<float> input_tensor_values;
228 | input_tensor_values.reserve(input_shape[1] * input_shape[2] * input_shape[3]);
229 |
230 | // Convert Mat to vector of floats (HWC to CHW)
231 | std::vector<cv::Mat> channels(3);
232 | cv::split(letterboxed_image, channels);
233 |
234 | for (int c = 0; c < 3; ++c)
235 | {
236 | input_tensor_values.insert(input_tensor_values.end(), (float *)channels[c].data, (float *)channels[c].data + input_shape[2] * input_shape[3]);
237 | }
238 |
239 | return input_tensor_values;
240 | }
241 |
242 |
243 |
244 | /*
245 | * Function to filter the detections based on the confidence threshold
246 | *
247 | * @param results: vector of floats representing the output tensor
248 | * @param confidence_threshold: minimum confidence threshold
249 | * @param img_width: width of the input image
250 | * @param img_height: height of the input image
251 | * @param orig_width: original width of the image
252 | * @param orig_height: original height of the image
253 | * @return: vector of Detection objects
254 | */
255 | std::vector<Detection> InferenceEngine::filterDetections(const std::vector<float> &results, float confidence_threshold, int img_width, int img_height, int orig_width, int orig_height)
256 | {
257 | std::vector<Detection> detections;
258 | const int num_detections = results.size() / 6;
259 |
260 | // Calculate scale and padding factors
261 | float scale = std::min(img_width / (float)orig_width, img_height / (float)orig_height);
262 | int new_width = static_cast<int>(orig_width * scale);
263 | int new_height = static_cast<int>(orig_height * scale);
264 | int pad_x = (img_width - new_width) / 2;
265 | int pad_y = (img_height - new_height) / 2;
266 |
267 | detections.reserve(num_detections);
268 |
269 | for (int i = 0; i < num_detections; ++i)
270 | {
271 | float left = results[i * 6 + 0];
272 | float top = results[i * 6 + 1];
273 | float right = results[i * 6 + 2];
274 | float bottom = results[i * 6 + 3];
275 | float confidence = results[i * 6 + 4];
276 | int class_id = static_cast<int>(results[i * 6 + 5]);
277 |
278 | if (confidence >= confidence_threshold)
279 | {
280 | // Remove padding and rescale to original image dimensions
281 | left = (left - pad_x) / scale;
282 | top = (top - pad_y) / scale;
283 | right = (right - pad_x) / scale;
284 | bottom = (bottom - pad_y) / scale;
285 |
286 | int x = static_cast<int>(left);
287 | int y = static_cast<int>(top);
288 | int width = static_cast<int>(right - left);
289 | int height = static_cast<int>(bottom - top);
290 |
291 | detections.push_back(
292 | {confidence,
293 | cv::Rect(x, y, width, height),
294 | class_id,
295 | CLASS_NAMES[class_id]});
296 | }
297 | }
298 |
299 | // Apply Soft-NMS to refine detections
300 | applySoftNMS(detections, 0.5, 0.3); // You can tweak the sigma and IoU threshold values as needed
301 |
302 | return detections;
303 | }
304 |
305 |
306 | /*
307 | * Function to run inference
308 | *
309 | * @param input_tensor_values: vector of floats representing the input tensor
310 | * @return: vector of floats representing the output tensor
311 | */
312 | std::vector<float> InferenceEngine::runInference(const std::vector<float> &input_tensor_values)
313 | {
314 | Ort::AllocatorWithDefaultOptions allocator;
315 |
316 | std::string input_name = getInputName();
317 | std::string output_name = getOutputName();
318 |
319 | const char *input_name_ptr = input_name.c_str();
320 | const char *output_name_ptr = output_name.c_str();
321 |
322 | Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
323 | Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, const_cast<float *>(input_tensor_values.data()), input_tensor_values.size(), input_shape.data(), input_shape.size());
324 |
325 | auto output_tensors = session.Run(Ort::RunOptions{nullptr}, &input_name_ptr, &input_tensor, 1, &output_name_ptr, 1);
326 |
327 | float *floatarr = output_tensors[0].GetTensorMutableData<float>();
328 | size_t output_tensor_size = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
329 |
330 | return std::vector<float>(floatarr, floatarr + output_tensor_size);
331 | }
332 |
333 | /*
334 | * Function to draw the labels on the image
335 | *
336 | * @param image: input image
337 | * @param detections: vector of Detection objects
338 | * @return: image with labels drawn
339 | */
340 | cv::Mat InferenceEngine::draw_labels(const cv::Mat &image, const std::vector<Detection> &detections)
341 | {
342 | cv::Mat result = image.clone();
343 |
344 | for (const auto &detection : detections)
345 | {
346 | cv::rectangle(result, detection.bbox, cv::Scalar(0, 255, 0), 2);
347 | std::string label = detection.class_name + ": " + std::to_string(detection.confidence);
348 |
349 | int baseLine;
350 | cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
351 |
352 | cv::rectangle(
353 | result,
354 | cv::Point(detection.bbox.x, detection.bbox.y - labelSize.height),
355 | cv::Point(detection.bbox.x + labelSize.width, detection.bbox.y + baseLine),
356 | cv::Scalar(255, 255, 255),
357 | cv::FILLED);
358 |
359 | cv::putText(
360 | result,
361 | label,
362 | cv::Point(detection.bbox.x, detection.bbox.y),
363 | cv::FONT_HERSHEY_SIMPLEX,
364 | 0.5,
365 | cv::Scalar(0, 0, 0),
366 | 1);
367 | }
368 |
369 | return result;
370 | }
371 |
372 | /*
373 | * Function to get the input name
374 | *
375 | * @return: name of the input tensor
376 | */
377 | std::string InferenceEngine::getInputName()
378 | {
379 | Ort::AllocatorWithDefaultOptions allocator;
380 | Ort::AllocatedStringPtr name_allocator = session.GetInputNameAllocated(0, allocator);
381 | return std::string(name_allocator.get());
382 | }
383 |
384 | /*
385 | * Function to get the output name
386 | *
387 | * @return: name of the output tensor
388 | */
389 | std::string InferenceEngine::getOutputName()
390 | {
391 | Ort::AllocatorWithDefaultOptions allocator;
392 | Ort::AllocatedStringPtr name_allocator = session.GetOutputNameAllocated(0, allocator);
393 | return std::string(name_allocator.get());
394 | }
--------------------------------------------------------------------------------
/src/ia/inference.h:
--------------------------------------------------------------------------------
1 | #ifndef INFERENCE_H
2 | #define INFERENCE_H
3 |
4 | #include <opencv2/opencv.hpp>
5 | #include <onnxruntime_cxx_api.h>
6 | #include <string>
7 | #include <vector>
8 | #include <cmath> // For exp function
9 |
10 | struct Detection
11 | {
12 | float confidence;
13 | cv::Rect bbox;
14 | int class_id;
15 | std::string class_name;
16 | };
17 |
18 |
19 | class InferenceEngine
20 | {
21 | public:
22 | InferenceEngine(const std::string &model_path);
23 | ~InferenceEngine();
24 |
25 | std::vector<float> preprocessImage(const cv::Mat &image);
26 | std::vector<Detection> filterDetections(const std::vector<float> &results, float confidence_threshold, int img_width, int img_height, int orig_width, int orig_height);
27 | std::vector<float> runInference(const std::vector<float> &input_tensor_values);
28 |
29 | cv::Mat draw_labels(const cv::Mat &image, const std::vector<Detection> &detections);
30 |
31 | std::vector<int64_t> input_shape;
32 |
33 | private:
34 | Ort::Env env;
35 | Ort::SessionOptions session_options;
36 | Ort::Session session;
37 |
38 | std::string getInputName();
39 | std::string getOutputName();
40 |
41 | static const std::vector<std::string> CLASS_NAMES;
42 | };
43 |
44 |
45 | #endif // INFERENCE_H
46 |
--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
1 | #include "./ia/inference.h"
2 | #include <iostream>
3 | #include <string>
4 |
5 |
6 |
7 | int main(int argc, char *argv[])
8 | {
9 | if (argc != 3)
10 | {
11 | std::cerr << "Usage: " << argv[0] << " " << std::endl;
12 | return 1;
13 | }
14 |
15 | std::string model_path = argv[1];
16 | std::string image_path = argv[2];
17 |
18 | try
19 | {
20 | InferenceEngine engine(model_path);
21 |
22 | cv::Mat image = cv::imread(image_path);
23 | int orig_width = image.cols;
24 | int orig_height = image.rows;
25 | std::vector<float> input_tensor_values = engine.preprocessImage(image);
26 |
27 | std::vector<float> results = engine.runInference(input_tensor_values);
28 |
29 | float confidence_threshold = 0.3;
30 |
31 | std::vector<Detection> detections = engine.filterDetections(results, confidence_threshold, engine.input_shape[2], engine.input_shape[3], orig_width, orig_height);
32 |
33 | cv::Mat output = engine.draw_labels(image, detections);
34 |
35 | cv::imwrite("result.jpg", output);
36 | }
37 | catch (const std::exception &e)
38 | {
39 | std::cerr << "Error: " << e.what() << std::endl;
40 | return 1;
41 | }
42 |
43 | return 0;
44 | }
45 |
--------------------------------------------------------------------------------
/src/video.cpp:
--------------------------------------------------------------------------------
1 | #include "./ia/inference.h"
2 | #include <iostream>
3 | #include <cstdlib> // For atoi
4 |
5 | int main(int argc, char const *argv[])
6 | {
7 | if (argc != 3)
8 | {
9 | std::cerr << "Usage: " << argv[0] << " " << std::endl;
10 | return 1;
11 | }
12 | std::string model_path = argv[1];
13 |
14 | auto source = atoi(argv[2]); // camera index, e.g. 0 for the default webcam
15 | int apiID = cv::CAP_ANY; // 0 = autodetect default API
16 |
17 | cv::namedWindow("yolov10", cv::WINDOW_AUTOSIZE);
18 |
19 | InferenceEngine engine(model_path);
20 |
21 | cv::VideoCapture cap;
22 |
23 | cap.open(source, apiID);
24 |
25 | if (!cap.isOpened())
26 | {
27 | std::cerr << "ERROR! Unable to open camera\n";
28 | return -1;
29 | }
30 |
31 | cv::Mat frame;
32 |
33 | std::cout << "Start grabbing" << std::endl
34 | << "Press any key to terminate" << std::endl;
35 |
36 | for (;;)
37 | {
38 | cap.read(frame);
39 |
40 | if (frame.empty())
41 | {
42 | std::cerr << "ERROR! blank frame grabbed\n";
43 | break;
44 | }
45 |
46 | int orig_width = frame.cols;
47 | int orig_height = frame.rows;
48 | auto timer = cv::getTickCount();
49 |
50 | std::vector<float> input_tensor_values = engine.preprocessImage(frame);
51 |
52 | std::vector<float> results = engine.runInference(input_tensor_values);
53 |
54 | float confidence_threshold = 0.3;
55 |
56 | std::vector<Detection> detections = engine.filterDetections(results, confidence_threshold, engine.input_shape[2], engine.input_shape[3], orig_width, orig_height);
57 |
58 | double fps = cv::getTickFrequency() / ((double)cv::getTickCount() - timer);
59 |
60 | cv::putText(frame, "FPS: " + std::to_string(fps), cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2, 8);
61 |
62 | cv::Mat output = engine.draw_labels(frame, detections);
63 |
64 | cv::imshow("test", output);
65 |
66 | if (cv::waitKey(5) >= 0)
67 | break;
68 | }
69 |
70 | return 0;
71 | }
72 |
--------------------------------------------------------------------------------