├── .gitignore ├── CHANGELOG.md ├── CMakeLists.example.txt ├── LICENSE ├── README.md ├── assets ├── 000000000143-det.jpg ├── 000000000143-py-det.jpg ├── 000000000143-py-seg.jpg ├── 000000000143-seg.jpg ├── 000000000144-det.jpg ├── 000000000144-py-det.jpg ├── 000000000144-py-seg.jpg ├── 000000000144-seg.jpg ├── 000000000382-kpt-cpp.jpg ├── 000000000382-kpt-py.jpg ├── clion_screen.png └── export.png ├── checkpoints ├── yolov8n-seg.onnx └── yolov8n.onnx ├── images ├── 000000000143.jpg ├── 000000000144.jpg └── 000000000382.jpg ├── include ├── constants.h ├── nn │ ├── autobackend.h │ └── onnx_model_base.h └── utils │ ├── augment.h │ ├── common.h │ └── ops.h └── src ├── main.cpp ├── nn ├── autobackend.cpp └── onnx_model_base.cpp └── utils ├── augment.cpp ├── common.cpp └── ops.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # ide related 2 | .vs/* 3 | .idea/* 4 | # build 5 | .build/* 6 | x64/* 7 | cmake-build-debug/* 8 | cmake-build-debug-visual-studio/* 9 | # ignore CMakeLists.txt but add example file 10 | CMakeLists.txt 11 | # dlls: 12 | *.dll 13 | # vs-like objects 14 | *.sln 15 | *vcxproj* 16 | packages.config 17 | packages/* 18 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 2024-05-09 4 | ### Fixed 🔨 5 | * Fixed memory leak by deleting the `blob` during the `predict_once` method call: 6 | [PR#7](https://github.com/FourierMourier/yolov8-onnx-cpp/pull/7), by [@dusionlike](https://github.com/dusionlike) 7 | 8 | ## 2024-04-22 9 | ### Fixed 🔨 10 | * Fixed returning not scaled coords for the keypoint task. 11 | [PR#5](https://github.com/FourierMourier/yolov8-onnx-cpp/pull/5), by [@youngday](https://github.com/youngday) 12 | * Fixed compilation issue on Linux due to the same type `model_path` arg 13 | in the `Ort::Session` constructor for both Windows and Linux. 14 | [PR#4](https://github.com/FourierMourier/yolov8-onnx-cpp/pull/4), 15 | by [@bhavya-goyal-22](https://github.com/bhavya-goyal-22) and [FourierMourier](https://github.com/FourierMourier) 16 | -------------------------------------------------------------------------------- /CMakeLists.example.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 3.0.0) 2 | project(YOLOv8CPP) 3 | 4 | SET (OpenCV_DIR your/path/to/opencv/build/x64/vc16/lib) # opencv lib root 5 | SET (OpenCV_BIN_DIR your/path/to/opencv/build/x64/vc16/bin) #opencv bin root 6 | 7 | SET (OpenCV_DEBUG_DLL_FILENAME opencv_world480d.dll) # change filenames 8 | SET (OpenCV_RELEASE_DLL_FILENAME opencv_world480.dll) # change filenames 9 | 10 | SET (ONNXRUNTIME_DIR your/path/to/onnxruntime-win-x64-1.15.1) # onnxruntime root 11 | 12 | FIND_PACKAGE(OpenCV REQUIRED) 13 | 14 | # --- Configure your project files --- 15 | include_directories(include) # Include your header files directory 16 | 17 | # Recursively collect all source files under 'src' directory 18 | file(GLOB_RECURSE CURR_SOURCES src/*.cpp) 19 | 20 | # Create the executable 21 | add_executable(YOLOv8CPP ${CURR_SOURCES}) 22 | 23 | SET(CMAKE_CXX_STANDARD 17) 24 | SET(CMAKE_CXX_STANDARD_REQUIRED ON) 25 | 26 | TARGET_INCLUDE_DIRECTORIES(YOLOv8CPP PRIVATE "${ONNXRUNTIME_DIR}/include") 27 | 28 | target_compile_features(YOLOv8CPP PRIVATE cxx_std_17) 29 | 30 | TARGET_LINK_LIBRARIES(YOLOv8CPP ${OpenCV_LIBS}) 31 | 32 | if (WIN32) 33 | TARGET_LINK_LIBRARIES(YOLOv8CPP "${ONNXRUNTIME_DIR}/lib/onnxruntime.lib") 34 | 35 | # some changes to the original version: 36 | # copy onnxruntime dll 37 | add_custom_command(TARGET YOLOv8CPP POST_BUILD 38 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 39 | "${ONNXRUNTIME_DIR}/lib/onnxruntime.dll" 40 | "$" 41 | ) 42 | # copy opencv 43 | #[[ add_custom_command(TARGET YOLOv8CPP POST_BUILD 44 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 45 | "${OpenCV_DIR}/${OpenCV_DLL_FILENAME}" 46 | "$" 47 | )]] 48 | add_custom_command(TARGET YOLOv8CPP POST_BUILD 49 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 50 | "${OpenCV_BIN_DIR}/${OpenCV_DEBUG_DLL_FILENAME}" 51 | "$" 52 | ) 53 | # add release 54 | add_custom_command(TARGET YOLOv8CPP POST_BUILD 55 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 56 | "${OpenCV_BIN_DIR}/${OpenCV_RELEASE_DLL_FILENAME}" 57 | "$" 58 | ) 59 | 60 | endif(WIN32) 61 | 62 | if (UNIX) 63 | TARGET_LINK_LIBRARIES(YOLOv8CPP "${ONNXRUNTIME_DIR}/lib/libonnxruntime.so") 64 | endif(UNIX) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2023 Elshat Akmaev 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolov8-onnx-cpp 2 | 3 | ## Changelog 4 | See the changelog [here](CHANGELOG.md) 5 | 6 | ## Description 7 | Hello there! yolov8-onnx-cpp is a C++ demo implementation of the YOLOv8 model using the ONNX library. 8 | This project is based on the YOLOv8 model by Ultralytics. 9 | I aimed to replicate the behavior of the Python version and achieve consistent results across various image sizes. 10 | 11 | By the way, you don't need to specify names, img_size etc while initializing the model, since we can use ONNX metadata! 12 | 13 | When you do export in python in onnx format, the following code executes 14 | ```python 15 | self.metadata = { 16 | 'description': description, 17 | 'author': 'Ultralytics', 18 | 'license': 'AGPL-3.0 https://ultralytics.com/license', 19 | 'date': datetime.now().isoformat(), 20 | 'version': __version__, 21 | 'stride': int(max(model.stride)), 22 | 'task': model.task, 23 | 'batch': self.args.batch, 24 | 'imgsz': self.imgsz, 25 | 'names': model.names} # model metadata 26 | if model.task == 'pose': 27 | self.metadata['kpt_shape'] = model.model[-1].kpt_shape 28 | ``` 29 | 30 | (ultralytics 8.0.160, ultralytics/engine/exporter.py lines 221-233)) 31 | 32 | We can use this parameters at least to define stride, task, names and image size as described in the schema below: 33 | 34 | ![Schema](assets/export.png) 35 | 36 | ## Supported Tasks and Hardware 37 | 38 | | Task | Supported | 39 | |------------|-----------| 40 | | Detect | ✔️ | 41 | | Segment | ✔️ | 42 | | Pose | ✔️ | 43 | | Classify | | 44 | 45 | 46 | | Hardware | Supported | 47 | |------------|-----------| 48 | | CPU | ✔️ | 49 | | GPU | | 50 | 51 | 52 | ## Comparison between Python and C++ 53 | 54 | I exported `yolov8n.pt`, `yolov8n-seg.pt`, `yolov8n-pose.pt` to ONNX format with an input size of [480, 640] ([height, width]). 55 | For the test I used some images from the COCO128 dataset with different image sizes than the specified input. 56 | This difference in sizes triggered letterboxing. I maintained consistent parameters, 57 | setting `conf=0.3` and `iou=0.5` for all models. 58 | 59 | Here are the comparison results: 60 | 61 | 62 | ### Pose 63 | Python Result 64 | 65 | ![Python Result](assets/000000000382-kpt-py.jpg) 66 | 67 | C++ Result 68 | 69 | ![C++ Result](assets/000000000382-kpt-cpp.jpg) 70 | 71 | ### Segmentation 72 | Python Result 1 73 | 74 | ![Python Result](assets/000000000143-py-seg.jpg) 75 | 76 | C++ Result 1 77 | 78 | ![C++ Result](assets/000000000143-seg.jpg) 79 | 80 | Python Result 2 81 | 82 | ![Python Result](assets/000000000144-py-seg.jpg) 83 | 84 | C++ Result 2 85 | 86 | ![C++ Result](assets/000000000144-seg.jpg) 87 | 88 | ### Object detection 89 | 90 | Python Result 1 91 | 92 | ![Python Result](assets/000000000143-py-det.jpg) 93 | 94 | C++ Result 1 95 | 96 | ![C++ Result](assets/000000000143-det.jpg) 97 | 98 | Python Result 2 99 | 100 | ![Python Result](assets/000000000144-py-det.jpg) 101 | 102 | C++ Result 2 103 | 104 | ![C++ Result](assets/000000000144-det.jpg) 105 | 106 | ## Getting Started 107 | To get started with yolov8-onnx-cpp, follow these steps: 108 | 109 | 1. Clone the repository: 110 | ```shell 111 | git clone https://github.com/FourierMourier/yolov8-onnx-cpp.git 112 | ``` 113 | 2. Setup additional libraries: 114 | 115 | Download [opencv here](https://opencv.org/releases/) (4.80+) 116 | 117 | ### Solution-like build 118 | * onnxruntime (1.50+) (nuget package) 119 | 120 | ### Cmake in non-visual studio-like ide 121 | Copy `CMakeLists.example.txt` as `CMakeLists.txt` (git-ignored) and edit the following lines: 122 | ```cmake 123 | 124 | SET (OpenCV_DIR your/path/to/opencv/build/x64/vc16/lib) # opencv lib root 125 | SET (OpenCV_BIN_DIR your/path/to/opencv/build/x64/vc16/bin) #opencv bin root 126 | 127 | SET (OpenCV_DEBUG_DLL_FILENAME opencv_world480d.dll) # change filenames 128 | SET (OpenCV_RELEASE_DLL_FILENAME opencv_world480.dll) # change filenames 129 | 130 | SET (ONNXRUNTIME_DIR your/path/to/onnxruntime-win-x64-1.15.1) # onnxruntime root 131 | 132 | ``` 133 | 134 | Even though you'll find only a nuget package on the [official page](https://onnxruntime.ai/docs/install/#cccwinml-installs) 135 | you can still download release for cmake file here for your platform: 136 | https://github.com/microsoft/onnxruntime/releases 137 | 138 | If you're working in different IDE like Clion rather than visual studio you still have to do the following: 139 | 140 | * Install Visual Studio: If you haven't already, consider installing Visual Studio on your Windows system. 141 | You can download the Visual Studio Community edition for free from the official Microsoft website. 142 | Ensure that you select the components necessary for C++ development. 143 | * Configure CLion to Use Visual Studio: Open CLion, go to "File" > "Settings" > "Build, Execution, Deployment" > 144 | "Toolchains." In the "Environment" section, select the Visual Studio toolchain that you installed. 145 | Make sure it points to the correct Visual Studio installation directory. 146 | 147 | * CMake Configuration: Ensure that your CMake configuration in CLion specifies the Visual Studio generator 148 | (e.g., "Visual Studio 2022"). 149 | This can be set in "File" > "Settings" > "Build, Execution, Deployment" > "CMake" ( > "CMake options.") 150 | ![clion_screen](assets/clion_screen.png) 151 | 152 | So the issues like 153 | * https://github.com/microsoft/onnxruntime/issues/1175 154 | * https://github.com/microsoft/onnxruntime/issues/9332 155 | * https://github.com/microsoft/onnxruntime/issues/11545 156 | will be gone 157 | 158 | The issue like `"The given version [15] is not supported, only version 1 to 10 is supported in this build="` 159 | https://github.com/microsoft/onnxruntime/issues/11230 160 | 161 | also should not occur since you configure dll in cmake 162 | 163 | Hope that helps! 164 | 165 | 3. edit in the ./src/main.cpp img_path/&modelPath: 166 | ```cpp 167 | std::string img_path = "./images/000000000143.jpg"; 168 | //const std::string& modelPath = "./checkpoints/yolov8n.onnx"; // detection 169 | const std::string& modelPath = "./checkpoints/yolov8n-seg.onnx"; // instance segmentation 170 | const std::string& onnx_provider = OnnxProviders::CPU; // "cpu"; 171 | ``` 172 | # Usage 173 | Provide an input image to the application, and it will perform object detection using the YOLOv8 model. 174 | Customize the model configuration and parameters in the code as needed. 175 | 176 | # References 177 | * [YOLOv8 by Ultralytics](https://github.com/ultralytics/ultralytics) 178 | * [ONNX](https://onnx.ai) 179 | * [OpenCV](https://opencv.org) 180 | 181 | # License 182 | This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details. 183 | 184 | # Acknowledgments 185 | Some other cool repositories I found useful (and you might too): 186 | * https://github.com/winxos/yolov8_segment_onnx_in_cpp - another project implementing yolov8 segmentation in cpp 187 | * https://github.com/cyrusbehr/YOLOv8-TensorRT-CPP - tensorrt impelemntation in cpp 188 | * https://github.com/itsnine/yolov5-onnxruntime/tree/master yolov5 onnx in C++ 189 | 190 | This README was created with the assistance of OpenAI's ChatGPT (August 3 Version), a large language model. 191 | You can learn more about it [here](https://chat.openai.com/chat) 192 | -------------------------------------------------------------------------------- /assets/000000000143-det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/assets/000000000143-det.jpg -------------------------------------------------------------------------------- /assets/000000000143-py-det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/assets/000000000143-py-det.jpg -------------------------------------------------------------------------------- /assets/000000000143-py-seg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/assets/000000000143-py-seg.jpg -------------------------------------------------------------------------------- /assets/000000000143-seg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/assets/000000000143-seg.jpg -------------------------------------------------------------------------------- /assets/000000000144-det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/assets/000000000144-det.jpg -------------------------------------------------------------------------------- /assets/000000000144-py-det.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/assets/000000000144-py-det.jpg -------------------------------------------------------------------------------- /assets/000000000144-py-seg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/assets/000000000144-py-seg.jpg -------------------------------------------------------------------------------- /assets/000000000144-seg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/assets/000000000144-seg.jpg -------------------------------------------------------------------------------- /assets/000000000382-kpt-cpp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/assets/000000000382-kpt-cpp.jpg -------------------------------------------------------------------------------- /assets/000000000382-kpt-py.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/assets/000000000382-kpt-py.jpg -------------------------------------------------------------------------------- /assets/clion_screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/assets/clion_screen.png -------------------------------------------------------------------------------- /assets/export.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/assets/export.png -------------------------------------------------------------------------------- /checkpoints/yolov8n-seg.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/checkpoints/yolov8n-seg.onnx -------------------------------------------------------------------------------- /checkpoints/yolov8n.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/checkpoints/yolov8n.onnx -------------------------------------------------------------------------------- /images/000000000143.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/images/000000000143.jpg -------------------------------------------------------------------------------- /images/000000000144.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/images/000000000144.jpg -------------------------------------------------------------------------------- /images/000000000382.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FourierMourier/yolov8-onnx-cpp/6f9d6716fe98febd04df553dd71f9d01423a7f2a/images/000000000382.jpg -------------------------------------------------------------------------------- /include/constants.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace MetadataConstants { 5 | inline const std::string IMGSZ = "imgsz"; 6 | inline const std::string STRIDE = "stride"; 7 | inline const std::string NC = "nc"; 8 | inline const std::string CH = "ch"; 9 | inline const std::string DATE = "date"; 10 | inline const std::string VERSION = "version"; 11 | inline const std::string TASK = "task"; 12 | inline const std::string BATCH = "batch"; 13 | inline const std::string NAMES = "names"; 14 | } 15 | 16 | namespace OnnxProviders { 17 | inline const std::string CPU = "cpu"; 18 | inline const std::string CUDA = "cuda"; 19 | } 20 | 21 | namespace OnnxInitializers 22 | { 23 | inline const int UNINITIALIZED_STRIDE = -1; 24 | inline const int UNINITIALIZED_NC = -1; 25 | } 26 | 27 | 28 | namespace YoloTasks 29 | { 30 | inline const std::string SEGMENT = "segment"; 31 | inline const std::string DETECT = "detect"; 32 | inline const std::string POSE = "pose"; 33 | inline const std::string CLASSIFY = "classify"; 34 | } 35 | -------------------------------------------------------------------------------- /include/nn/autobackend.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "onnx_model_base.h" 8 | #include "constants.h" 9 | 10 | /** 11 | * @brief Represents the results of YOLO prediction. 12 | * 13 | * This structure stores information about a detected object, including its class index, 14 | * confidence score, bounding box, semantic segmentation mask, and keypoints (if available). 15 | */ 16 | struct YoloResults { 17 | int class_idx{}; ///< The class index of the detected object. 18 | float conf{}; ///< The confidence score of the detection. 19 | cv::Rect_ bbox; ///< The bounding box of the detected object. 20 | cv::Mat mask; ///< The semantic segmentation mask (if available). 21 | std::vector keypoints{}; ///< Keypoints representing the object's pose (if available). 22 | }; 23 | 24 | struct ImageInfo { 25 | cv::Size raw_size; // add additional attrs if you need 26 | }; 27 | 28 | 29 | class AutoBackendOnnx : public OnnxModelBase { 30 | public: 31 | // constructors 32 | AutoBackendOnnx(const char* modelPath, const char* logid, const char* provider, 33 | const std::vector& imgsz, const int& stride, 34 | const int& nc, std::unordered_map names); 35 | 36 | AutoBackendOnnx(const char* modelPath, const char* logid, const char* provider); 37 | 38 | // getters 39 | virtual const std::vector& getImgsz(); 40 | virtual const int& getStride(); 41 | virtual const int& getCh(); 42 | virtual const int& getNc(); 43 | virtual const std::unordered_map& getNames(); 44 | virtual const std::vector& getInputTensorShape(); 45 | virtual const int& getWidth(); 46 | virtual const int& getHeight(); 47 | virtual const cv::Size& getCvSize(); 48 | virtual const std::string& getTask(); 49 | /** 50 | * @brief Runs object detection on an input image. 51 | * 52 | * This method performs object detection on the input image and returns the detected objects as YoloResults. 53 | * 54 | * @param image The input image to run object detection on. 55 | * @param conf The confidence threshold for object detection. 56 | * @param iou The intersection-over-union (IoU) threshold for non-maximum suppression. 57 | * @param mask_threshold The threshold for the semantic segmentation mask. 58 | * @param conversionCode An optional conversion code for image format conversion (e.g., cv::COLOR_BGR2RGB). 59 | * Default value is -1, indicating no conversion. 60 | * TODO: use some constant from some namespace rather than hardcoded values here 61 | * 62 | * @return A vector of YoloResults representing the detected objects. 63 | */ 64 | virtual std::vector predict_once(cv::Mat& image, float& conf, float& iou, float& mask_threshold, int conversionCode = -1, bool verbose = true); 65 | virtual std::vector predict_once(const std::filesystem::path& imagePath, float& conf, float& iou, float& mask_threshold, int conversionCode = -1, bool verbose = true); 66 | virtual std::vector predict_once(const std::string& imagePath, float& conf, float& iou, float& mask_threshold, int conversionCode = -1, bool verbose = true); 67 | 68 | virtual void fill_blob(cv::Mat& image, float*& blob, std::vector& inputTensorShape); 69 | virtual void postprocess_masks(cv::Mat& output0, cv::Mat& output1, ImageInfo para, std::vector& output, 70 | int& class_names_num, float& conf_threshold, float& iou_threshold, 71 | int& iw, int& ih, int& mw, int& mh, int& masks_features_num, float mask_threshold = 0.50f); 72 | 73 | virtual void postprocess_detects(cv::Mat& output0, ImageInfo image_info, std::vector& output, 74 | int& class_names_num, float& conf_threshold, float& iou_threshold); 75 | virtual void postprocess_kpts(cv::Mat& output0, ImageInfo& image_info, std::vector& output, 76 | int& class_names_num, float& conf_threshold, float& iou_threshold); 77 | static void _get_mask2(const cv::Mat& mask_info, const cv::Mat& mask_data, const ImageInfo& image_info, cv::Rect bound, cv::Mat& mask_out, 78 | float& mask_thresh, int& iw, int& ih, int& mw, int& mh, int& masks_features_num, bool round_downsampled = false); 79 | 80 | protected: 81 | std::vector imgsz_; 82 | int stride_ = OnnxInitializers::UNINITIALIZED_STRIDE; 83 | int nc_ = OnnxInitializers::UNINITIALIZED_NC; // 84 | int ch_ = 3; 85 | std::unordered_map names_; 86 | std::vector inputTensorShape_; 87 | cv::Size cvSize_; 88 | std::string task_; 89 | //cv::MatSize cvMatSize_; 90 | }; 91 | -------------------------------------------------------------------------------- /include/nn/onnx_model_base.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | /* 8 | * This interface must provide only required arguments to load any onnx model regarding specific info - 9 | * - i.e. modelPath will always be required, provider like "cpu" or "cuda" the same, since these are parameters you need 10 | * to set up `sessionOptions` or `session` objects properly, but image size is not needed for pure onnx graph to be loaded so do NOT include it here 11 | */ 12 | class OnnxModelBase { 13 | public: 14 | OnnxModelBase(const char* modelPath, const char* logid, const char* provider); 15 | //OnnxModelBase(); // no default constructor should be there 16 | //virtual ~OnnxModelBase(); 17 | virtual const std::vector& getInputNames(); // = 0 18 | virtual const std::vector& getOutputNames(); 19 | virtual const std::vector getOutputNamesCStr(); 20 | virtual const std::vector getInputNamesCStr(); 21 | virtual const Ort::ModelMetadata& getModelMetadata(); 22 | virtual const std::unordered_map& getMetadata(); 23 | virtual const char* getModelPath(); 24 | virtual const Ort::Session& getSession(); 25 | //virtual std::vector forward(std::vector inputTensors); 26 | virtual std::vector forward(std::vector& inputTensors); 27 | Ort::Session session{ nullptr }; 28 | 29 | protected: 30 | const char* modelPath_; 31 | Ort::Env env{ nullptr }; 32 | 33 | std::vector inputNodeNames; 34 | std::vector outputNodeNames; 35 | Ort::ModelMetadata model_metadata{ nullptr }; 36 | std::unordered_map metadata; 37 | std::vector outputNamesCStr; 38 | std::vector inputNamesCStr; 39 | }; 40 | -------------------------------------------------------------------------------- /include/utils/augment.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | void letterbox(const cv::Mat& image, 5 | cv::Mat& outImage, 6 | const cv::Size& newShape = cv::Size(640, 640), 7 | cv::Scalar_ color = cv::Scalar(), bool auto_ = true, 8 | bool scaleFill = false, 9 | bool scaleUp = true, 10 | int stride = 32 11 | ); 12 | 13 | 14 | cv::Mat scale_image(const cv::Mat& resized_mask, const cv::Size& im0_shape, const std::pair& ratio_pad = std::make_pair(-1.0f, cv::Point2f(-1.0f, -1.0f))); 16 | 17 | void scale_image2( 18 | cv::Mat& scaled_mask, const cv::Mat& resized_mask, const cv::Size& im0_shape, 19 | const std::pair& ratio_pad = std::make_pair(-1.0f, cv::Point2f(-1.0f, -1.0f)) 20 | ); 21 | -------------------------------------------------------------------------------- /include/utils/common.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_UTILS_H 2 | #define COMMON_UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | class Timer { 11 | public: 12 | Timer(double& accumulator, bool isEnabled = true); 13 | void Stop(); 14 | 15 | private: 16 | double& accumulator; 17 | bool isEnabled; 18 | std::chrono::time_point start; 19 | }; 20 | 21 | std::wstring get_win_path(const std::string& path); 22 | std::vector parseVectorString(const std::string& input); 23 | std::vector convertStringVectorToInts(const std::vector& input); 24 | std::unordered_map parseNames(const std::string& input); 25 | int64_t vector_product(const std::vector& vec); 26 | #endif // COMMON_H COMMON_UTILS_H -------------------------------------------------------------------------------- /include/utils/ops.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | //cv::Rect scaleCoords(const cv::Size& imageShape, const cv::Rect& coords, const cv::Size& imageOriginalShape); 5 | /** 6 | * Scales a bounding box from the shape of the input image to the shape of an original image. 7 | * 8 | * @param img1_shape The shape (height, width) of the input image for the model. 9 | * @param box The bounding box to be scaled, specified as cv::Rect_. 10 | * @param img0_shape The shape (height, width) of the original target image. 11 | * @param ratio_pad An optional parameter that specifies scaling and padding factors as a pair of values. 12 | * The first value (ratio) is used for scaling, and the second value (pad) is used for padding. 13 | * If not provided, default values will be used. 14 | * @param padding An optional boolean parameter that specifies whether padding should be applied. 15 | * If set to true, padding will be applied to the bounding box. 16 | * 17 | * @return A scaled bounding box specified as cv::Rect_. 18 | * 19 | * This function rescales a bounding box from the shape of the input image (img1_shape) to the shape of an original image (img0_shape). 20 | */ 21 | cv::Rect_ scale_boxes(const cv::Size& img1_shape, cv::Rect_& box, const cv::Size& img0_shape, std::pair ratio_pad = std::make_pair(-1.0f, cv::Point2f(-1.0f, -1.0f)), bool padding = true); 22 | void clip_boxes(cv::Rect& box, const cv::Size& shape); 23 | void clip_boxes(cv::Rect_& box, const cv::Size& shape); 24 | void clip_boxes(std::vector& boxes, const cv::Size& shape); 25 | void clip_boxes(std::vector>& boxes, const cv::Size& shape); 26 | 27 | //void clip_coords(cv::Mat& coords, const cv::Size& shape); 28 | //cv::Mat scale_coords(const cv::Size& img1_shape, cv::Mat& coords, const cv::Size& img0_shape); 29 | void clip_coords(std::vector& coords, const cv::Size& shape); 30 | std::vector scale_coords(const cv::Size& img1_shape, std::vector& coords, const cv::Size& img0_shape); 31 | 32 | cv::Mat crop_mask(const cv::Mat& mask, const cv::Rect& box); 33 | 34 | 35 | struct NMSResult{ 36 | std::vector bboxes; 37 | std::vector confidences; 38 | std::vector classes; 39 | std::vector> rest; 40 | }; 41 | 42 | //std::tuple>, std::vector, std::vector, std::vector>> 43 | std::tuple, std::vector, std::vector, std::vector>> 44 | non_max_suppression(const cv::Mat& output0, int class_names_num, int total_features_num, double conf_threshold, float iou_threshold); -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include "nn/onnx_model_base.h" 5 | #include "nn/autobackend.h" 6 | #include 7 | #include 8 | 9 | #include "utils/augment.h" 10 | #include "constants.h" 11 | #include "utils/common.h" 12 | 13 | 14 | namespace fs = std::filesystem; 15 | 16 | 17 | // Define the skeleton and color mappings 18 | std::vector> skeleton = {{16, 14}, {14, 12}, {17, 15}, {15, 13}, {12, 13}, {6, 12}, {7, 13}, {6, 7}, 19 | {6, 8}, {7, 9}, {8, 10}, {9, 11}, {2, 3}, {1, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6}, {5, 7}}; 20 | 21 | std::vector posePalette = { 22 | cv::Scalar(255, 128, 0), cv::Scalar(255, 153, 51), cv::Scalar(255, 178, 102), cv::Scalar(230, 230, 0), cv::Scalar(255, 153, 255), 23 | cv::Scalar(153, 204, 255), cv::Scalar(255, 102, 255), cv::Scalar(255, 51, 255), cv::Scalar(102, 178, 255), cv::Scalar(51, 153, 255), 24 | cv::Scalar(255, 153, 153), cv::Scalar(255, 102, 102), cv::Scalar(255, 51, 51), cv::Scalar(153, 255, 153), cv::Scalar(102, 255, 102), 25 | cv::Scalar(51, 255, 51), cv::Scalar(0, 255, 0), cv::Scalar(0, 0, 255), cv::Scalar(255, 0, 0), cv::Scalar(255, 255, 255) 26 | }; 27 | 28 | std::vector limbColorIndices = {9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16}; 29 | std::vector kptColorIndices = {16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9}; 30 | 31 | 32 | 33 | cv::Scalar generateRandomColor(int numChannels) { 34 | if (numChannels < 1 || numChannels > 3) { 35 | throw std::invalid_argument("Invalid number of channels. Must be between 1 and 3."); 36 | } 37 | 38 | std::random_device rd; 39 | std::mt19937 gen(rd()); 40 | std::uniform_int_distribution dis(0, 255); 41 | 42 | cv::Scalar color; 43 | for (int i = 0; i < numChannels; i++) { 44 | color[i] = dis(gen); // for each channel separately generate value 45 | } 46 | 47 | return color; 48 | } 49 | 50 | std::vector generateRandomColors(int class_names_num, int numChannels) { 51 | std::vector colors; 52 | for (int i = 0; i < class_names_num; i++) { 53 | cv::Scalar color = generateRandomColor(numChannels); 54 | colors.push_back(color); 55 | } 56 | return colors; 57 | } 58 | 59 | void plot_masks(cv::Mat img, std::vector& result, std::vector color, 60 | std::unordered_map& names) 61 | { 62 | cv::Mat mask = img.clone(); 63 | for (int i = 0; i < result.size(); i++) 64 | { 65 | float left, top; 66 | left = result[i].bbox.x; 67 | top = result[i].bbox.y; 68 | int color_num = i; 69 | int& class_idx = result[i].class_idx; 70 | rectangle(img, result[i].bbox, color[result[i].class_idx], 2); 71 | 72 | // try to get string value corresponding to given class_idx 73 | std::string class_name; 74 | auto it = names.find(class_idx); 75 | if (it != names.end()) { 76 | class_name = it->second; 77 | } 78 | else { 79 | std::cerr << "Warning: class_idx not found in names for class_idx = " << class_idx << std::endl; 80 | // then convert it to string anyway 81 | class_name = std::to_string(class_idx); 82 | } 83 | 84 | if (result[i].mask.rows && result[i].mask.cols > 0) 85 | { 86 | mask(result[i].bbox).setTo(color[result[i].class_idx], result[i].mask); 87 | } 88 | std::stringstream labelStream; 89 | labelStream << class_name << " " << std::fixed << std::setprecision(2) << result[i].conf; 90 | std::string label = labelStream.str(); 91 | 92 | cv::Size text_size = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, nullptr); 93 | cv::Rect rect_to_fill(left - 1, top - text_size.height - 5, text_size.width + 2, text_size.height + 5); 94 | cv::Scalar text_color = cv::Scalar(255.0, 255.0, 255.0); 95 | rectangle(img, rect_to_fill, color[result[i].class_idx], -1); 96 | 97 | putText(img, label, cv::Point(left - 1.5, top - 2.5), cv::FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2); 98 | } 99 | addWeighted(img, 0.6, mask, 0.4, 0, img); //add mask to src 100 | resize(img, img, img.size()); 101 | imshow("img", img); 102 | cv::waitKey(); 103 | } 104 | 105 | 106 | //void plot_keypoints(cv::Mat& image, const std::vector>& keypoints, const cv::Size& shape) { 107 | void plot_keypoints(cv::Mat& image, const std::vector& results, const cv::Size& shape) { 108 | 109 | int radius = 5; 110 | bool drawLines = true; 111 | 112 | if (results.empty()) { 113 | return; 114 | } 115 | 116 | std::vector limbColorPalette; 117 | std::vector kptColorPalette; 118 | 119 | for (int index : limbColorIndices) { 120 | limbColorPalette.push_back(posePalette[index]); 121 | } 122 | 123 | for (int index : kptColorIndices) { 124 | kptColorPalette.push_back(posePalette[index]); 125 | } 126 | 127 | for (const auto& res: results) { 128 | auto keypoint = res.keypoints; 129 | bool isPose = keypoint.size() == 51; // numKeypoints == 17 && keypoints[0].size() == 3; 130 | drawLines &= isPose; 131 | 132 | // draw points 133 | for (int i = 0; i < 17; i++) { 134 | int idx = i * 3; 135 | int x_coord = static_cast(keypoint[idx]); 136 | int y_coord = static_cast(keypoint[idx + 1]); 137 | 138 | if (x_coord % shape.width != 0 && y_coord % shape.height != 0) { 139 | if (keypoint.size() == 3) { 140 | float conf = keypoint[2]; 141 | if (conf < 0.5) { 142 | continue; 143 | } 144 | } 145 | cv::Scalar color_k = isPose ? kptColorPalette[i] : cv::Scalar(0, 0, 146 | 255); // Default to red if not in pose mode 147 | cv::circle(image, cv::Point(x_coord, y_coord), radius, color_k, -1, cv::LINE_AA); 148 | } 149 | } 150 | // draw lines 151 | if (drawLines) { 152 | for (int i = 0; i < skeleton.size(); i++) { 153 | const std::vector &sk = skeleton[i]; 154 | int idx1 = sk[0] - 1; 155 | int idx2 = sk[1] - 1; 156 | 157 | int idx1_x_pos = idx1 * 3; 158 | int idx2_x_pos = idx2 * 3; 159 | 160 | int x1 = static_cast(keypoint[idx1_x_pos]); 161 | int y1 = static_cast(keypoint[idx1_x_pos + 1]); 162 | int x2 = static_cast(keypoint[idx2_x_pos]); 163 | int y2 = static_cast(keypoint[idx2_x_pos + 1]); 164 | 165 | float conf1 = keypoint[idx1_x_pos + 2]; 166 | float conf2 = keypoint[idx2_x_pos + 2]; 167 | 168 | // Check confidence thresholds 169 | if (conf1 < 0.5 || conf2 < 0.5) { 170 | continue; 171 | } 172 | 173 | // Check if positions are within bounds 174 | if (x1 % shape.width == 0 || y1 % shape.height == 0 || x1 < 0 || y1 < 0 || 175 | x2 % shape.width == 0 || y2 % shape.height == 0 || x2 < 0 || y2 < 0) { 176 | continue; 177 | } 178 | 179 | // Draw a line between keypoints 180 | cv::Scalar color_limb = limbColorPalette[i]; 181 | cv::line(image, cv::Point(x1, y1), cv::Point(x2, y2), color_limb, 2, cv::LINE_AA); 182 | } 183 | } 184 | } 185 | } 186 | 187 | void plot_results(cv::Mat img, std::vector& results, 188 | std::vector color, std::unordered_map& names, 189 | const cv::Size& shape 190 | ) { 191 | 192 | cv::Mat mask = img.clone(); 193 | 194 | int radius = 5; 195 | bool drawLines = true; 196 | 197 | auto raw_image_shape = img.size(); 198 | std::vector limbColorPalette; 199 | std::vector kptColorPalette; 200 | 201 | for (int index : limbColorIndices) { 202 | limbColorPalette.push_back(posePalette[index]); 203 | } 204 | 205 | for (int index : kptColorIndices) { 206 | kptColorPalette.push_back(posePalette[index]); 207 | } 208 | 209 | for (const auto& res : results) { 210 | float left = res.bbox.x; 211 | float top = res.bbox.y; 212 | int color_num = res.class_idx; 213 | 214 | // Draw bounding box 215 | rectangle(img, res.bbox, color[res.class_idx], 2); 216 | 217 | // Try to get the class name corresponding to the given class_idx 218 | std::string class_name; 219 | auto it = names.find(res.class_idx); 220 | if (it != names.end()) { 221 | class_name = it->second; 222 | } 223 | else { 224 | std::cerr << "Warning: class_idx not found in names for class_idx = " << res.class_idx << std::endl; 225 | // Then convert it to a string anyway 226 | class_name = std::to_string(res.class_idx); 227 | } 228 | 229 | // Draw mask if available 230 | if (res.mask.rows && res.mask.cols > 0) { 231 | mask(res.bbox).setTo(color[res.class_idx], res.mask); 232 | } 233 | 234 | // Create label 235 | std::stringstream labelStream; 236 | labelStream << class_name << " " << std::fixed << std::setprecision(2) << res.conf; 237 | std::string label = labelStream.str(); 238 | 239 | cv::Size text_size = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, nullptr); 240 | cv::Rect rect_to_fill(left - 1, top - text_size.height - 5, text_size.width + 2, text_size.height + 5); 241 | cv::Scalar text_color = cv::Scalar(255.0, 255.0, 255.0); 242 | rectangle(img, rect_to_fill, color[res.class_idx], -1); 243 | putText(img, label, cv::Point(left - 1.5, top - 2.5), cv::FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2); 244 | 245 | // Check if keypoints are available 246 | if (!res.keypoints.empty()) { 247 | auto keypoint = res.keypoints; 248 | bool isPose = keypoint.size() == 51; // numKeypoints == 17 && keypoints[0].size() == 3; 249 | drawLines &= isPose; 250 | 251 | // draw points 252 | for (int i = 0; i < 17; i++) { 253 | int idx = i * 3; 254 | int x_coord = static_cast(keypoint[idx]); 255 | int y_coord = static_cast(keypoint[idx + 1]); 256 | 257 | if (x_coord % raw_image_shape.width != 0 && y_coord % raw_image_shape.height != 0) { 258 | if (keypoint.size() == 3) { 259 | float conf = keypoint[2]; 260 | if (conf < 0.5) { 261 | continue; 262 | } 263 | } 264 | cv::Scalar color_k = isPose ? kptColorPalette[i] : cv::Scalar(0, 0, 265 | 255); // Default to red if not in pose mode 266 | cv::circle(img, cv::Point(x_coord, y_coord), radius, color_k, -1, cv::LINE_AA); 267 | } 268 | } 269 | // draw lines 270 | if (drawLines) { 271 | for (int i = 0; i < skeleton.size(); i++) { 272 | const std::vector &sk = skeleton[i]; 273 | int idx1 = sk[0] - 1; 274 | int idx2 = sk[1] - 1; 275 | 276 | int idx1_x_pos = idx1 * 3; 277 | int idx2_x_pos = idx2 * 3; 278 | 279 | int x1 = static_cast(keypoint[idx1_x_pos]); 280 | int y1 = static_cast(keypoint[idx1_x_pos + 1]); 281 | int x2 = static_cast(keypoint[idx2_x_pos]); 282 | int y2 = static_cast(keypoint[idx2_x_pos + 1]); 283 | 284 | float conf1 = keypoint[idx1_x_pos + 2]; 285 | float conf2 = keypoint[idx2_x_pos + 2]; 286 | 287 | // Check confidence thresholds 288 | if (conf1 < 0.5 || conf2 < 0.5) { 289 | continue; 290 | } 291 | 292 | // Check if positions are within bounds 293 | if (x1 % raw_image_shape.width == 0 || y1 % raw_image_shape.height == 0 || x1 < 0 || y1 < 0 || 294 | x2 % raw_image_shape.width == 0 || y2 % raw_image_shape.height == 0 || x2 < 0 || y2 < 0) { 295 | continue; 296 | } 297 | 298 | // Draw a line between keypoints 299 | cv::Scalar color_limb = limbColorPalette[i]; 300 | cv::line(img, cv::Point(x1, y1), cv::Point(x2, y2), color_limb, 2, cv::LINE_AA); 301 | } 302 | } 303 | } 304 | } 305 | 306 | // Combine the image and mask 307 | addWeighted(img, 0.6, mask, 0.4, 0, img); 308 | // resize(img, img, img.size()); 309 | // resize(img, img, shape); 310 | // // Show the image 311 | // imshow("img", img); 312 | // cv::waitKey(); 313 | } 314 | 315 | 316 | 317 | int main() 318 | { 319 | std::string img_path = "../../images/000000000382.jpg"; 320 | //const std::img_path& modelPath = "./checkpoints/yolov8n.onnx"; // detection 321 | // vs: 322 | // const std::string& modelPath = "./checkpoints/yolov8n-seg.onnx"; // instance segmentation 323 | // clion: 324 | const std::string& modelPath = "../../checkpoints/yolov8n-pose.onnx"; // pose 325 | 326 | fs::path imageFilePath(img_path); 327 | fs::path newFilePath = imageFilePath.stem(); 328 | newFilePath += "-kpt-cpp"; 329 | newFilePath += imageFilePath.extension(); 330 | assert(newFilePath != imageFilePath); 331 | std::cout << "newFilePath: " << newFilePath << std::endl; 332 | 333 | const std::string& onnx_provider = OnnxProviders::CPU; // "cpu"; 334 | const std::string& onnx_logid = "yolov8_inference2"; 335 | float mask_threshold = 0.5f; // in python it's 0.5 and you can see that at ultralytics/utils/ops.process_mask line 705 (ultralytics.__version__ == .160) 336 | float conf_threshold = 0.30f; 337 | float iou_threshold = 0.45f; // 0.70f; 338 | int conversion_code = cv::COLOR_BGR2RGB; 339 | cv::Mat img = cv::imread(img_path, cv::IMREAD_UNCHANGED); 340 | if (img.empty()) { 341 | std::cerr << "Error: Unable to load image" << std::endl; 342 | return 1; 343 | } 344 | AutoBackendOnnx model(modelPath.c_str(), onnx_logid.c_str(), onnx_provider.c_str()); 345 | std::vector objs = model.predict_once(img, conf_threshold, iou_threshold, mask_threshold, conversion_code); 346 | std::vector colors = generateRandomColors(model.getNc(), model.getCh()); 347 | std::unordered_map names = model.getNames(); 348 | 349 | std::vector> keypointsVector; 350 | for (const YoloResults& result : objs) { 351 | keypointsVector.push_back(result.keypoints); 352 | } 353 | 354 | cv::cvtColor(img, img, cv::COLOR_RGB2BGR); 355 | cv::Size show_shape = img.size(); // cv::Size(1280, 720); // img.size() 356 | plot_results(img, objs, colors, names, show_shape); 357 | // plot_masks(img, objs, colors, names); 358 | cv::imshow("img", img); 359 | cv::waitKey(); 360 | return -1; 361 | } 362 | -------------------------------------------------------------------------------- /src/nn/autobackend.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "nn/autobackend.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "utils/augment.h" 15 | #include "constants.h" 16 | #include "utils/common.h" 17 | #include "utils/ops.h" 18 | 19 | 20 | namespace fs = std::filesystem; 21 | 22 | 23 | AutoBackendOnnx::AutoBackendOnnx(const char* modelPath, const char* logid, const char* provider, 24 | const std::vector& imgsz, const int& stride, 25 | const int& nc, const std::unordered_map names) 26 | : OnnxModelBase(modelPath, logid, provider), imgsz_(imgsz), stride_(stride), nc_(nc), names_(names), 27 | inputTensorShape_() 28 | { 29 | } 30 | 31 | AutoBackendOnnx::AutoBackendOnnx(const char* modelPath, const char* logid, const char* provider) 32 | : OnnxModelBase(modelPath, logid, provider) { 33 | // init metadata etc 34 | OnnxModelBase(modelPath, logid, provider); 35 | // then try to get additional info from metadata like imgsz, stride etc; 36 | // ideally you should get all of them but you'll raise error if smth is not in metadata (or not under the appropriate keys) 37 | const std::unordered_map& base_metadata = OnnxModelBase::getMetadata(); 38 | 39 | // post init imgsz 40 | auto imgsz_iterator = base_metadata.find(MetadataConstants::IMGSZ); 41 | if (imgsz_iterator != base_metadata.end()) { 42 | // parse it and convert to int iterable 43 | std::vector imgsz = convertStringVectorToInts(parseVectorString(imgsz_iterator->second)); 44 | // set it here: 45 | if (imgsz_.empty()) { 46 | imgsz_ = imgsz; 47 | } 48 | } 49 | else { 50 | std::cerr << "Warning: Cannot get imgsz value from metadata" << std::endl; 51 | } 52 | 53 | // post init stride 54 | auto stride_item = base_metadata.find(MetadataConstants::STRIDE); 55 | if (stride_item != base_metadata.end()) { 56 | // parse it and convert to int iterable 57 | int stide_int = std::stoi(stride_item->second); 58 | // set it here: 59 | if (stride_ == OnnxInitializers::UNINITIALIZED_STRIDE) { 60 | stride_ = stide_int; 61 | } 62 | } 63 | else { 64 | std::cerr << "Warning: Cannot get stride value from metadata" << std::endl; 65 | } 66 | 67 | // post init names 68 | auto names_item = base_metadata.find(MetadataConstants::NAMES); 69 | if (names_item != base_metadata.end()) { 70 | // parse it and convert to int iterable 71 | std::unordered_map names = parseNames(names_item->second); 72 | std::cout << "***Names from metadata***" << std::endl; 73 | for (const auto& pair : names) { 74 | std::cout << "Key: " << pair.first << ", Value: " << pair.second << std::endl; 75 | } 76 | // set it here: 77 | if (names_.empty()) { 78 | names_ = names; 79 | } 80 | } 81 | else { 82 | std::cerr << "Warning: Cannot get names value from metadata" << std::endl; 83 | } 84 | 85 | // post init number of classes - you can do that only and only if names_ is not empty and nc was not initialized previously 86 | if (nc_ == OnnxInitializers::UNINITIALIZED_NC && !names_.empty()) { 87 | nc_ = names_.size(); 88 | } 89 | else { 90 | std::cerr << "Warning: Cannot get nc value from metadata (probably names wasn't set)" << std::endl; 91 | } 92 | 93 | if (!imgsz_.empty() && inputTensorShape_.empty()) 94 | { 95 | inputTensorShape_ = { 1, ch_, getHeight(), getWidth() }; 96 | } 97 | 98 | if (!imgsz_.empty()) 99 | { 100 | // Initialize cvSize_ using getHeight() and getWidth() 101 | //cvSize_ = cv::MatSize() 102 | cvSize_ = cv::Size(getWidth(), getHeight()); 103 | //cvMatSize_ = cv::MatSize(cvSize_.width, cvSize_.height); 104 | } 105 | 106 | // task init: 107 | auto task_item = base_metadata.find(MetadataConstants::TASK); 108 | if (task_item != base_metadata.end()) { 109 | // parse it and convert to int iterable 110 | std::string task = std::string(task_item->second); 111 | // set it here: 112 | if (task_.empty()) 113 | { 114 | task_ = task; 115 | } 116 | } 117 | else { 118 | std::cerr << "Warning: Cannot get task value from metadata" << std::endl; 119 | } 120 | 121 | // TODO: raise assert if imgsz_ and task_ were not initialized (since you don't know in that case which postprocessing to use) 122 | 123 | } 124 | 125 | 126 | 127 | const std::vector& AutoBackendOnnx::getImgsz() { 128 | return imgsz_; 129 | } 130 | 131 | const int& AutoBackendOnnx::getHeight() 132 | { 133 | return imgsz_[0]; 134 | } 135 | 136 | const int& AutoBackendOnnx::getWidth() 137 | { 138 | return imgsz_[1]; 139 | } 140 | 141 | const int& AutoBackendOnnx::getStride() { 142 | return stride_; 143 | } 144 | 145 | const int& AutoBackendOnnx::getCh() { 146 | return ch_; 147 | } 148 | 149 | const int& AutoBackendOnnx::getNc() { 150 | return nc_; 151 | } 152 | 153 | const std::unordered_map& AutoBackendOnnx::getNames() { 154 | return names_; 155 | } 156 | 157 | 158 | const cv::Size& AutoBackendOnnx::getCvSize() 159 | { 160 | return cvSize_; 161 | } 162 | 163 | const std::vector& AutoBackendOnnx::getInputTensorShape() 164 | { 165 | return inputTensorShape_; 166 | } 167 | 168 | const std::string& AutoBackendOnnx::getTask() 169 | { 170 | return task_; 171 | } 172 | 173 | std::vector AutoBackendOnnx::predict_once(const std::string& imagePath, float& conf, float& iou, float& mask_threshold, 174 | int conversionCode, bool verbose) { 175 | // Convert the string imagePath to an object of type std::filesystem::path 176 | fs::path imageFilePath(imagePath); 177 | // Call the predict_once method, converting the image to a cv::Mat 178 | return predict_once(imageFilePath, conf, iou, mask_threshold, conversionCode); 179 | } 180 | 181 | std::vector AutoBackendOnnx::predict_once(const fs::path& imagePath, float& conf, float& iou, float& mask_threshold, 182 | int conversionCode, bool verbose) { 183 | // Check if the specified path exists 184 | if (!fs::exists(imagePath)) { 185 | std::cerr << "Error: File does not exist: " << imagePath << std::endl; 186 | // Return an empty vector or throw an exception, depending on your logic 187 | return {}; 188 | } 189 | 190 | // Load the image into a cv::Mat 191 | cv::Mat image = cv::imread(imagePath.string(), cv::IMREAD_UNCHANGED); 192 | 193 | // Check if loading the image was successful 194 | if (image.empty()) { 195 | std::cerr << "Error: Failed to load image: " << imagePath << std::endl; 196 | // Return an empty vector or throw an exception, depending on your logic 197 | return {}; 198 | } 199 | 200 | // now do some preprocessing based on channels info: 201 | int required_image_channels = this->getCh(); 202 | /*assert(required_image_channels == image.channels() && "");*/ 203 | // Assert that the number of channels in the input image matches the required number of channels for the model 204 | if (required_image_channels != image.channels()) { 205 | const std::string& errorMessage = "Error: Number of image channels does not match the required channels.\n" 206 | "Number of channels in the image: " + std::to_string(image.channels()); 207 | throw std::runtime_error(errorMessage); 208 | } 209 | 210 | // Call overloaded one 211 | return predict_once(image, conf, iou, mask_threshold, conversionCode); 212 | } 213 | 214 | 215 | std::vector AutoBackendOnnx::predict_once(cv::Mat& image, float& conf, float& iou, float& mask_threshold, int conversionCode, bool verbose) { 216 | double preprocess_time = 0.0; 217 | double inference_time = 0.0; 218 | double postprocess_time = 0.0; 219 | Timer preprocess_timer = Timer(preprocess_time, verbose); 220 | // 1. preprocess 221 | float* blob = nullptr; 222 | //double* blob = nullptr; 223 | std::vector inputTensors; 224 | if (conversionCode >= 0) { 225 | cv::cvtColor(image, image, conversionCode); 226 | } 227 | std::vector inputTensorShape; 228 | // TODO: for classify task preprocessed image will be different (!): 229 | cv::Mat preprocessed_img; 230 | cv::Size new_shape = cv::Size(getWidth(), getHeight()); 231 | const bool& scaleFill = false; // false 232 | const bool& auto_ = false; // true 233 | letterbox(image, preprocessed_img, new_shape, cv::Scalar(), auto_, scaleFill, true, getStride()); 234 | fill_blob(preprocessed_img, blob, inputTensorShape); 235 | int64_t inputTensorSize = vector_product(inputTensorShape); 236 | std::vector inputTensorValues(blob, blob + inputTensorSize); 237 | 238 | Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu( 239 | OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); 240 | 241 | inputTensors.push_back(Ort::Value::CreateTensor( 242 | memoryInfo, inputTensorValues.data(), inputTensorSize, 243 | inputTensorShape.data(), inputTensorShape.size() 244 | )); 245 | preprocess_timer.Stop(); 246 | Timer inference_timer = Timer(inference_time, verbose); 247 | // 2. inference 248 | std::vector outputTensors = forward(inputTensors); 249 | inference_timer.Stop(); 250 | Timer postprocess_timer = Timer(postprocess_time, verbose); 251 | // create container for the results 252 | std::vector results; 253 | // 3. postprocess based on task: 254 | std::unordered_map names = this->getNames(); 255 | // 4. cleanup blob since it was created using the "new" keyword during the `fill_blob` func call 256 | delete[] blob; 257 | 258 | int class_names_num = names.size(); 259 | if (task_ == YoloTasks::SEGMENT) { 260 | 261 | // get outputs info 262 | std::vector outputTensor0Shape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); 263 | std::vector outputTensor1Shape = outputTensors[1].GetTensorTypeAndShapeInfo().GetShape(); 264 | // get outputs 265 | float* all_data0 = outputTensors[0].GetTensorMutableData(); 266 | 267 | cv::Mat output0 = cv::Mat(cv::Size((int)outputTensor0Shape[2], (int)outputTensor0Shape[1]), CV_32F, all_data0).t(); // [bs, features, preds_num]=>[bs, preds_num, features] 268 | auto mask_shape = outputTensor1Shape; 269 | std::vector mask_sz = { 1,(int)mask_shape[1],(int)mask_shape[2],(int)mask_shape[3] }; 270 | cv::Mat output1 = cv::Mat(mask_sz, CV_32F, outputTensors[1].GetTensorMutableData()); 271 | 272 | int iw = this->getWidth(); 273 | int ih = this->getHeight(); 274 | int mask_features_num = outputTensor1Shape[1]; 275 | int mh = outputTensor1Shape[2]; 276 | int mw = outputTensor1Shape[3]; 277 | ImageInfo img_info = { image.size() }; 278 | postprocess_masks(output0, output1, img_info, results, class_names_num, conf, iou, 279 | iw, ih, mw, mh, mask_features_num, mask_threshold); 280 | } 281 | else if (task_ == YoloTasks::DETECT) { 282 | ImageInfo img_info = { image.size() }; 283 | std::vector outputTensor0Shape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); 284 | float* all_data0 = outputTensors[0].GetTensorMutableData(); 285 | cv::Mat output0 = cv::Mat(cv::Size((int)outputTensor0Shape[2], (int)outputTensor0Shape[1]), CV_32F, all_data0).t(); // [bs, features, preds_num]=>[bs, preds_num, features] 286 | postprocess_detects(output0, img_info, results, class_names_num, conf, iou); 287 | } 288 | else if (task_ == YoloTasks::POSE) { 289 | ImageInfo image_info = { image.size() }; 290 | std::vector outputTensor0Shape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape(); 291 | float* all_data0 = outputTensors[0].GetTensorMutableData(); 292 | cv::Mat output0 = cv::Mat(cv::Size((int)outputTensor0Shape[2], (int)outputTensor0Shape[1]), CV_32F, all_data0).t(); // [bs, features, preds_num]=>[bs, preds_num, features] 293 | postprocess_kpts(output0, image_info, results, class_names_num, conf, iou); 294 | } 295 | else { 296 | throw std::runtime_error("NotImplementedError: task: " + task_); 297 | } 298 | 299 | postprocess_timer.Stop(); 300 | if (verbose) { 301 | std::cout << std::fixed << std::setprecision(1); 302 | std::cout << "image: " << preprocessed_img.rows << "x" << preprocessed_img.cols << " " << results.size() << " objs, "; 303 | std::cout << (preprocess_time + inference_time + postprocess_time) * 1000.0 << "ms" << std::endl; 304 | std::cout << "Speed: " << (preprocess_time * 1000.0) << "ms preprocess, "; 305 | std::cout << (inference_time * 1000.0) << "ms inference, "; 306 | std::cout << (postprocess_time * 1000.0) << "ms postprocess per image "; 307 | std::cout << "at shape (1, " << image.channels() << ", " << preprocessed_img.rows << ", " << preprocessed_img.cols << ")" << std::endl; 308 | } 309 | 310 | return results; 311 | } 312 | 313 | 314 | void AutoBackendOnnx::postprocess_masks(cv::Mat& output0, cv::Mat& output1, ImageInfo image_info, std::vector& output, 315 | int& class_names_num, float& conf_threshold, float& iou_threshold, 316 | int& iw, int& ih, int& mw, int& mh, int& masks_features_num, float mask_threshold /* = 0.5f */) 317 | { 318 | output.clear(); 319 | std::vector class_ids; 320 | std::vector confidences; 321 | std::vector boxes; 322 | std::vector> masks; 323 | // 4 - your default number of rect parameters {x, y, w, h} 324 | int data_width = class_names_num + 4 + masks_features_num; 325 | int rows = output0.rows; 326 | float* pdata = (float*)output0.data; 327 | for (int r = 0; r < rows; ++r) 328 | { 329 | cv::Mat scores(1, class_names_num, CV_32FC1, pdata + 4); 330 | cv::Point class_id; 331 | double max_conf; 332 | minMaxLoc(scores, 0, &max_conf, 0, &class_id); 333 | if (max_conf > conf_threshold) 334 | { 335 | masks.push_back(std::vector(pdata + 4 + class_names_num, pdata + data_width)); 336 | class_ids.push_back(class_id.x); 337 | confidences.push_back(max_conf); 338 | 339 | float out_w = pdata[2]; 340 | float out_h = pdata[3]; 341 | float out_left = MAX((pdata[0] - 0.5 * out_w + 0.5), 0); 342 | float out_top = MAX((pdata[1] - 0.5 * out_h + 0.5), 0); 343 | cv::Rect_ bbox = cv::Rect(out_left, out_top, (out_w + 0.5), (out_h + 0.5)); 344 | cv::Rect_ scaled_bbox = scale_boxes(getCvSize(), bbox, image_info.raw_size); 345 | boxes.push_back(scaled_bbox); 346 | } 347 | pdata += data_width; // next pred 348 | } 349 | 350 | // 351 | //float masks_threshold = 0.50; 352 | //int top_k = 500; 353 | //const float& nmsde_eta = 1.0f; 354 | std::vector nms_result; 355 | cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, iou_threshold, nms_result); // , nms_eta, top_k); 356 | 357 | // select all of the protos tensor 358 | cv::Size downsampled_size = cv::Size(mw, mh); 359 | std::vector roi_rangs = { cv::Range(0, 1), cv::Range::all(), 360 | cv::Range(0, downsampled_size.height), cv::Range(0, downsampled_size.width) }; 361 | cv::Mat temp_mask = output1(roi_rangs).clone(); 362 | cv::Mat proto = temp_mask.reshape(0, { masks_features_num, downsampled_size.width * downsampled_size.height }); 363 | 364 | for (int i = 0; i < nms_result.size(); ++i) 365 | { 366 | int idx = nms_result[i]; 367 | boxes[idx] = boxes[idx] & cv::Rect(0, 0, image_info.raw_size.width, image_info.raw_size.height); 368 | YoloResults result = { class_ids[idx] ,confidences[idx] ,boxes[idx] }; 369 | _get_mask2(cv::Mat(masks[idx]).t(), proto, image_info, boxes[idx], result.mask, mask_threshold, 370 | iw, ih, mw, mh, masks_features_num); 371 | output.push_back(result); 372 | } 373 | } 374 | 375 | 376 | void AutoBackendOnnx::postprocess_detects(cv::Mat& output0, ImageInfo image_info, std::vector& output, 377 | int& class_names_num, float& conf_threshold, float& iou_threshold) 378 | { 379 | output.clear(); 380 | std::vector class_ids; 381 | std::vector confidences; 382 | std::vector boxes; 383 | std::vector> masks; 384 | // 4 - your default number of rect parameters {x, y, w, h} 385 | int data_width = class_names_num + 4; 386 | int rows = output0.rows; 387 | float* pdata = (float*)output0.data; 388 | 389 | for (int r = 0; r < rows; ++r) 390 | { 391 | cv::Mat scores(1, class_names_num, CV_32FC1, pdata + 4); 392 | cv::Point class_id; 393 | double max_conf; 394 | minMaxLoc(scores, nullptr, &max_conf, nullptr, &class_id); 395 | 396 | if (max_conf > conf_threshold) 397 | { 398 | masks.emplace_back(pdata + 4 + class_names_num, pdata + data_width); 399 | class_ids.push_back(class_id.x); 400 | confidences.push_back((float) max_conf); 401 | 402 | float out_w = pdata[2]; 403 | float out_h = pdata[3]; 404 | float out_left = MAX((pdata[0] - 0.5 * out_w + 0.5), 0); 405 | float out_top = MAX((pdata[1] - 0.5 * out_h + 0.5), 0); 406 | 407 | cv::Rect_ bbox = cv::Rect_ (out_left, out_top, (out_w + 0.5), (out_h + 0.5)); 408 | cv::Rect_ scaled_bbox = scale_boxes(getCvSize(), bbox, image_info.raw_size); 409 | 410 | boxes.push_back(scaled_bbox); 411 | } 412 | pdata += data_width; // next pred 413 | } 414 | 415 | std::vector nms_result; 416 | cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, iou_threshold, nms_result); // , nms_eta, top_k); 417 | for (int idx : nms_result) 418 | { 419 | boxes[idx] = boxes[idx] & cv::Rect(0, 0, image_info.raw_size.width, image_info.raw_size.height); 420 | YoloResults result = { class_ids[idx] ,confidences[idx] ,boxes[idx] }; 421 | output.push_back(result); 422 | } 423 | } 424 | 425 | void AutoBackendOnnx::postprocess_kpts(cv::Mat& output0, ImageInfo& image_info, std::vector& output, 426 | int& class_names_num, float& conf_threshold, float& iou_threshold) 427 | { 428 | std::vector boxes; 429 | std::vector confidences; 430 | std::vector class_ids; 431 | std::vector> rest; 432 | std::tie(boxes, confidences, class_ids, rest) = non_max_suppression(output0, class_names_num, output0.cols, conf_threshold, iou_threshold); 433 | cv::Size img1_shape = getCvSize(); 434 | auto bound_bbox = cv::Rect_ (0, 0, image_info.raw_size.width, image_info.raw_size.height); 435 | for (int i = 0; i < boxes.size(); i++) { 436 | // pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round() 437 | // pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:] 438 | // pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, shape) 439 | // path = self.batch[0] 440 | // img_path = path[i] if isinstance(path, list) else path 441 | // results.append( 442 | // Results(orig_img=orig_img, 443 | // path=img_path, 444 | // names=self.model.names, 445 | // boxes=pred[:, :6], 446 | // keypoints=pred_kpts)) 447 | cv::Rect_ bbox = boxes[i]; 448 | auto scaled_bbox = scale_boxes(img1_shape, bbox, image_info.raw_size); 449 | scaled_bbox = scaled_bbox & bound_bbox; 450 | // cv::Mat kpt = cv::Mat(rest[i]).t(); 451 | // scale_coords(img1_shape, kpt, image_info.raw_size); 452 | // TODO: overload scale_coords so that will accept cv::Mat of shape [17, 3] 453 | // so that it will be more similar to what we have in python 454 | std::vector kpt = scale_coords(img1_shape, rest[i], image_info.raw_size); 455 | YoloResults tmp_res = { class_ids[i], confidences[i], scaled_bbox, {}, kpt}; 456 | output.push_back(tmp_res); 457 | } 458 | } 459 | 460 | void AutoBackendOnnx::_get_mask2(const cv::Mat& masks_features, 461 | const cv::Mat& proto, 462 | const ImageInfo& image_info, const cv::Rect bound, cv::Mat& mask_out, 463 | float& mask_thresh, int& iw, int& ih, int& mw, int& mh, int& masks_features_num, 464 | bool round_downsampled) 465 | 466 | { 467 | cv::Size img0_shape = image_info.raw_size; 468 | cv::Size img1_shape = cv::Size(iw, ih); 469 | cv::Size downsampled_size = cv::Size(mw, mh); 470 | 471 | cv::Rect_ bound_float( 472 | static_cast(bound.x), 473 | static_cast(bound.y), 474 | static_cast(bound.width), 475 | static_cast(bound.height) 476 | ); 477 | 478 | cv::Rect_ downsampled_bbox = scale_boxes(img0_shape, bound_float, downsampled_size); 479 | cv::Size bound_size = cv::Size(mw, mh); 480 | clip_boxes(downsampled_bbox, bound_size); 481 | 482 | cv::Mat matmul_res = (masks_features * proto).t(); 483 | matmul_res = matmul_res.reshape(1, { downsampled_size.height, downsampled_size.width }); 484 | // apply sigmoid to the mask: 485 | cv::Mat sigmoid_mask; 486 | exp(-matmul_res, sigmoid_mask); 487 | sigmoid_mask = 1.0 / (1.0 + sigmoid_mask); 488 | cv::Mat resized_mask; 489 | cv::Rect_ input_bbox = scale_boxes(img0_shape, bound_float, img1_shape); 490 | cv::resize(sigmoid_mask, resized_mask, img1_shape, 0, 0, cv::INTER_LANCZOS4); 491 | cv::Mat pre_out_mask = resized_mask(input_bbox); 492 | cv::Mat scaled_mask; 493 | scale_image2(scaled_mask, resized_mask, img0_shape); 494 | cv::resize(scaled_mask, mask_out, img0_shape); 495 | mask_out = mask_out(bound) > mask_thresh; 496 | } 497 | 498 | 499 | void AutoBackendOnnx::fill_blob(cv::Mat& image, float*& blob, std::vector& inputTensorShape) { 500 | 501 | cv::Mat floatImage; 502 | if (inputTensorShape.empty()) 503 | { 504 | inputTensorShape = getInputTensorShape(); 505 | } 506 | int inputChannelsNum = inputTensorShape[1]; 507 | int rtype = CV_32FC3; 508 | image.convertTo(floatImage, rtype, 1.0f / 255.0); 509 | blob = new float[floatImage.cols * floatImage.rows * floatImage.channels()]; 510 | cv::Size floatImageSize{ floatImage.cols, floatImage.rows }; 511 | 512 | // hwc -> chw 513 | std::vector chw(floatImage.channels()); 514 | for (int i = 0; i < floatImage.channels(); ++i) 515 | { 516 | chw[i] = cv::Mat(floatImageSize, CV_32FC1, blob + i * floatImageSize.width * floatImageSize.height); 517 | } 518 | cv::split(floatImage, chw); 519 | } 520 | 521 | -------------------------------------------------------------------------------- /src/nn/onnx_model_base.cpp: -------------------------------------------------------------------------------- 1 | #include "nn/onnx_model_base.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "constants.h" 8 | #include "utils/common.h" 9 | 10 | 11 | /** 12 | * @brief Base class for any onnx model regarding the target. 13 | * 14 | * Wraps OrtApi. 15 | * 16 | * The caller provides a model path, logid, and provider. 17 | * 18 | * See the output logs for more information on warnings/errors that occur while processing the model. 19 | * 20 | * @param[in] modelPath Path to the model file. 21 | * @param[in] logid Log identifier. 22 | * @param[in] provider Provider (e.g., "CPU" or "CUDA"). (NOTE: for now only CPU is supported) 23 | */ 24 | 25 | OnnxModelBase::OnnxModelBase(const char* modelPath, const char* logid, const char* provider) 26 | //: modelPath_(modelPath), env(std::move(env)), session(std::move(session)) 27 | : modelPath_(modelPath) 28 | { 29 | 30 | // TODO: too bad passing `ORT_LOGGING_LEVEL_WARNING` by default - for some cases 31 | // info level would make sense too 32 | env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, logid); 33 | Ort::SessionOptions sessionOptions = Ort::SessionOptions(); 34 | 35 | std::vector availableProviders = Ort::GetAvailableProviders(); 36 | auto cudaAvailable = std::find(availableProviders.begin(), availableProviders.end(), "CUDAExecutionProvider"); 37 | OrtCUDAProviderOptions cudaOption; 38 | 39 | if (provider == OnnxProviders::CUDA.c_str()) { // strcmp(provider, OnnxProviders::CUDA.c_str()) == true strcmp(provider, "cuda") // (providerStr == "cuda") 40 | if (cudaAvailable == availableProviders.end()) { 41 | std::cout << "CUDA is not supported by your ONNXRuntime build. Fallback to CPU." << std::endl; 42 | //std::cout << "Inference device: CPU" << std::endl; 43 | } 44 | else { 45 | //std::cout << "Inference device: GPU" << std::endl; 46 | sessionOptions.AppendExecutionProvider_CUDA(cudaOption); 47 | } 48 | } 49 | 50 | else if (provider == OnnxProviders::CPU.c_str()) { // strcmp(provider, OnnxProviders::CPU.c_str()) == true) (providerStr == "cpu") { 51 | // "cpu" by default 52 | } 53 | else 54 | { 55 | throw std::runtime_error("NotImplemented provider=" + std::string(provider)); 56 | } 57 | 58 | std::cout << "Inference device: " << std::string(provider) << std::endl; 59 | #ifdef _WIN32 60 | auto modelPathW = get_win_path(modelPath); // For Windows (wstring) 61 | session = Ort::Session(env, modelPathW.c_str(), sessionOptions); 62 | #else 63 | session = Ort::Session(env, modelPath, sessionOptions); // For Linux (string) 64 | #endif 65 | //session = Ort::Session(env) 66 | // https://github.com/microsoft/onnxruntime/issues/14157 67 | //std::vector inputNodeNames; // 68 | // ---------------- 69 | // init input names 70 | inputNodeNames; 71 | std::vector inputNodeNameAllocatedStrings; // <-- newly added 72 | Ort::AllocatorWithDefaultOptions allocator; 73 | auto inputNodesNum = session.GetInputCount(); 74 | for (int i = 0; i < inputNodesNum; i++) { 75 | auto input_name = session.GetInputNameAllocated(i, allocator); 76 | inputNodeNameAllocatedStrings.push_back(std::move(input_name)); 77 | inputNodeNames.push_back(inputNodeNameAllocatedStrings.back().get()); 78 | } 79 | // ----------------- 80 | // init output names 81 | outputNodeNames; 82 | auto outputNodesNum = session.GetOutputCount(); 83 | std::vector outputNodeNameAllocatedStrings; // <-- newly added 84 | Ort::AllocatorWithDefaultOptions output_names_allocator; 85 | for (int i = 0; i < outputNodesNum; i++) 86 | { 87 | auto output_name = session.GetOutputNameAllocated(i, output_names_allocator); 88 | outputNodeNameAllocatedStrings.push_back(std::move(output_name)); 89 | outputNodeNames.push_back(outputNodeNameAllocatedStrings.back().get()); 90 | } 91 | // ------------------------- 92 | // initialize model metadata 93 | model_metadata = session.GetModelMetadata(); 94 | Ort::AllocatorWithDefaultOptions metadata_allocator; 95 | 96 | std::vector metadataAllocatedKeys = model_metadata.GetCustomMetadataMapKeysAllocated(metadata_allocator); 97 | std::vector metadata_keys; 98 | metadata_keys.reserve(metadataAllocatedKeys.size()); 99 | 100 | for (const Ort::AllocatedStringPtr& allocatedString : metadataAllocatedKeys) { 101 | metadata_keys.emplace_back(allocatedString.get()); 102 | } 103 | 104 | // ------------------------- 105 | // initialize metadata as the dict 106 | // even though we know exactly what metadata we intend to use 107 | // base onnx class should not have any ultralytics yolo-specific attributes like stride, task etc, so keep it clean as much as possible 108 | for (const std::string& key : metadata_keys) { 109 | Ort::AllocatedStringPtr metadata_value = model_metadata.LookupCustomMetadataMapAllocated(key.c_str(), metadata_allocator); 110 | if (metadata_value != nullptr) { 111 | auto raw_metadata_value = metadata_value.get(); 112 | metadata[key] = std::string(raw_metadata_value); 113 | } 114 | } 115 | 116 | // initialize cstr 117 | for (const std::string& name : outputNodeNames) { 118 | outputNamesCStr.push_back(name.c_str()); 119 | } 120 | 121 | for (const std::string& name : inputNodeNames) 122 | { 123 | inputNamesCStr.push_back(name.c_str()); 124 | } 125 | 126 | } 127 | 128 | const std::vector& OnnxModelBase::getInputNames() { 129 | return inputNodeNames; 130 | } 131 | 132 | const std::vector& OnnxModelBase::getOutputNames() { 133 | return outputNodeNames; 134 | } 135 | 136 | const Ort::ModelMetadata& OnnxModelBase::getModelMetadata() 137 | { 138 | return model_metadata; 139 | } 140 | 141 | const std::unordered_map& OnnxModelBase::getMetadata() 142 | { 143 | return metadata; 144 | } 145 | 146 | 147 | const Ort::Session& OnnxModelBase::getSession() 148 | { 149 | return session; 150 | } 151 | 152 | const char* OnnxModelBase::getModelPath() 153 | { 154 | return modelPath_; 155 | } 156 | 157 | const std::vector OnnxModelBase::getOutputNamesCStr() 158 | { 159 | return outputNamesCStr; 160 | } 161 | 162 | const std::vector OnnxModelBase::getInputNamesCStr() 163 | { 164 | return inputNamesCStr; 165 | } 166 | 167 | std::vector OnnxModelBase::forward(std::vector& inputTensors) 168 | { 169 | // todo: make runOptions parameter here 170 | 171 | return session.Run(Ort::RunOptions{ nullptr }, 172 | inputNamesCStr.data(), 173 | inputTensors.data(), 174 | inputNamesCStr.size(), 175 | outputNamesCStr.data(), 176 | outputNamesCStr.size()); 177 | } 178 | 179 | //OnnxModelBase::~OnnxModelBase() { 180 | // // empty body 181 | //} 182 | -------------------------------------------------------------------------------- /src/utils/augment.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | /** 8 | * \brief padding value when letterbox changes image size ratio 9 | */ 10 | const int& DEFAULT_LETTERBOX_PAD_VALUE = 114; 11 | 12 | 13 | void letterbox(const cv::Mat& image, 14 | cv::Mat& outImage, 15 | const cv::Size& newShape, 16 | cv::Scalar_ color, 17 | bool auto_, 18 | bool scaleFill, 19 | bool scaleUp, int stride 20 | ) { 21 | cv::Size shape = image.size(); 22 | float r = std::min(static_cast(newShape.height) / static_cast(shape.height), 23 | static_cast(newShape.width) / static_cast(shape.width)); 24 | if (!scaleUp) 25 | r = std::min(r, 1.0f); 26 | 27 | float ratio[2]{ r, r }; 28 | int newUnpad[2]{ static_cast(std::round(static_cast(shape.width) * r)), 29 | static_cast(std::round(static_cast(shape.height) * r)) }; 30 | 31 | auto dw = static_cast(newShape.width - newUnpad[0]); 32 | auto dh = static_cast(newShape.height - newUnpad[1]); 33 | 34 | if (auto_) 35 | { 36 | dw = static_cast((static_cast(dw) % stride)); 37 | dh = static_cast((static_cast(dh) % stride)); 38 | } 39 | else if (scaleFill) 40 | { 41 | dw = 0.0f; 42 | dh = 0.0f; 43 | newUnpad[0] = newShape.width; 44 | newUnpad[1] = newShape.height; 45 | ratio[0] = static_cast(newShape.width) / static_cast(shape.width); 46 | ratio[1] = static_cast(newShape.height) / static_cast(shape.height); 47 | } 48 | 49 | dw /= 2.0f; 50 | dh /= 2.0f; 51 | 52 | //cv::Mat outImage; 53 | if (shape.width != newUnpad[0] || shape.height != newUnpad[1]) 54 | { 55 | cv::resize(image, outImage, cv::Size(newUnpad[0], newUnpad[1])); 56 | } 57 | else 58 | { 59 | outImage = image.clone(); 60 | } 61 | 62 | int top = static_cast(std::round(dh - 0.1f)); 63 | int bottom = static_cast(std::round(dh + 0.1f)); 64 | int left = static_cast(std::round(dw - 0.1f)); 65 | int right = static_cast(std::round(dw + 0.1f)); 66 | 67 | 68 | if (color == cv::Scalar()) { 69 | color = cv::Scalar(DEFAULT_LETTERBOX_PAD_VALUE, DEFAULT_LETTERBOX_PAD_VALUE, DEFAULT_LETTERBOX_PAD_VALUE); 70 | } 71 | 72 | cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color); 73 | 74 | } 75 | 76 | cv::Mat scale_image(const cv::Mat& resized_mask, const cv::Size& im0_shape, const std::pair& ratio_pad) { 77 | cv::Size im1_shape = resized_mask.size(); 78 | 79 | // Check if resizing is needed 80 | if (im1_shape == im0_shape) { 81 | return resized_mask.clone(); 82 | } 83 | 84 | float gain, pad_x, pad_y; 85 | 86 | if (ratio_pad.first < 0.0f) { 87 | gain = std::min(static_cast(im1_shape.height) / static_cast(im0_shape.height), 88 | static_cast(im1_shape.width) / static_cast(im0_shape.width)); 89 | pad_x = (im1_shape.width - im0_shape.width * gain) / 2.0f; 90 | pad_y = (im1_shape.height - im0_shape.height * gain) / 2.0f; 91 | } 92 | else { 93 | gain = ratio_pad.first; 94 | pad_x = ratio_pad.second.x; 95 | pad_y = ratio_pad.second.y; 96 | } 97 | 98 | int top = static_cast(pad_y); 99 | int left = static_cast(pad_x); 100 | int bottom = static_cast(im1_shape.height - pad_y); 101 | int right = static_cast(im1_shape.width - pad_x); 102 | 103 | // Clip and resize the mask 104 | cv::Rect clipped_rect(left, top, right - left, bottom - top); 105 | cv::Mat clipped_mask = resized_mask(clipped_rect); 106 | cv::Mat scaled_mask; 107 | cv::resize(clipped_mask, scaled_mask, im0_shape); 108 | 109 | return scaled_mask; 110 | } 111 | 112 | 113 | void scale_image2(cv::Mat& scaled_mask, const cv::Mat& resized_mask, const cv::Size& im0_shape, const std::pair& ratio_pad) { 114 | cv::Size im1_shape = resized_mask.size(); 115 | 116 | // Check if resizing is needed 117 | if (im1_shape == im0_shape) { 118 | scaled_mask = resized_mask.clone(); 119 | return; 120 | } 121 | 122 | float gain, pad_x, pad_y; 123 | 124 | if (ratio_pad.first < 0.0f) { 125 | gain = std::min(static_cast(im1_shape.height) / static_cast(im0_shape.height), 126 | static_cast(im1_shape.width) / static_cast(im0_shape.width)); 127 | pad_x = (im1_shape.width - im0_shape.width * gain) / 2.0f; 128 | pad_y = (im1_shape.height - im0_shape.height * gain) / 2.0f; 129 | } 130 | else { 131 | gain = ratio_pad.first; 132 | pad_x = ratio_pad.second.x; 133 | pad_y = ratio_pad.second.y; 134 | } 135 | 136 | int top = static_cast(pad_y); 137 | int left = static_cast(pad_x); 138 | int bottom = static_cast(im1_shape.height - pad_y); 139 | int right = static_cast(im1_shape.width - pad_x); 140 | 141 | // Clip and resize the mask 142 | cv::Rect clipped_rect(left, top, right - left, bottom - top); 143 | cv::Mat clipped_mask = resized_mask(clipped_rect); 144 | cv::resize(clipped_mask, scaled_mask, im0_shape); 145 | } 146 | -------------------------------------------------------------------------------- /src/utils/common.cpp: -------------------------------------------------------------------------------- 1 | #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING 2 | #include 3 | #include 4 | #include 5 | #include "utils/common.h" 6 | 7 | #include 8 | //#include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | 19 | 20 | Timer::Timer(double& accumulator, bool isEnabled) 21 | : accumulator(accumulator), isEnabled(isEnabled) { 22 | if (isEnabled) { 23 | start = std::chrono::high_resolution_clock::now(); 24 | } 25 | } 26 | 27 | // Stop the timer and update the accumulator 28 | void Timer::Stop() { 29 | if (isEnabled) { 30 | auto end = std::chrono::high_resolution_clock::now(); 31 | double duration = std::chrono::duration(end - start).count(); 32 | accumulator += duration; 33 | } 34 | } 35 | 36 | // С++ 14 version 37 | //#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING 38 | std::wstring get_win_path(const std::string& modelPath) { 39 | #ifdef _WIN32 40 | return std::wstring_convert>().from_bytes(modelPath); 41 | #else 42 | // return modelPath; 43 | return std::wstring(modelPath.begin(), modelPath.end()); 44 | #endif 45 | } 46 | 47 | 48 | std::vector parseVectorString(const std::string& input) { 49 | /* Main purpose of this function is to parse `imgsz` key value of model metadata 50 | * and from [height, width] get height, width values in the vector of strings 51 | * Args: 52 | * input: 53 | * expected to be something like [544, 960] or [3,544, 960] 54 | * output: 55 | * iterable of strings, representing integers 56 | */ 57 | std::regex number_pattern(R"(\d+)"); 58 | 59 | std::vector result; 60 | std::sregex_iterator it(input.begin(), input.end(), number_pattern); 61 | std::sregex_iterator end; 62 | 63 | while (it != end) { 64 | result.push_back(it->str()); 65 | ++it; 66 | } 67 | 68 | return result; 69 | } 70 | 71 | std::vector convertStringVectorToInts(const std::vector& input) { 72 | std::vector result; 73 | 74 | for (const std::string& str : input) { 75 | try { 76 | int value = std::stoi(str); 77 | result.push_back(value); 78 | } 79 | catch (const std::invalid_argument& e) { 80 | // raise explicit exception 81 | throw std::invalid_argument("Bad argument (cannot cast): value=" + str); 82 | } 83 | catch (const std::out_of_range& e) { 84 | // check bounds 85 | throw std::out_of_range("Value out of range: " + str); 86 | } 87 | } 88 | 89 | return result; 90 | } 91 | 92 | 93 | /* 94 | std::unordered_map parseNames(const std::string& input) { 95 | std::unordered_map result; 96 | 97 | std::string cleanedInput = input; 98 | boost::erase_all(cleanedInput, "{"); 99 | boost::erase_all(cleanedInput, "}"); 100 | 101 | std::vector elements; 102 | boost::split(elements, cleanedInput, boost::is_any_of(",")); 103 | 104 | for (const std::string& element : elements) { 105 | std::vector keyValue; 106 | boost::split(keyValue, element, boost::is_any_of(":")); 107 | 108 | if (keyValue.size() == 2) { 109 | int key = std::stoi(boost::trim_copy(keyValue[0])); 110 | std::string value = boost::trim_copy(keyValue[1]); 111 | 112 | result[key] = value; 113 | } 114 | } 115 | 116 | return result; 117 | } 118 | */ 119 | 120 | std::unordered_map parseNames(const std::string& input) { 121 | std::unordered_map result; 122 | 123 | std::string cleanedInput = input; 124 | cleanedInput.erase(std::remove(cleanedInput.begin(), cleanedInput.end(), '{'), cleanedInput.end()); 125 | cleanedInput.erase(std::remove(cleanedInput.begin(), cleanedInput.end(), '}'), cleanedInput.end()); 126 | 127 | std::istringstream elementStream(cleanedInput); 128 | std::string element; 129 | while (std::getline(elementStream, element, ',')) { 130 | std::istringstream keyValueStream(element); 131 | std::string keyStr, value; 132 | if (std::getline(keyValueStream, keyStr, ':') && std::getline(keyValueStream, value)) { 133 | int key = std::stoi(keyStr); 134 | result[key] = value; 135 | } 136 | } 137 | 138 | return result; 139 | } 140 | 141 | int64_t vector_product(const std::vector& vec) { 142 | int64_t result = 1; 143 | for (int64_t value : vec) { 144 | result *= value; 145 | } 146 | return result; 147 | } 148 | -------------------------------------------------------------------------------- /src/utils/ops.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | //#include 5 | //#include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | 13 | 14 | void clip_boxes(cv::Rect& box, const cv::Size& shape) { 15 | box.x = std::max(0, std::min(box.x, shape.width)); 16 | box.y = std::max(0, std::min(box.y, shape.height)); 17 | box.width = std::max(0, std::min(box.width, shape.width - box.x)); 18 | box.height = std::max(0, std::min(box.height, shape.height - box.y)); 19 | } 20 | 21 | void clip_boxes(cv::Rect_& box, const cv::Size& shape) { 22 | box.x = std::max(0.0f, std::min(box.x, static_cast(shape.width))); 23 | box.y = std::max(0.0f, std::min(box.y, static_cast(shape.height))); 24 | box.width = std::max(0.0f, std::min(box.width, static_cast(shape.width - box.x))); 25 | box.height = std::max(0.0f, std::min(box.height, static_cast(shape.height - box.y))); 26 | } 27 | 28 | 29 | void clip_boxes(std::vector& boxes, const cv::Size& shape) { 30 | for (cv::Rect& box : boxes) { 31 | clip_boxes(box, shape); 32 | } 33 | } 34 | 35 | void clip_boxes(std::vector>& boxes, const cv::Size& shape) { 36 | for (cv::Rect_& box : boxes) { 37 | clip_boxes(box, shape); 38 | } 39 | } 40 | 41 | // source: ultralytics/utils/ops.py scale_boxes lines 99+ (ultralytics==8.0.160) 42 | cv::Rect_ scale_boxes(const cv::Size& img1_shape, cv::Rect_& box, const cv::Size& img0_shape, 43 | std::pair ratio_pad = std::make_pair(-1.0f, cv::Point2f(-1.0f, -1.0f)), bool padding = true) { 44 | 45 | float gain, pad_x, pad_y; 46 | 47 | if (ratio_pad.first < 0.0f) { 48 | gain = std::min(static_cast(img1_shape.height) / static_cast(img0_shape.height), 49 | static_cast(img1_shape.width) / static_cast(img0_shape.width)); 50 | pad_x = roundf((img1_shape.width - img0_shape.width * gain) / 2.0f - 0.1f); 51 | pad_y = roundf((img1_shape.height - img0_shape.height * gain) / 2.0f - 0.1f); 52 | } 53 | else { 54 | gain = ratio_pad.first; 55 | pad_x = ratio_pad.second.x; 56 | pad_y = ratio_pad.second.y; 57 | } 58 | 59 | //cv::Rect scaledCoords(box); 60 | cv::Rect_ scaledCoords(box); 61 | 62 | if (padding) { 63 | scaledCoords.x -= pad_x; 64 | scaledCoords.y -= pad_y; 65 | } 66 | 67 | scaledCoords.x /= gain; 68 | scaledCoords.y /= gain; 69 | scaledCoords.width /= gain; 70 | scaledCoords.height /= gain; 71 | 72 | // Clip the box to the bounds of the image 73 | clip_boxes(scaledCoords, img0_shape); 74 | 75 | return scaledCoords; 76 | } 77 | 78 | 79 | //void clip_coords(cv::Mat& coords, const cv::Size& shape) { 80 | // // Clip x-coordinates to the image width 81 | // cv::Mat xCoords = coords.col(0); 82 | // cv::Mat yCoords = coords.col(1); 83 | // 84 | // for (int i = 0; i < coords.rows; ++i) { 85 | // xCoords.at(i) = std::max(std::min(xCoords.at(i), static_cast(shape.width - 1)), 0.0f); 86 | // yCoords.at(i) = std::max(std::min(yCoords.at(i), static_cast(shape.height - 1)), 0.0f); 87 | // } 88 | //} 89 | 90 | void clip_coords(std::vector& coords, const cv::Size& shape) { 91 | // Assuming coords are of shape [1, 17, 3] 92 | for (int i = 0; i < coords.size(); i += 3) { 93 | coords[i] = std::min(std::max(coords[i], 0.0f), static_cast(shape.width - 1)); // x 94 | coords[i + 1] = std::min(std::max(coords[i + 1], 0.0f), static_cast(shape.height - 1)); // y 95 | } 96 | } 97 | 98 | // source: ultralytics/utils/ops.py scale_coords lines 753+ (ultralytics==8.0.160) 99 | //cv::Mat scale_coords(const cv::Size& img1_shape, cv::Mat& coords, const cv::Size& img0_shape) 100 | //cv::Mat scale_coords(const cv::Size& img1_shape, std::vector coords, const cv::Size& img0_shape) 101 | std::vector scale_coords(const cv::Size& img1_shape, std::vector& coords, const cv::Size& img0_shape) 102 | { 103 | // cv::Mat scaledCoords = coords.clone(); 104 | std::vector scaledCoords = coords; 105 | 106 | // Calculate gain and padding 107 | double gain = std::min(static_cast(img1_shape.width) / img0_shape.width, static_cast(img1_shape.height) / img0_shape.height); 108 | cv::Point2d pad((img1_shape.width - img0_shape.width * gain) / 2, (img1_shape.height - img0_shape.height * gain) / 2); 109 | 110 | // Apply padding 111 | // scaledCoords.col(0) = (scaledCoords.col(0) - pad.x); 112 | // scaledCoords.col(1) = (scaledCoords.col(1) - pad.y); 113 | // Assuming coords are of shape [1, 17, 3] 114 | for (int i = 0; i < scaledCoords.size(); i += 3) { 115 | scaledCoords[i] -= pad.x; // x padding 116 | scaledCoords[i + 1] -= pad.y; // y padding 117 | } 118 | 119 | // Scale coordinates 120 | // scaledCoords.col(0) /= gain; 121 | // scaledCoords.col(1) /= gain; 122 | // Assuming coords are of shape [1, 17, 3] 123 | for (int i = 0; i < scaledCoords.size(); i += 3) { 124 | scaledCoords[i] /= gain; 125 | scaledCoords[i + 1] /= gain; 126 | } 127 | 128 | clip_coords(scaledCoords, img0_shape); 129 | return scaledCoords; 130 | } 131 | 132 | 133 | cv::Mat crop_mask(const cv::Mat& mask, const cv::Rect& box) { 134 | int h = mask.rows; 135 | int w = mask.cols; 136 | 137 | int x1 = box.x; 138 | int y1 = box.y; 139 | int x2 = box.x + box.width; 140 | int y2 = box.y + box.height; 141 | 142 | cv::Mat cropped_mask = cv::Mat::zeros(h, w, mask.type()); 143 | 144 | for (int r = 0; r < h; ++r) { 145 | for (int c = 0; c < w; ++c) { 146 | if (r >= y1 && r < y2 && c >= x1 && c < x2) { 147 | cropped_mask.at(r, c) = mask.at(r, c); 148 | } 149 | } 150 | } 151 | 152 | return cropped_mask; 153 | } 154 | 155 | //std::tuple>, std::vector, std::vector, std::vector>> 156 | std::tuple, std::vector, std::vector, std::vector>> 157 | non_max_suppression(const cv::Mat& output0, int class_names_num, int data_width, double conf_threshold, 158 | float iou_threshold) { 159 | 160 | std::vector class_ids; 161 | std::vector confidences; 162 | // std::vector> boxes; 163 | std::vector boxes; 164 | std::vector> rest; 165 | 166 | int rest_start_pos = class_names_num + 4; 167 | int rest_features = data_width - rest_start_pos; 168 | // int data_width = rest_start_pos + total_features_num; 169 | 170 | int rows = output0.rows; 171 | float* pdata = (float*) output0.data; 172 | 173 | for (int r = 0; r < rows; ++r) { 174 | cv::Mat scores(1, class_names_num, CV_32FC1, pdata + 4); 175 | cv::Point class_id; 176 | double max_conf; 177 | minMaxLoc(scores, nullptr, &max_conf, nullptr, &class_id); 178 | 179 | if (max_conf > conf_threshold) { 180 | std::vector mask_data(pdata + 4 + class_names_num, pdata + data_width); 181 | class_ids.push_back(class_id.x); 182 | confidences.push_back((float) max_conf); 183 | 184 | float out_w = pdata[2]; 185 | float out_h = pdata[3]; 186 | float out_left = MAX((pdata[0] - 0.5 * out_w + 0.5), 0); 187 | float out_top = MAX((pdata[1] - 0.5 * out_h + 0.5), 0); 188 | cv::Rect_ bbox(out_left, out_top, (out_w + 0.5), (out_h + 0.5)); 189 | boxes.push_back(bbox); 190 | if (rest_features > 0) { 191 | std::vector rest_data(pdata + rest_start_pos, pdata + data_width); 192 | rest.push_back(rest_data); 193 | } 194 | } 195 | pdata += data_width; // next prediction 196 | } 197 | 198 | // 199 | //float masks_threshold = 0.50; 200 | //int top_k = 500; 201 | //const float& nmsde_eta = 1.0f; 202 | std::vector nms_result; 203 | cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, iou_threshold, nms_result); // , nms_eta, top_k); 204 | // cv::dnn::NMSBoxes(boxes, confidences, ); 205 | std::vector nms_class_ids; 206 | std::vector nms_confidences; 207 | // std::vector> boxes; 208 | std::vector nms_boxes; 209 | std::vector> nms_rest; 210 | for (int idx: nms_result) { 211 | nms_class_ids.push_back(class_ids[idx]); 212 | nms_confidences.push_back(confidences[idx]); 213 | nms_boxes.push_back(boxes[idx]); 214 | nms_rest.push_back(rest[idx]); 215 | } 216 | return std::make_tuple(nms_boxes, nms_confidences, nms_class_ids, nms_rest); 217 | } 218 | --------------------------------------------------------------------------------