├── .clang-format ├── .github └── workflows │ └── build.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CMakeLists.txt ├── CPPLINT.cfg ├── LICENSE ├── Makefile ├── README.md ├── cmake ├── cmake_utility.cmake └── tensorrt.cmake ├── data ├── images │ ├── bird_detection.jpg │ ├── dog.jpg │ ├── dogs.jpg │ ├── endgame.jpg │ ├── indoor.jpg │ ├── matrix.jpg │ ├── no_way_home.jpg │ ├── odaiba.jpg │ └── sample_city_scapes.png ├── squeezenet1.1.onnx └── version-RFB-640.onnx ├── dockerfiles ├── ubuntu2004.dockerfile ├── ubuntu2004_gpu.dockerfile └── ubuntu2004_tensorrt.dockerfile ├── docs ├── images │ ├── ComputerVision_VN_30.jpg │ ├── ComputerVision_VisionCS_0.jpg │ ├── ComputerVision_VisionDN_1.jpg │ ├── RemoteSensing_CS1.jpg │ ├── RemoteSensing_CS5.jpg │ ├── RetinaFix.jpg │ ├── bird_detection_result.jpg │ ├── cityscapes_legend.jpg │ ├── dogs_maskrcnn_result.jpg │ ├── endgame_result.jpg │ ├── indoor_maskrcnn_result.jpg │ ├── loftr.jpg │ ├── matrix_result.jpg │ ├── no_way_home_result.jpg │ ├── odaiba_result.jpg │ ├── sample_city_scapes_result.jpg │ ├── street_result.jpg │ ├── super_point_good_matches.jpg │ └── tiny_yolov2_result.jpg └── onnxruntime_tensorrt.md ├── examples ├── CMakeLists.txt ├── LoFTR.cpp ├── LoFTR.hpp ├── LoFTRApp.cpp ├── MaskRCNN.cpp ├── MaskRCNN.hpp ├── MaskRCNNApp.cpp ├── PrimitiveTest.cpp ├── SemanticSegmentationPaddleSegBisenetv2.cpp ├── SemanticSegmentationPaddleSegBisenetv2.hpp ├── SemanticSegmentationPaddleSegBisenetv2App.cpp ├── SuperGlueApp.cpp ├── SuperPoint.cpp ├── SuperPoint.hpp ├── SuperPointApp.cpp ├── TestImageClassification.cpp ├── TinyYolov2.cpp ├── TinyYolov2.hpp ├── TinyYolov2App.cpp ├── UltraLightFastGenericFaceDetector.cpp ├── UltraLightFastGenericFaceDetector.hpp ├── UltraLightFastGenericFaceDetectorApp.cpp ├── Utility.hpp ├── YoloX.cpp ├── YoloX.hpp ├── YoloXApp.cpp ├── Yolov3.cpp ├── Yolov3.hpp └── Yolov3App.cpp ├── include └── ort_utility │ ├── Constants.hpp │ ├── ImageClassificationOrtSessionHandler.hpp │ ├── 
ImageRecognitionOrtSessionHandlerBase.hpp │ ├── ObjectDetectionOrtSessionHandler.hpp │ ├── OrtSessionHandler.hpp │ ├── Utility.hpp │ └── ort_utility.hpp ├── scripts ├── get_cuda_environment_variables.bash ├── get_tensorrt_environment_variables.bash ├── install_apps_dependencies.bash ├── install_latest_cmake.bash ├── install_onnx_runtime.bash ├── loftr │ ├── README.md │ ├── convert_to_onnx.py │ ├── loftr_wrapper.py │ └── requirements.txt ├── superglue │ ├── README.md │ ├── convert_to_onnx.py │ ├── requirements.txt │ └── superglue_wrapper.py └── superpoint │ ├── README.md │ ├── convert_to_onnx.py │ └── requirements.txt └── src ├── CMakeLists.txt ├── ImageClassificationOrtSessionHandler.cpp ├── ImageRecognitionOrtSessionHandlerBase.cpp ├── ObjectDetectionOrtSessionHandler.cpp └── OrtSessionHandler.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: LLVM 4 | AccessModifierOffset: -3 5 | AlignAfterOpenBracket: true 6 | AlignEscapedNewlinesLeft: false 7 | AlignOperands: true 8 | AlignTrailingComments: true 9 | AllowAllParametersOfDeclarationOnNextLine: true 10 | AllowShortBlocksOnASingleLine: false 11 | AllowShortCaseLabelsOnASingleLine: false 12 | AllowShortIfStatementsOnASingleLine: false 13 | AllowShortLoopsOnASingleLine: false 14 | AllowShortFunctionsOnASingleLine: None 15 | AlwaysBreakAfterDefinitionReturnType: false 16 | AlwaysBreakTemplateDeclarations: false 17 | AlwaysBreakBeforeMultilineStrings: false 18 | BreakBeforeBinaryOperators: None 19 | BreakBeforeTernaryOperators: true 20 | BreakConstructorInitializersBeforeComma: true 21 | BinPackParameters: true 22 | BinPackArguments: true 23 | ColumnLimit: 120 24 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 25 | ConstructorInitializerIndentWidth: 4 26 | DerivePointerAlignment: false 27 | ExperimentalAutoDetectBinPacking: false 28 | IndentCaseLabels: true 29 | IndentWrappedFunctionNames: false 30 | 
IndentFunctionDeclarationAfterType: false 31 | MaxEmptyLinesToKeep: 1 32 | KeepEmptyLinesAtTheStartOfBlocks: false 33 | NamespaceIndentation: None 34 | ObjCBlockIndentWidth: 2 35 | ObjCSpaceAfterProperty: false 36 | ObjCSpaceBeforeProtocolList: true 37 | PenaltyBreakBeforeFirstCallParameter: 19 38 | PenaltyBreakComment: 300 39 | PenaltyBreakString: 1000 40 | PenaltyBreakFirstLessLess: 120 41 | PenaltyExcessCharacter: 1000000 42 | PenaltyReturnTypeOnItsOwnLine: 60 43 | PointerAlignment: Left 44 | SpacesBeforeTrailingComments: 2 45 | Cpp11BracedListStyle: true 46 | Standard: Cpp11 47 | IndentWidth: 4 48 | TabWidth: 8 49 | UseTab: Never 50 | BreakBeforeBraces: Linux # the position of the braces 51 | SpacesInParentheses: false 52 | SpacesInSquareBrackets: false 53 | SpacesInAngles: false 54 | SpaceInEmptyParentheses: false 55 | SpacesInCStyleCastParentheses: false 56 | SpaceAfterCStyleCast: false 57 | SpacesInContainerLiterals: true 58 | SpaceBeforeAssignmentOperators: true 59 | ContinuationIndentWidth: 4 60 | CommentPragmas: ".*" 61 | ForEachMacros: [foreach, Q_FOREACH, BOOST_FOREACH] 62 | SpaceBeforeParens: ControlStatements 63 | DisableFormat: false 64 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: ["master"] 6 | pull_request: 7 | 8 | jobs: 9 | linting: 10 | runs-on: ubuntu-20.04 11 | steps: 12 | - uses: actions/checkout@v3 13 | 14 | - name: Setup python 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: "3.x" 18 | 19 | - name: Apply pre-commit 20 | uses: pre-commit/action@v3.0.0 21 | with: 22 | extra_args: --all-files 23 | 24 | test-production: 25 | runs-on: ubuntu-20.04 26 | container: 27 | image: xmba15/onnx_runtime_cpp:v1.14.1-ubuntu20.04 28 | 29 | steps: 30 | - uses: actions/checkout@v3 31 | 32 | - name: Setup environment 33 | run: | 34 | sudo 
apt-get update 35 | bash ./scripts/install_apps_dependencies.bash 36 | 37 | - name: Test building 38 | run: | 39 | make apps -j`nproc` 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | *.pyc 10 | .ipynb_checkpoints 11 | *~ 12 | *# 13 | build* 14 | .cache* 15 | 16 | # Packages # 17 | ################### 18 | # it's better to unpack these files and commit the raw source 19 | # git has its own built in compression methods 20 | *.7z 21 | *.dmg 22 | *.gz 23 | *.iso 24 | *.jar 25 | *.rar 26 | *.tar 27 | *.zip 28 | 29 | # Logs and databases # 30 | ###################### 31 | *.log 32 | *.sql 33 | *.sqlite 34 | 35 | # OS generated files # 36 | ###################### 37 | .DS_Store 38 | .DS_Store? 39 | ._* 40 | .Spotlight-V100 41 | .Trashes 42 | ehthumbs.db 43 | Thumbs.db 44 | 45 | # Images 46 | ###################### 47 | *.jpg 48 | *.gif 49 | *.png 50 | *.svg 51 | *.ico 52 | 53 | # Video 54 | ###################### 55 | *.wmv 56 | *.mpg 57 | *.mpeg 58 | *.mp4 59 | *.mov 60 | *.flv 61 | *.avi 62 | *.ogv 63 | *.ogg 64 | *.webm 65 | 66 | # Audio 67 | ###################### 68 | *.wav 69 | *.mp3 70 | *.wma 71 | 72 | # Fonts 73 | ###################### 74 | Fonts 75 | *.eot 76 | *.ttf 77 | *.woff 78 | 79 | # Format 80 | ###################### 81 | CPPLINT.cfg 82 | .clang-format 83 | 84 | # Gtags 85 | ###################### 86 | GPATH 87 | GRTAGS 88 | GSYMS 89 | GTAGS 90 | 91 | data* 92 | *onnx* 93 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "scripts/superpoint/SuperPointPretrainedNetwork"] 2 | path = scripts/superpoint/SuperPointPretrainedNetwork 3 | url = 
https://github.com/magicleap/SuperPointPretrainedNetwork 4 | [submodule "scripts/superglue/SuperGluePretrainedNetwork"] 5 | path = scripts/superglue/SuperGluePretrainedNetwork 6 | url = https://github.com/magicleap/SuperGluePretrainedNetwork.git 7 | [submodule "scripts/loftr/LoFTR"] 8 | path = scripts/loftr/LoFTR 9 | url = https://github.com/xmba15/LoFTR 10 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/mirrors-clang-format 3 | rev: v14.0.6 4 | hooks: 5 | - id: clang-format 6 | 7 | - repo: https://github.com/cpplint/cpplint 8 | rev: 1.6.0 9 | hooks: 10 | - id: cpplint 11 | 12 | - repo: https://github.com/pre-commit/pre-commit-hooks 13 | rev: v4.3.0 14 | hooks: 15 | - id: end-of-file-fixer 16 | - id: trailing-whitespace 17 | 18 | - repo: https://github.com/pre-commit/mirrors-prettier 19 | rev: v2.7.1 20 | hooks: 21 | - id: prettier 22 | types_or: [json, markdown, yaml] 23 | 24 | - repo: https://github.com/lovesegfault/beautysh 25 | rev: v6.2.1 26 | hooks: 27 | - id: beautysh 28 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | project(ort_utility VERSION 0.0.2) 4 | set (CMAKE_CXX_STANDARD 17) 5 | 6 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) 7 | 8 | set(onnxruntime_INSTALL_PREFIX /usr/local) 9 | set(onnxruntime_INCLUDE_DIRS 10 | ${onnxruntime_INSTALL_PREFIX}/include/onnxruntime 11 | ${onnxruntime_INSTALL_PREFIX}/include/onnxruntime/core/session 12 | ) 13 | 14 | find_library(onnxruntime_LIBS NAMES onnxruntime PATHS /usr/local/lib) 15 | 16 | find_package(CUDA QUIET) 17 | 18 | if(CUDA_FOUND AND USE_GPU) 19 | add_definitions(-DENABLE_GPU=1) 20 | include(tensorrt) 21 | if 
(TENSORRT_FOUND) 22 | add_definitions(-DENABLE_TENSORRT=1) 23 | endif() 24 | else() 25 | add_definitions(-DENABLE_GPU=0) 26 | add_definitions(-DENABLE_TENSORRT=0) 27 | endif() 28 | 29 | 30 | add_compile_options( 31 | "$<$:-DENABLE_DEBUG=1>" 32 | "$<$:-DENABLE_DEBUG=0>" 33 | ) 34 | 35 | add_subdirectory(src) 36 | 37 | if(BUILD_EXAMPLES) 38 | add_subdirectory(examples) 39 | endif(BUILD_EXAMPLES) 40 | -------------------------------------------------------------------------------- /CPPLINT.cfg: -------------------------------------------------------------------------------- 1 | set noparent 2 | 3 | filter=-legal/copyright 4 | filter=-whitespace/braces 5 | filter=-build/header_guard 6 | filter=+build/pragma_once 7 | filter=-build/include_order 8 | filter=-build/include_subdir 9 | filter=-build/c++11 10 | linelength=120 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-2021 Ba Tran 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BUILD_EXAMPLES=OFF 2 | BUILD_TYPE=Release 3 | CMAKE_ARGS:=$(CMAKE_ARGS) 4 | USE_GPU=OFF 5 | 6 | default: 7 | @mkdir -p build 8 | @cd build && cmake .. -DBUILD_EXAMPLES=$(BUILD_EXAMPLES) \ 9 | -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ 10 | -DUSE_GPU=$(USE_GPU) \ 11 | -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ 12 | $(CMAKE_ARGS) 13 | @cd build && make 14 | 15 | gpu_default: 16 | @make default USE_GPU=ON 17 | 18 | debug: 19 | @make default BUILD_TYPE=Debug 20 | 21 | apps: 22 | @make default BUILD_EXAMPLES=ON 23 | 24 | gpu_apps: 25 | @make apps USE_GPU=ON 26 | 27 | debug_apps: 28 | @make debug BUILD_EXAMPLES=ON 29 | 30 | clean: 31 | @rm -rf build* 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build](https://github.com/xmba15/onnx_runtime_cpp/actions/workflows/build.yml/badge.svg)](https://github.com/xmba15/onnx_runtime_cpp/actions/workflows/build.yml) 2 | 3 | If this work somehow makes your day, maybe you can consider :) 4 | 5 | [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/xmba15) 6 | 7 | # small c++ library to quickly use [onnxruntime](https://github.com/microsoft/onnxruntime) to deploy deep learning models 8 | 9 | Thanks to [cardboardcode](https://github.com/cardboardcode), we have [the documentation](https://onnx-runtime-cpp.readthedocs.io/en/latest/index.html) for this small library. 
10 | Hope that they both are helpful for your work. 11 | 12 |
13 | Table of Contents 14 |
    15 |
  1. TODO
  2. 16 |
  3. Installation
  4. 17 |
  5. 18 | How to Build 19 | 22 |
  6. 23 |
  7. How to test apps
  8. 24 |
25 |
26 | 27 | ## TODO 28 | 29 | - [x] Support inference of multi-inputs, multi-outputs 30 | - [x] Examples for famous models, like yolov3, mask-rcnn, [ultra-light-weight face detector](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB), [yolox](https://github.com/Megvii-BaseDetection/YOLOX), [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3), [SuperPoint](https://github.com/magicleap/SuperPointPretrainedNetwork), [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork/tree/ddcf11f42e7e0732a0c4607648f9448ea8d73590), [LoFTR](https://zju3dv.github.io/loftr/). Might consider supporting more if requested. 31 | - [x] (Minimal^^) Support for TensorRT backend 32 | - [ ] Batch-inference 33 | 34 | ## Installation 35 | 36 | - build onnxruntime from source with the following script 37 | 38 | ```bash 39 | # onnxruntime needs newer cmake version to build 40 | bash ./scripts/install_latest_cmake.bash 41 | 42 | 43 | bash ./scripts/install_onnx_runtime.bash 44 | 45 | # dependencies to build apps 46 | bash ./scripts/install_apps_dependencies.bash 47 | ``` 48 | 49 | ## How to build 50 | 51 | --- 52 | 53 |
54 | CPU 55 | 56 | ```bash 57 | make default 58 | 59 | # build examples 60 | make apps 61 | ``` 62 | 63 |
64 | 65 |
66 | GPU with CUDA 67 | 68 | ```bash 69 | make gpu_default 70 | 71 | make gpu_apps 72 | ``` 73 | 74 |
75 | 76 | ### How to Run with Docker 77 | 78 |
79 | CPU 80 | 81 | ```bash 82 | # build 83 | docker build -f ./dockerfiles/ubuntu2004.dockerfile -t onnx_runtime . 84 | 85 | # run 86 | docker run -it --rm -v `pwd`:/workspace onnx_runtime 87 | ``` 88 | 89 |
90 | 91 |
92 | GPU with CUDA 93 | 94 | ```bash 95 | # build 96 | # change the cuda version to match your local cuda version before build the docker 97 | 98 | docker build -f ./dockerfiles/ubuntu2004_gpu.dockerfile -t onnx_runtime_gpu . 99 | 100 | # run 101 | docker run -it --rm --gpus all -v `pwd`:/workspace onnx_runtime_gpu 102 | ``` 103 | 104 | - Onnxruntime will be built with TensorRT support if the environment has TensorRT. Check [this memo](./docs/onnxruntime_tensorrt.md) for useful URLs related to building with TensorRT. 105 | - Be careful to choose TensorRT version compatible with onnxruntime. A good guess can be inferred from [HERE](https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.tensorrt). 106 | - Also it is not possible to use models whose input shapes are dynamic with TensorRT backend, according to [this](https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#shape-inference-for-tensorrt-subgraphs) 107 | 108 |
109 | 110 | ## How to test apps 111 | 112 | --- 113 | 114 | ### Image Classification With Squeezenet 115 | 116 | --- 117 | 118 |
119 | Usage 120 | 121 | ```bash 122 | # after make apps 123 | ./build/examples/TestImageClassification ./data/squeezenet1.1.onnx ./data/images/dog.jpg 124 | ``` 125 | 126 | the following result can be obtained 127 | 128 | ``` 129 | 264 : Cardigan, Cardigan Welsh corgi : 0.391365 130 | 263 : Pembroke, Pembroke Welsh corgi : 0.376214 131 | 227 : kelpie : 0.0314975 132 | 158 : toy terrier : 0.0223435 133 | 230 : Shetland sheepdog, Shetland sheep dog, Shetland : 0.020529 134 | ``` 135 | 136 |
137 | 138 |

(back to top)

139 | 140 | ### Object Detection With Tiny-Yolov2 trained on VOC dataset (with 20 classes) 141 | 142 | --- 143 | 144 |

145 | 146 |

147 | 148 |
149 | Usage 150 | 151 | - Download model from onnx model zoo: [HERE](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov2) 152 | 153 | - The shape of the output would be 154 | 155 | ```text 156 | OUTPUT_FEATUREMAP_SIZE X OUTPUT_FEATUREMAP_SIZE * NUM_ANCHORS * (NUM_CLASSES + 4 + 1) 157 | where OUTPUT_FEATUREMAP_SIZE = 13; NUM_ANCHORS = 5; NUM_CLASSES = 20 for the tiny-yolov2 model from onnx model zoo 158 | ``` 159 | 160 | - Test tiny-yolov2 inference apps 161 | 162 | ```bash 163 | # after make apps 164 | ./build/examples/tiny_yolo_v2 [path/to/tiny_yolov2/onnx/model] ./data/images/dog.jpg 165 | ``` 166 | 167 |
168 | 169 |

(back to top)

170 | 171 | ### Object Instance Segmentation With MaskRCNN trained on MS CoCo Dataset (80 + 1(background) clasess) 172 | 173 | --- 174 | 175 |

176 | 177 | 178 |

179 | 180 |
181 | Usage 182 | 183 | - Download model from onnx model zoo: [HERE](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/mask-rcnn) 184 | 185 | - As also stated in the url above, there are four outputs: boxes(nboxes x 4), labels(nboxes), scores(nboxes), masks(nboxesx1x28x28) 186 | - Test mask-rcnn inference apps 187 | 188 | ```bash 189 | # after make apps 190 | ./build/examples/mask_rcnn [path/to/mask_rcnn/onnx/model] ./data/images/dogs.jpg 191 | ``` 192 | 193 |
194 | 195 |

(back to top)

196 | 197 | ### Yolo V3 trained on Ms CoCo Dataset 198 | 199 | --- 200 | 201 |

202 | 203 |

204 | 205 |
206 | Usage 207 | 208 | - Download model from onnx model zoo: [HERE](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov3) 209 | 210 | - Test yolo-v3 inference apps 211 | 212 | ```bash 213 | # after make apps 214 | ./build/examples/yolov3 [path/to/yolov3/onnx/model] ./data/images/no_way_home.jpg 215 | ``` 216 | 217 |
218 | 219 |

(back to top)

220 | 221 | ### [Ultra-Light-Fast-Generic-Face-Detector-1MB](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB) 222 | 223 | --- 224 | 225 |

226 | 227 |

228 | 229 |
230 | Usage 231 | 232 | - App to use onnx model trained with famous light-weight [Ultra-Light-Fast-Generic-Face-Detector-1MB](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB) 233 | - Sample weight has been saved [./data/version-RFB-640.onnx](./data/version-RFB-640.onnx) 234 | - Test inference apps 235 | 236 | ```bash 237 | # after make apps 238 | ./build/examples/ultra_light_face_detector ./data/version-RFB-640.onnx ./data/images/endgame.jpg 239 | ``` 240 | 241 |
242 | 243 |

(back to top)

244 | 245 | ### [YoloX: high-performance anchor-free YOLO by Megvii](https://github.com/Megvii-BaseDetection/YOLOX) 246 | 247 | --- 248 | 249 |

250 | 251 |

252 | 253 |
254 | Usage 255 | 256 | - Download onnx model trained on COCO dataset from [HERE](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime) 257 | 258 | ```bash 259 | # this app tests yolox_l model but you can try with other yolox models also. 260 | wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l.onnx -O ./data/yolox_l.onnx 261 | ``` 262 | 263 | - Test inference apps 264 | 265 | ```bash 266 | # after make apps 267 | ./build/examples/yolox ./data/yolox_l.onnx ./data/images/matrix.jpg 268 | ``` 269 | 270 |
271 | 272 |

(back to top)

273 | 274 | ### [Semantic Segmentation Paddle Seg](https://github.com/PaddlePaddle/PaddleSeg) 275 | 276 | --- 277 | 278 |

279 | 280 |

281 | 282 |

283 | 284 | 285 |

286 | 287 |
288 | Usage 289 | 290 | - Download PaddleSeg's bisenetv2 trained on cityscapes dataset that has been converted to onnx [HERE](https://drive.google.com/file/d/1e-anuWG_ppDXmoy0sQ0sgrdutCTGlk95/view?usp=sharing) and copy to [./data directory](./data) 291 | 292 |
293 | You can also convert your own PaddleSeg with following procedures 294 | 295 | - [export PaddleSeg model](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.3/docs/model_export.md) 296 | - convert exported model to onnx format with [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) 297 | 298 |
299 | 300 | - Test inference apps 301 | 302 | ```bash 303 | ./build/examples/semantic_segmentation_paddleseg_bisenetv2 ./data/bisenetv2_cityscapes.onnx ./data/images/sample_city_scapes.png 304 | ./build/examples/semantic_segmentation_paddleseg_bisenetv2 ./data/bisenetv2_cityscapes.onnx ./data/images/odaiba.jpg 305 | ``` 306 | 307 |
308 | 309 |

(back to top)

310 | 311 | ### [SuperPoint](https://arxiv.org/pdf/1712.07629.pdf) 312 | 313 | --- 314 | 315 |

316 | 317 |

318 | 319 |
320 | Usage 321 | 322 | - Convert SuperPoint's pretrained weights to onnx format 323 | 324 | ```bash 325 | git submodule update --init --recursive 326 | python3 -m pip install -r scripts/superpoint/requirements.txt 327 | python3 scripts/superpoint/convert_to_onnx.py 328 | ``` 329 | 330 | - Download test images from [this dataset](https://github.com/StaRainJ/Multi-modality-image-matching-database-metrics-methods) 331 | 332 | ```bash 333 | wget https://raw.githubusercontent.com/StaRainJ/Multi-modality-image-matching-database-metrics-methods/master/Multimodal_Image_Matching_Datasets/ComputerVision/CrossSeason/VisionCS_0a.png -P data 334 | 335 | wget https://raw.githubusercontent.com/StaRainJ/Multi-modality-image-matching-database-metrics-methods/master/Multimodal_Image_Matching_Datasets/ComputerVision/CrossSeason/VisionCS_0b.png -P data 336 | ``` 337 | 338 | - Test inference apps 339 | 340 | ```bash 341 | ./build/examples/super_point /path/to/super_point.onnx data/VisionCS_0a.png data/VisionCS_0b.png 342 | ``` 343 | 344 |
345 | 346 |

(back to top)

347 | 348 | ### [SuperGlue](https://arxiv.org/pdf/1911.11763.pdf) 349 | 350 | --- 351 | 352 |

353 | 354 | 355 |

356 | 357 |

358 | 359 | 360 |

361 | 362 |

363 | 364 | 365 |

366 | 367 |
368 | Usage 369 | 370 | - Convert SuperPoint's pretrained weights to onnx format: Follow the above instruction 371 | 372 | - Convert SuperGlue's pretrained weights to onnx format 373 | 374 | ```bash 375 | git submodule update --init --recursive 376 | python3 -m pip install -r scripts/superglue/requirements.txt 377 | python3 -m pip install -r scripts/superglue/SuperGluePretrainedNetwork/requirements.txt 378 | python3 scripts/superglue/convert_to_onnx.py 379 | ``` 380 | 381 | - Download test images from [this dataset](https://github.com/StaRainJ/Multi-modality-image-matching-database-metrics-methods): Or prepare some pairs of your own images 382 | 383 | - Test inference apps 384 | 385 | ```bash 386 | ./build/examples/super_glue /path/to/super_point.onnx /path/to/super_glue.onnx /path/to/1st/image /path/to/2nd/image 387 | ``` 388 | 389 |
390 | 391 |

(back to top)

392 | 393 | ### [LoFTR](https://zju3dv.github.io/loftr/) 394 | 395 | --- 396 | 397 |

398 | 399 |

400 | 401 |
402 | Usage 403 | 404 | - Download [LoFTR](https://github.com/zju3dv/LoFTR) weights indoor*ds_new.ckpt from [HERE](https://drive.google.com/drive/folders/1xu2Pq6mZT5hmFgiYMBT9Zt8h1yO-3SIp). (LoFTR's [latest commit](b4ee7eb0359d0062e794c99f73e27639d7c7ac9f) seems to be only compatible with the new weights (Ref: https://github.com/zju3dv/LoFTR/issues/48). Hence, this onnx cpp application is only compatible with \_indoor_ds_new.ckpt* weights) 405 | 406 | - Convert LoFTR's pretrained weights to onnx format 407 | 408 | ```bash 409 | git submodule update --init --recursive 410 | python3 -m pip install -r scripts/loftr/requirements.txt 411 | python3 scripts/loftr/convert_to_onnx.py --model_path /path/to/indoor_ds_new.ckpt 412 | ``` 413 | 414 | - Download test images from [this dataset](https://github.com/StaRainJ/Multi-modality-image-matching-database-metrics-methods): Or prepare some pairs of your own images 415 | 416 | - Test inference apps 417 | 418 | ```bash 419 | ./build/examples/loftr /path/to/loftr.onnx /path/to/loftr.onnx /path/to/1st/image /path/to/2nd/image 420 | ``` 421 | 422 |
423 | 424 |

(back to top)

425 | -------------------------------------------------------------------------------- /cmake/cmake_utility.cmake: -------------------------------------------------------------------------------- 1 | function(__build_example example_name public_lib private_lib) 2 | add_executable( 3 | ${example_name} 4 | ${CMAKE_CURRENT_LIST_DIR}/${example_name}.cpp 5 | ) 6 | 7 | target_link_libraries( 8 | ${example_name} 9 | PUBLIC 10 | ${public_lib} 11 | PRIVATE 12 | ${private_lib} 13 | ) 14 | endfunction() 15 | -------------------------------------------------------------------------------- /cmake/tensorrt.cmake: -------------------------------------------------------------------------------- 1 | # Ref: //github.com/PRBonn/rangenet_lib/blob/master/cmake/tensorrt-config.cmake 2 | 3 | find_package(CUDA) 4 | find_library(NVINFER NAMES nvinfer) 5 | find_library(NVINFERPLUGIN NAMES nvinfer_plugin) 6 | find_library(NVPARSERS NAMES nvparsers) 7 | find_library(NVONNXPARSER NAMES nvonnxparser) 8 | 9 | # newer tensorrt does not have nvonnxparser_runtime 10 | # find_library(NVONNXPARSERRUNTIME NAMES nvonnxparser_runtime) 11 | 12 | # If it is ALL there, export libraries as a single package 13 | if(CUDA_FOUND AND NVINFER AND NVINFERPLUGIN AND NVPARSERS AND NVONNXPARSER) 14 | message("TensorRT available!") 15 | message("CUDA Libs: ${CUDA_LIBRARIES}") 16 | message("CUDA Headers: ${CUDA_INCLUDE_DIRS}") 17 | message("NVINFER: ${NVINFER}") 18 | message("NVINFERPLUGIN: ${NVINFERPLUGIN}") 19 | message("NVPARSERS: ${NVPARSERS}") 20 | message("NVONNXPARSER: ${NVONNXPARSER}") 21 | list(APPEND TENSORRT_LIBRARIES ${CUDA_LIBRARIES} nvinfer nvinfer_plugin nvparsers nvonnxparser) 22 | message("All togheter now (libs): ${TENSORRT_LIBRARIES}") 23 | list(APPEND TENSORRT_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) 24 | message("All togheter now (inc): ${TENSORRT_INCLUDE_DIRS}") 25 | set(TENSORRT_FOUND ON) 26 | else() 27 | message("TensorRT NOT Available") 28 | set(TENSORRT_FOUND OFF) 29 | endif() 30 | 
-------------------------------------------------------------------------------- /data/images/bird_detection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/bird_detection.jpg -------------------------------------------------------------------------------- /data/images/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/dog.jpg -------------------------------------------------------------------------------- /data/images/dogs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/dogs.jpg -------------------------------------------------------------------------------- /data/images/endgame.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/endgame.jpg -------------------------------------------------------------------------------- /data/images/indoor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/indoor.jpg -------------------------------------------------------------------------------- /data/images/matrix.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/matrix.jpg -------------------------------------------------------------------------------- /data/images/no_way_home.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/no_way_home.jpg -------------------------------------------------------------------------------- /data/images/odaiba.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/odaiba.jpg -------------------------------------------------------------------------------- /data/images/sample_city_scapes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/sample_city_scapes.png -------------------------------------------------------------------------------- /data/squeezenet1.1.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/squeezenet1.1.onnx -------------------------------------------------------------------------------- /data/version-RFB-640.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/version-RFB-640.onnx -------------------------------------------------------------------------------- /dockerfiles/ubuntu2004.dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | WORKDIR /build 6 | COPY ./scripts . 
7 | 8 | RUN apt-get update && \ 9 | apt-get install -y --no-install-recommends \ 10 | sudo \ 11 | gnupg2 \ 12 | lsb-release \ 13 | build-essential \ 14 | software-properties-common \ 15 | cmake \ 16 | git \ 17 | tmux && \ 18 | bash install_latest_cmake.bash && \ 19 | bash install_onnx_runtime.bash && \ 20 | bash install_apps_dependencies.bash && \ 21 | rm -rf /build && \ 22 | apt-get clean && \ 23 | rm -rf /var/lib/apt/lists/* 24 | 25 | WORKDIR /workspace 26 | 27 | ENTRYPOINT ["/bin/bash"] 28 | -------------------------------------------------------------------------------- /dockerfiles/ubuntu2004_gpu.dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.4.0-cudnn8-devel-ubuntu20.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | WORKDIR /build 6 | 7 | RUN apt-get update && \ 8 | apt-get install -y --no-install-recommends \ 9 | sudo \ 10 | gnupg2 \ 11 | lsb-release \ 12 | build-essential \ 13 | software-properties-common \ 14 | cmake \ 15 | git \ 16 | tmux && \ 17 | bash install_latest_cmake.bash && \ 18 | bash install_onnx_runtime.bash && \ 19 | bash install_apps_dependencies.bash && \ 20 | rm -rf /build && \ 21 | apt-get clean && \ 22 | rm -rf /var/lib/apt/lists/* 23 | 24 | WORKDIR /workspace 25 | 26 | ENTRYPOINT ["/bin/bash"] 27 | -------------------------------------------------------------------------------- /dockerfiles/ubuntu2004_tensorrt.dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/tensorrt:21.07-py3 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | WORKDIR /build 6 | 7 | RUN apt-get update && \ 8 | apt-get install -y --no-install-recommends \ 9 | sudo \ 10 | gnupg2 \ 11 | lsb-release \ 12 | build-essential \ 13 | software-properties-common \ 14 | cmake \ 15 | git \ 16 | tmux && \ 17 | bash install_latest_cmake.bash && \ 18 | bash install_onnx_runtime.bash && \ 19 | bash install_apps_dependencies.bash && \ 20 | rm -rf 
/build && \ 21 | apt-get clean && \ 22 | rm -rf /var/lib/apt/lists/* 23 | 24 | WORKDIR /workspace 25 | 26 | ENTRYPOINT ["/bin/bash"] 27 | -------------------------------------------------------------------------------- /docs/images/ComputerVision_VN_30.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/ComputerVision_VN_30.jpg -------------------------------------------------------------------------------- /docs/images/ComputerVision_VisionCS_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/ComputerVision_VisionCS_0.jpg -------------------------------------------------------------------------------- /docs/images/ComputerVision_VisionDN_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/ComputerVision_VisionDN_1.jpg -------------------------------------------------------------------------------- /docs/images/RemoteSensing_CS1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/RemoteSensing_CS1.jpg -------------------------------------------------------------------------------- /docs/images/RemoteSensing_CS5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/RemoteSensing_CS5.jpg -------------------------------------------------------------------------------- /docs/images/RetinaFix.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/RetinaFix.jpg -------------------------------------------------------------------------------- /docs/images/bird_detection_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/bird_detection_result.jpg -------------------------------------------------------------------------------- /docs/images/cityscapes_legend.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/cityscapes_legend.jpg -------------------------------------------------------------------------------- /docs/images/dogs_maskrcnn_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/dogs_maskrcnn_result.jpg -------------------------------------------------------------------------------- /docs/images/endgame_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/endgame_result.jpg -------------------------------------------------------------------------------- /docs/images/indoor_maskrcnn_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/indoor_maskrcnn_result.jpg -------------------------------------------------------------------------------- /docs/images/loftr.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/loftr.jpg -------------------------------------------------------------------------------- /docs/images/matrix_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/matrix_result.jpg -------------------------------------------------------------------------------- /docs/images/no_way_home_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/no_way_home_result.jpg -------------------------------------------------------------------------------- /docs/images/odaiba_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/odaiba_result.jpg -------------------------------------------------------------------------------- /docs/images/sample_city_scapes_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/sample_city_scapes_result.jpg -------------------------------------------------------------------------------- /docs/images/street_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/street_result.jpg -------------------------------------------------------------------------------- /docs/images/super_point_good_matches.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/super_point_good_matches.jpg -------------------------------------------------------------------------------- /docs/images/tiny_yolov2_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/tiny_yolov2_result.jpg -------------------------------------------------------------------------------- /docs/onnxruntime_tensorrt.md: -------------------------------------------------------------------------------- 1 | # 📝 memo on how to use tensorrt onnxruntime 2 | 3 | --- 4 | 5 | ## :running: How to Run 6 | 7 | --- 8 | 9 | Sample docker with tensorrt environment is provided [HERE](../dockerfiles/ubuntu2004_tensorrt.dockerfile) 10 | 11 | ## :gem: References 12 | 13 | --- 14 | 15 | - [tensorrt tags](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorrt/tags) 16 | - [overview of tensorrt docker](https://docs.nvidia.com/deeplearning/tensorrt/container-release-notes/overview.html) 17 | - [tensorrt introduction](https://developer.nvidia.com/tensorrt) 18 | - [build instruction](https://onnxruntime.ai/docs/build/eps.html) 19 | - [sample dockerfile provided by onnxruntime repo](https://github.com/microsoft/onnxruntime/blob/v1.10.0/dockerfiles/Dockerfile.tensorrt): _need to choose suitable tensorrt version that matches onnxruntime version_ 20 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | list(APPEND EXAMPLES 4 | TestImageClassification 5 | PrimitiveTest 6 | ) 7 | 8 | include(cmake_utility) 9 | 10 | find_package(OpenCV REQUIRED) 11 | 12 | list(APPEND 
PUBLIC_LIBS 13 | ${PROJECT_NAME} 14 | ${OpenCV_LIBS} 15 | ) 16 | 17 | list(APPEND PRIVATE_LIBS 18 | ) 19 | 20 | foreach(EXAMPLE ${EXAMPLES}) 21 | __build_example( 22 | ${EXAMPLE} 23 | "${PUBLIC_LIBS}" 24 | "${PRIVATE_LIBS}" 25 | ) 26 | target_include_directories(${EXAMPLE} 27 | PUBLIC 28 | ${OpenCV_INCLUDE_DIRS} 29 | ) 30 | endforeach(EXAMPLE) 31 | 32 | # --------------------------------------------------------- 33 | 34 | add_executable(tiny_yolo_v2 35 | ${CMAKE_CURRENT_LIST_DIR}/TinyYolov2.cpp 36 | ${CMAKE_CURRENT_LIST_DIR}/TinyYolov2.hpp 37 | ${CMAKE_CURRENT_LIST_DIR}/TinyYolov2App.cpp 38 | ) 39 | 40 | target_link_libraries(tiny_yolo_v2 41 | PUBLIC 42 | ${PROJECT_NAME} 43 | ${OpenCV_LIBS} 44 | ) 45 | 46 | target_include_directories(tiny_yolo_v2 47 | PUBLIC 48 | ${OpenCV_INCLUDE_DIRS} 49 | ) 50 | 51 | # --------------------------------------------------------- 52 | 53 | add_executable(mask_rcnn 54 | ${CMAKE_CURRENT_LIST_DIR}/MaskRCNN.cpp 55 | ${CMAKE_CURRENT_LIST_DIR}/MaskRCNN.hpp 56 | ${CMAKE_CURRENT_LIST_DIR}/MaskRCNNApp.cpp 57 | ) 58 | 59 | target_link_libraries(mask_rcnn 60 | PUBLIC 61 | ${PROJECT_NAME} 62 | ${OpenCV_LIBS} 63 | ) 64 | 65 | target_include_directories(mask_rcnn 66 | PUBLIC 67 | ${OpenCV_INCLUDE_DIRS} 68 | ) 69 | 70 | # --------------------------------------------------------- 71 | 72 | add_executable(yolov3 73 | ${CMAKE_CURRENT_LIST_DIR}/Yolov3.cpp 74 | ${CMAKE_CURRENT_LIST_DIR}/Yolov3.hpp 75 | ${CMAKE_CURRENT_LIST_DIR}/Yolov3App.cpp 76 | ) 77 | 78 | target_link_libraries(yolov3 79 | PUBLIC 80 | ${PROJECT_NAME} 81 | ${OpenCV_LIBS} 82 | ) 83 | 84 | target_include_directories(yolov3 85 | PUBLIC 86 | ${OpenCV_INCLUDE_DIRS} 87 | ) 88 | 89 | # --------------------------------------------------------- 90 | 91 | add_executable(ultra_light_face_detector 92 | ${CMAKE_CURRENT_LIST_DIR}/UltraLightFastGenericFaceDetector.cpp 93 | ${CMAKE_CURRENT_LIST_DIR}/UltraLightFastGenericFaceDetector.hpp 94 | 
${CMAKE_CURRENT_LIST_DIR}/UltraLightFastGenericFaceDetectorApp.cpp 95 | ) 96 | 97 | target_link_libraries(ultra_light_face_detector 98 | PUBLIC 99 | ${PROJECT_NAME} 100 | ${OpenCV_LIBS} 101 | ) 102 | 103 | target_include_directories(ultra_light_face_detector 104 | PUBLIC 105 | ${OpenCV_INCLUDE_DIRS} 106 | ) 107 | 108 | # --------------------------------------------------------- 109 | 110 | add_executable(yolox 111 | ${CMAKE_CURRENT_LIST_DIR}/YoloX.cpp 112 | ${CMAKE_CURRENT_LIST_DIR}/YoloX.hpp 113 | ${CMAKE_CURRENT_LIST_DIR}/YoloXApp.cpp 114 | ) 115 | 116 | target_link_libraries(yolox 117 | PUBLIC 118 | ${PROJECT_NAME} 119 | ${OpenCV_LIBS} 120 | ) 121 | 122 | target_include_directories(yolox 123 | PUBLIC 124 | ${OpenCV_INCLUDE_DIRS} 125 | ) 126 | 127 | # --------------------------------------------------------- 128 | 129 | add_executable(semantic_segmentation_paddleseg_bisenetv2 130 | ${CMAKE_CURRENT_LIST_DIR}/SemanticSegmentationPaddleSegBisenetv2.cpp 131 | ${CMAKE_CURRENT_LIST_DIR}/SemanticSegmentationPaddleSegBisenetv2App.cpp 132 | ) 133 | 134 | target_link_libraries(semantic_segmentation_paddleseg_bisenetv2 135 | PUBLIC 136 | ${PROJECT_NAME} 137 | ${OpenCV_LIBS} 138 | ) 139 | 140 | target_include_directories(semantic_segmentation_paddleseg_bisenetv2 141 | PUBLIC 142 | ${OpenCV_INCLUDE_DIRS} 143 | ) 144 | 145 | # --------------------------------------------------------- 146 | 147 | add_executable(super_point 148 | ${CMAKE_CURRENT_LIST_DIR}/SuperPoint.cpp 149 | ${CMAKE_CURRENT_LIST_DIR}/SuperPointApp.cpp 150 | ) 151 | 152 | target_link_libraries(super_point 153 | PUBLIC 154 | ${PROJECT_NAME} 155 | ${OpenCV_LIBS} 156 | ) 157 | 158 | target_include_directories(super_point 159 | PUBLIC 160 | ${OpenCV_INCLUDE_DIRS} 161 | ) 162 | 163 | # --------------------------------------------------------- 164 | 165 | add_executable(super_glue 166 | ${CMAKE_CURRENT_LIST_DIR}/SuperPoint.cpp 167 | ${CMAKE_CURRENT_LIST_DIR}/SuperGlueApp.cpp 168 | ) 169 | 170 | 
target_link_libraries(super_glue 171 | PUBLIC 172 | ${PROJECT_NAME} 173 | ${OpenCV_LIBS} 174 | ) 175 | 176 | target_include_directories(super_glue 177 | PUBLIC 178 | ${OpenCV_INCLUDE_DIRS} 179 | ) 180 | 181 | # --------------------------------------------------------- 182 | 183 | add_executable(loftr 184 | ${CMAKE_CURRENT_LIST_DIR}/LoFTR.cpp 185 | ${CMAKE_CURRENT_LIST_DIR}/LoFTRApp.cpp 186 | ) 187 | 188 | target_link_libraries(loftr 189 | PUBLIC 190 | ${PROJECT_NAME} 191 | ${OpenCV_LIBS} 192 | ) 193 | 194 | target_include_directories(loftr 195 | PUBLIC 196 | ${OpenCV_INCLUDE_DIRS} 197 | ) 198 | -------------------------------------------------------------------------------- /examples/LoFTR.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file LoFTR.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "LoFTR.hpp" 9 | 10 | namespace Ort 11 | { 12 | void LoFTR::preprocess(float* dst, const unsigned char* src, const int64_t targetImgWidth, 13 | const int64_t targetImgHeight, const int numChannels) const 14 | { 15 | for (int i = 0; i < targetImgHeight; ++i) { 16 | for (int j = 0; j < targetImgWidth; ++j) { 17 | for (int c = 0; c < numChannels; ++c) { 18 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 19 | (src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0); 20 | } 21 | } 22 | } 23 | } 24 | } // namespace Ort 25 | -------------------------------------------------------------------------------- /examples/LoFTR.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file LoFTR.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | namespace Ort 13 | { 14 | class LoFTR : public OrtSessionHandler 15 | { 16 | public: 17 | static constexpr int64_t IMG_H = 480; 18 | static constexpr int64_t IMG_W = 640; 19 | static constexpr int64_t IMG_CHANNEL = 1; 20 | 21 | using 
OrtSessionHandler::OrtSessionHandler; 22 | 23 | void preprocess(float* dst, // 24 | const unsigned char* src, // 25 | const int64_t targetImgWidth, // 26 | const int64_t targetImgHeight, // 27 | const int numChannels) const; 28 | }; 29 | } // namespace Ort 30 | -------------------------------------------------------------------------------- /examples/LoFTRApp.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file LoFTRApp.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "LoFTR.hpp" 9 | #include "Utility.hpp" 10 | #include 11 | 12 | static constexpr float CONFIDENCE_THRESHOLD = 0.1; 13 | 14 | namespace 15 | { 16 | std::pair, std::vector> 17 | processOneImagePair(const Ort::LoFTR& loftrOsh, const cv::Mat& queryImg, const cv::Mat& refImg, float* queryData, 18 | float* refData, float confidenceThresh = CONFIDENCE_THRESHOLD); 19 | } // namespace 20 | 21 | int main(int argc, char* argv[]) 22 | { 23 | if (argc != 4) { 24 | std::cerr << "Usage: [apps] [path/to/onnx/loftr] [path/to/image1] [path/to/image2]" << std::endl; 25 | return EXIT_FAILURE; 26 | } 27 | 28 | const std::string ONNX_MODEL_PATH = argv[1]; 29 | const std::vector IMAGE_PATHS = {argv[2], argv[3]}; 30 | 31 | std::vector images; 32 | std::vector grays; 33 | std::transform(IMAGE_PATHS.begin(), IMAGE_PATHS.end(), std::back_inserter(images), 34 | [](const auto& imagePath) { return cv::imread(imagePath); }); 35 | for (int i = 0; i < 2; ++i) { 36 | if (images[i].empty()) { 37 | throw std::runtime_error("failed to open " + IMAGE_PATHS[i]); 38 | } 39 | } 40 | 41 | std::transform(IMAGE_PATHS.begin(), IMAGE_PATHS.end(), std::back_inserter(grays), 42 | [](const auto& imagePath) { return cv::imread(imagePath, 0); }); 43 | 44 | std::vector queryData(Ort::LoFTR::IMG_CHANNEL * Ort::LoFTR::IMG_H * Ort::LoFTR::IMG_W); 45 | std::vector refData(Ort::LoFTR::IMG_CHANNEL * Ort::LoFTR::IMG_H * Ort::LoFTR::IMG_W); 46 | 47 | Ort::LoFTR osh( 48 | ONNX_MODEL_PATH, 0, 49 | 
std::vector>{{1, Ort::LoFTR::IMG_CHANNEL, Ort::LoFTR::IMG_H, Ort::LoFTR::IMG_W}, 50 | {1, Ort::LoFTR::IMG_CHANNEL, Ort::LoFTR::IMG_H, Ort::LoFTR::IMG_W}}); 51 | 52 | auto matchedKpts = processOneImagePair(osh, grays[0], grays[1], queryData.data(), refData.data()); 53 | const std::vector& queryKpts = matchedKpts.first; 54 | const std::vector& refKpts = matchedKpts.second; 55 | std::vector matches; 56 | for (int i = 0; i < queryKpts.size(); ++i) { 57 | cv::DMatch match; 58 | match.imgIdx = 0; 59 | match.queryIdx = i; 60 | match.trainIdx = i; 61 | matches.emplace_back(std::move(match)); 62 | } 63 | cv::Mat matchesImage; 64 | cv::drawMatches(images[0], queryKpts, images[1], refKpts, matches, matchesImage, cv::Scalar::all(-1), 65 | cv::Scalar::all(-1), std::vector(), cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); 66 | cv::imwrite("loftr.jpg", matchesImage); 67 | cv::imshow("loftr", matchesImage); 68 | cv::waitKey(); 69 | 70 | return EXIT_SUCCESS; 71 | } 72 | 73 | namespace 74 | { 75 | std::pair, std::vector> 76 | processOneImagePair(const Ort::LoFTR& loftrOsh, const cv::Mat& queryImg, const cv::Mat& refImg, float* queryData, 77 | float* refData, float confidenceThresh) 78 | { 79 | int origQueryW = queryImg.cols, origQueryH = queryImg.rows; 80 | int origRefW = refImg.cols, origRefH = refImg.rows; 81 | 82 | cv::Mat scaledQueryImg, scaledRefImg; 83 | cv::resize(queryImg, scaledQueryImg, cv::Size(Ort::LoFTR::IMG_W, Ort::LoFTR::IMG_H), 0, 0, cv::INTER_CUBIC); 84 | cv::resize(refImg, scaledRefImg, cv::Size(Ort::LoFTR::IMG_W, Ort::LoFTR::IMG_H), 0, 0, cv::INTER_CUBIC); 85 | 86 | loftrOsh.preprocess(queryData, scaledQueryImg.data, Ort::LoFTR::IMG_W, Ort::LoFTR::IMG_H, Ort::LoFTR::IMG_CHANNEL); 87 | loftrOsh.preprocess(refData, scaledRefImg.data, Ort::LoFTR::IMG_W, Ort::LoFTR::IMG_H, Ort::LoFTR::IMG_CHANNEL); 88 | auto inferenceOutput = loftrOsh({queryData, refData}); 89 | 90 | // inferenceOutput[0].second: keypoints0 of shape [num kpt x 2] 91 | // inferenceOutput[1].second: 
keypoints1 of shape [num kpt x 2] 92 | // inferenceOutput[2].second: confidences of shape [num kpt] 93 | 94 | int numKeyPoints = inferenceOutput[2].second[0]; 95 | std::vector queryKpts, refKpts; 96 | queryKpts.reserve(numKeyPoints); 97 | refKpts.reserve(numKeyPoints); 98 | 99 | for (int i = 0; i < numKeyPoints; ++i) { 100 | float confidence = inferenceOutput[2].first[i]; 101 | if (confidence < confidenceThresh) { 102 | continue; 103 | } 104 | float queryX = inferenceOutput[0].first[i * 2 + 0]; 105 | float queryY = inferenceOutput[0].first[i * 2 + 1]; 106 | float refX = inferenceOutput[1].first[i * 2 + 0]; 107 | float refY = inferenceOutput[1].first[i * 2 + 1]; 108 | cv::KeyPoint queryKpt, refKpt; 109 | queryKpt.pt.x = queryX * origQueryW / Ort::LoFTR::IMG_W; 110 | queryKpt.pt.y = queryY * origQueryH / Ort::LoFTR::IMG_H; 111 | 112 | refKpt.pt.x = refX * origRefW / Ort::LoFTR::IMG_W; 113 | refKpt.pt.y = refY * origRefH / Ort::LoFTR::IMG_H; 114 | 115 | queryKpts.emplace_back(std::move(queryKpt)); 116 | refKpts.emplace_back(std::move(refKpt)); 117 | } 118 | 119 | return std::make_pair(queryKpts, refKpts); 120 | } 121 | } // namespace 122 | -------------------------------------------------------------------------------- /examples/MaskRCNN.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file MaskRCNN.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | 10 | #include "MaskRCNN.hpp" 11 | 12 | namespace Ort 13 | { 14 | MaskRCNN::MaskRCNN(const uint16_t numClasses, // 15 | const std::string& modelPath, // 16 | const std::optional& gpuIdx, 17 | const std::optional>>& inputShapes) 18 | : ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes) 19 | { 20 | } 21 | 22 | MaskRCNN::~MaskRCNN() 23 | { 24 | } 25 | 26 | void MaskRCNN::preprocess(float* dst, // 27 | const float* src, // 28 | const int64_t targetImgWidth, // 29 | const int64_t targetImgHeight, // 30 | const int numChannels) const 
31 | { 32 | for (int c = 0; c < numChannels; ++c) { 33 | for (int i = 0; i < targetImgHeight; ++i) { 34 | for (int j = 0; j < targetImgWidth; ++j) { 35 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 36 | src[i * targetImgWidth * numChannels + j * numChannels + c]; 37 | } 38 | } 39 | } 40 | } 41 | 42 | void MaskRCNN::preprocess(float* dst, // 43 | const cv::Mat& imgSrc, // 44 | const int64_t targetImgWidth, // 45 | const int64_t targetImgHeight, // 46 | const int numChannels) const 47 | { 48 | for (int i = 0; i < targetImgHeight; ++i) { 49 | for (int j = 0; j < targetImgWidth; ++j) { 50 | for (int c = 0; c < numChannels; ++c) { 51 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = imgSrc.ptr(i, j)[c]; 52 | } 53 | } 54 | } 55 | } 56 | 57 | } // namespace Ort 58 | -------------------------------------------------------------------------------- /examples/MaskRCNN.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file MaskRCNN.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include 17 | 18 | namespace Ort 19 | { 20 | class MaskRCNN : public ImageRecognitionOrtSessionHandlerBase 21 | { 22 | public: 23 | static constexpr int64_t MIN_IMAGE_SIZE = 800; 24 | 25 | static constexpr int64_t IMG_CHANNEL = 3; 26 | 27 | MaskRCNN(const uint16_t numClasses, // 28 | const std::string& modelPath, // 29 | const std::optional& gpuIdx = std::nullopt, // 30 | const std::optional>>& inputShapes = std::nullopt); 31 | 32 | ~MaskRCNN(); 33 | 34 | void preprocess(float* dst, // 35 | const float* src, // 36 | const int64_t targetImgWidth, // 37 | const int64_t targetImgHeight, // 38 | const int numChannels) const; 39 | 40 | void preprocess(float* dst, // 41 | const cv::Mat& imgSrc, // 42 | const int64_t targetImgWidth, // 43 | const int64_t targetImgHeight, // 44 | const int numChannels) const; 45 | }; 46 | } 
// namespace Ort 47 | -------------------------------------------------------------------------------- /examples/MaskRCNNApp.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file MaskRCNNApp.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include 15 | 16 | #include "MaskRCNN.hpp" 17 | #include "Utility.hpp" 18 | 19 | static constexpr float CONFIDENCE_THRESHOLD = 0.5; 20 | static const std::vector COLORS = toCvScalarColors(Ort::MSCOCO_COLOR_CHART); 21 | 22 | namespace 23 | { 24 | cv::Mat processOneFrame(const Ort::MaskRCNN& osh, const cv::Mat& inputImg, int newW, int newH, int paddedW, int paddedH, 25 | float ratio, float* dst, const float confThresh = 0.5, 26 | const cv::Scalar& meanVal = cv::Scalar(102.9801, 115.9465, 122.7717), 27 | bool visualizeMask = true); 28 | } // namespace 29 | 30 | int main(int argc, char* argv[]) 31 | { 32 | if (argc != 3) { 33 | std::cerr << "Usage: [apps] [path/to/onnx/maskrcnn.onnx] [path/to/image]" << std::endl; 34 | return EXIT_FAILURE; 35 | } 36 | 37 | const std::string ONNX_MODEL_PATH = argv[1]; 38 | const std::string IMAGE_PATH = argv[2]; 39 | 40 | cv::Mat img = cv::imread(IMAGE_PATH); 41 | 42 | if (img.empty()) { 43 | std::cerr << "Failed to read input image" << std::endl; 44 | return EXIT_FAILURE; 45 | } 46 | 47 | float ratio = 800.0 / std::min(img.cols, img.rows); 48 | int newW = ratio * img.cols; 49 | int newH = ratio * img.rows; 50 | int paddedH = static_cast(((newH + 31) / 32) * 32); 51 | int paddedW = static_cast(((newW + 31) / 32) * 32); 52 | 53 | Ort::MaskRCNN osh(Ort::MSCOCO_NUM_CLASSES, ONNX_MODEL_PATH, 0, 54 | std::vector>{{Ort::MaskRCNN::IMG_CHANNEL, paddedH, paddedW}}); 55 | 56 | osh.initClassNames(Ort::MSCOCO_CLASSES); 57 | 58 | std::vector dst(Ort::MaskRCNN::IMG_CHANNEL * paddedH * paddedW); 59 | 60 | auto resultImg = ::processOneFrame(osh, img, newW, newH, paddedW, paddedH, ratio, dst.data(), 
CONFIDENCE_THRESHOLD); 61 | cv::imwrite("result.jpg", resultImg); 62 | 63 | return EXIT_SUCCESS; 64 | } 65 | 66 | namespace 67 | { 68 | cv::Mat processOneFrame(const Ort::MaskRCNN& osh, const cv::Mat& inputImg, int newW, int newH, int paddedW, int paddedH, 69 | float ratio, float* dst, float confThresh, const cv::Scalar& meanVal, bool visualizeMask) 70 | { 71 | cv::Mat tmpImg; 72 | cv::resize(inputImg, tmpImg, cv::Size(newW, newH)); 73 | 74 | tmpImg.convertTo(tmpImg, CV_32FC3); 75 | tmpImg -= meanVal; 76 | 77 | cv::Mat paddedImg(paddedH, paddedW, CV_32FC3, cv::Scalar(0, 0, 0)); 78 | tmpImg.copyTo(paddedImg(cv::Rect(0, 0, newW, newH))); 79 | 80 | osh.preprocess(dst, paddedImg.ptr(), paddedW, paddedH, 3); 81 | // or 82 | // osh.preprocess(dst, paddedImg, paddedW, paddedH, 3); 83 | 84 | // boxes, labels, scores, masks 85 | auto inferenceOutput = osh({dst}); 86 | 87 | assert(inferenceOutput[1].second.size() == 1); 88 | size_t nBoxes = inferenceOutput[1].second[0]; 89 | 90 | std::vector> bboxes; 91 | std::vector classIndices; 92 | std::vector masks; 93 | 94 | bboxes.reserve(nBoxes); 95 | classIndices.reserve(nBoxes); 96 | masks.reserve(nBoxes); 97 | 98 | for (size_t i = 0; i < nBoxes; ++i) { 99 | if (inferenceOutput[2].first[i] > confThresh) { 100 | DEBUG_LOG("%f", inferenceOutput[2].first[i]); 101 | 102 | float xmin = inferenceOutput[0].first[i * 4 + 0] / ratio; 103 | float ymin = inferenceOutput[0].first[i * 4 + 1] / ratio; 104 | float xmax = inferenceOutput[0].first[i * 4 + 2] / ratio; 105 | float ymax = inferenceOutput[0].first[i * 4 + 3] / ratio; 106 | 107 | xmin = std::max(xmin, 0); 108 | ymin = std::max(ymin, 0); 109 | xmax = std::min(xmax, inputImg.cols - 1); 110 | ymax = std::min(ymax, inputImg.rows - 1); 111 | 112 | bboxes.emplace_back(std::array{xmin, ymin, xmax, ymax}); 113 | classIndices.emplace_back(reinterpret_cast(inferenceOutput[1].first)[i]); 114 | 115 | cv::Mat curMask(28, 28, CV_32FC1); 116 | memcpy(curMask.data, inferenceOutput[3].first + i * 28 * 
28, 28 * 28 * sizeof(float)); 117 | masks.emplace_back(curMask); 118 | } 119 | } 120 | 121 | if (bboxes.size() == 0) { 122 | return inputImg; 123 | } 124 | 125 | return visualizeMask ? ::visualizeOneImageWithMask(inputImg, bboxes, classIndices, masks, COLORS, osh.classNames()) 126 | : ::visualizeOneImage(inputImg, bboxes, classIndices, COLORS, osh.classNames()); 127 | } 128 | } // namespace 129 | -------------------------------------------------------------------------------- /examples/PrimitiveTest.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file PrimitiveTest.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | /** 9 | * @brief primitive app to test size and name of input/output tensors 10 | * DO BUILD with DEBUG mode to see the debug messages 11 | */ 12 | 13 | #include 14 | 15 | #include 16 | 17 | namespace 18 | { 19 | static constexpr int64_t DUMMY_NUM_CLASSES = 2021; 20 | } // namespace 21 | 22 | int main(int argc, char* argv[]) 23 | { 24 | if (argc != 2) { 25 | std::cerr << "Usage: [apps] [path/to/onnx/model]" << std::endl; 26 | return EXIT_FAILURE; 27 | } 28 | 29 | const std::string ONNX_MODEL_PATH = argv[1]; 30 | Ort::ObjectDetectionOrtSessionHandler osh(DUMMY_NUM_CLASSES, ONNX_MODEL_PATH, 0); 31 | 32 | return EXIT_SUCCESS; 33 | } 34 | -------------------------------------------------------------------------------- /examples/SemanticSegmentationPaddleSegBisenetv2.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SemanticSegmentationPaddleSegBisenetv2.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "SemanticSegmentationPaddleSegBisenetv2.hpp" 9 | 10 | namespace Ort 11 | { 12 | SemanticSegmentationPaddleSegBisenetv2::SemanticSegmentationPaddleSegBisenetv2( 13 | const uint16_t numClasses, // 14 | const std::string& modelPath, // 15 | const std::optional& gpuIdx, const std::optional>>& inputShapes) 16 | : 
ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes) 17 | { 18 | } 19 | 20 | void SemanticSegmentationPaddleSegBisenetv2::preprocess(float* dst, // 21 | const unsigned char* src, // 22 | int64_t targetImgWidth, // 23 | int64_t targetImgHeight, // 24 | int numChannels, // 25 | const std::vector& meanVal, // 26 | const std::vector& stdVal) const 27 | { 28 | for (int i = 0; i < targetImgHeight; ++i) { 29 | for (int j = 0; j < targetImgWidth; ++j) { 30 | for (int c = 0; c < numChannels; ++c) { 31 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 32 | (src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0 - meanVal[c]) / stdVal[c]; 33 | } 34 | } 35 | } 36 | } 37 | } // namespace Ort 38 | -------------------------------------------------------------------------------- /examples/SemanticSegmentationPaddleSegBisenetv2.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SemanticSegmentationPaddleSegBisenetv2.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | namespace Ort 17 | { 18 | class SemanticSegmentationPaddleSegBisenetv2 : public ImageRecognitionOrtSessionHandlerBase 19 | { 20 | public: 21 | static constexpr int64_t IMG_H = 1024; 22 | static constexpr int64_t IMG_W = 1024; 23 | static constexpr int64_t IMG_CHANNEL = 3; 24 | 25 | SemanticSegmentationPaddleSegBisenetv2( 26 | const uint16_t numClasses, // 27 | const std::string& modelPath, // 28 | const std::optional& gpuIdx = std::nullopt, // 29 | const std::optional>>& inputShapes = std::nullopt); 30 | 31 | void preprocess(float* dst, // 32 | const unsigned char* src, // 33 | int64_t targetImgWidth, // 34 | int64_t targetImgHeight, // 35 | int numChannels, // 36 | const std::vector& meanVal = {0.5, 0.5, 0.5}, // 37 | const std::vector& stdVal = {0.5, 0.5, 0.5}) const; 38 | }; 39 | }; // namespace Ort 40 | 
-------------------------------------------------------------------------------- /examples/SemanticSegmentationPaddleSegBisenetv2App.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SemanticSegmentationPaddleSegBisenetv2App.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | 10 | #include "SemanticSegmentationPaddleSegBisenetv2.hpp" 11 | #include "Utility.hpp" 12 | 13 | namespace 14 | { 15 | cv::Mat processOneFrame(const Ort::SemanticSegmentationPaddleSegBisenetv2& osh, const cv::Mat& inputImg, float* dst, 16 | float alpha = 0.4); 17 | static const std::vector COLORS = toCvScalarColors(Ort::CITY_SCAPES_COLOR_CHART); 18 | } // namespace 19 | 20 | int main(int argc, char* argv[]) 21 | { 22 | if (argc != 3) { 23 | std::cerr << "Usage: [apps] [path/to/onnx/semantic/segmentation] [path/to/image]" << std::endl; 24 | return EXIT_FAILURE; 25 | } 26 | 27 | const std::string ONNX_MODEL_PATH = argv[1]; 28 | const std::string IMAGE_PATH = argv[2]; 29 | 30 | cv::Mat img = cv::imread(IMAGE_PATH); 31 | 32 | if (img.empty()) { 33 | std::cerr << "Failed to read input image" << std::endl; 34 | return EXIT_FAILURE; 35 | } 36 | 37 | Ort::SemanticSegmentationPaddleSegBisenetv2 osh( 38 | Ort::CITY_SCAPES_NUM_CLASSES, ONNX_MODEL_PATH, 0, 39 | std::vector>{{1, Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_CHANNEL, 40 | Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H, 41 | Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W}}); 42 | 43 | osh.initClassNames(Ort::CITY_SCAPES_CLASSES); 44 | std::vector dst(Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_CHANNEL * 45 | Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H * 46 | Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W); 47 | 48 | auto result = processOneFrame(osh, img, dst.data()); 49 | cv::Mat legend = drawColorChart(Ort::CITY_SCAPES_CLASSES, COLORS); 50 | cv::imshow("legend", legend); 51 | cv::imshow("overlaid result", result); 52 | cv::waitKey(0); 53 | 54 | 
return EXIT_SUCCESS; 55 | } 56 | 57 | namespace 58 | { 59 | cv::Mat processOneFrame(const Ort::SemanticSegmentationPaddleSegBisenetv2& osh, const cv::Mat& inputImg, float* dst, 60 | float alpha) 61 | { 62 | int origW = inputImg.cols, origH = inputImg.rows; 63 | std::vector originImageSize{static_cast(origH), static_cast(origW)}; 64 | cv::Mat scaledImg = inputImg.clone(); 65 | cv::resize(inputImg, scaledImg, 66 | cv::Size(Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W, 67 | Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H), 68 | 0, 0, cv::INTER_CUBIC); 69 | cv::cvtColor(scaledImg, scaledImg, cv::COLOR_BGR2RGB); 70 | osh.preprocess(dst, scaledImg.data, Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W, 71 | Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H, 3); 72 | auto inferenceOutput = osh({dst}); 73 | 74 | // tips: when you have done all the tricks but still get the wrong output result, 75 | // try checking the type of inferenceOutput 76 | int64_t* data = reinterpret_cast(inferenceOutput[0].first); 77 | cv::Mat segm(Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H, Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W, 78 | CV_8UC(3)); 79 | for (int i = 0; i < Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H; ++i) { 80 | cv::Vec3b* ptrSegm = segm.ptr(i); 81 | for (int j = 0; j < Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W; ++j) { 82 | const auto& color = COLORS[data[i * Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W + j]]; 83 | ptrSegm[j] = cv::Vec3b(color[0], color[1], color[2]); 84 | } 85 | } 86 | 87 | cv::resize(segm, segm, inputImg.size(), 0, 0, cv::INTER_NEAREST); 88 | segm = (1 - alpha) * inputImg + alpha * segm; 89 | return segm; 90 | } 91 | } // namespace 92 | -------------------------------------------------------------------------------- /examples/SuperGlueApp.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SuperGlueApp.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | 
#include "SuperPoint.hpp"
#include "Utility.hpp"

namespace
{
using KeyPointAndDesc = std::pair<std::vector<cv::KeyPoint>, cv::Mat>;

/**
 * @brief detects SuperPoint keypoints/descriptors on one grayscale frame
 *
 * @param dst preallocated CHW float buffer of IMG_CHANNEL * IMG_H * IMG_W elements
 * @return keypoints (rescaled to the original image resolution) and their descriptors
 */
KeyPointAndDesc processOneFrameSuperPoint(const Ort::SuperPoint& superPointOsh, const cv::Mat& inputImg, float* dst,
                                          int borderRemove = 4, float confidenceThresh = 0.015,
                                          bool alignCorners = true, int distThresh = 2);

// l2-normalizes each descriptor row in place
void normalizeDescriptors(cv::Mat* descriptors);
}  // namespace

int main(int argc, char* argv[])
{
    if (argc != 5) {
        std::cerr
            << "Usage: [apps] [path/to/onnx/super/point] [path/to/onnx/super/glue] [path/to/image1] [path/to/image2]"
            << std::endl;
        return EXIT_FAILURE;
    }

    const std::string ONNX_MODEL_PATH = argv[1];
    const std::string SUPERGLUE_ONNX_MODEL_PATH = argv[2];
    const std::vector<std::string> IMAGE_PATHS = {argv[3], argv[4]};

    Ort::SuperPoint superPointOsh(ONNX_MODEL_PATH, 0,
                                  std::vector<std::vector<int64_t>>{{1, Ort::SuperPoint::IMG_CHANNEL,
                                                                     Ort::SuperPoint::IMG_H, Ort::SuperPoint::IMG_W}});

    std::vector<cv::Mat> images;
    std::vector<cv::Mat> grays;
    std::transform(IMAGE_PATHS.begin(), IMAGE_PATHS.end(), std::back_inserter(images),
                   [](const auto& imagePath) { return cv::imread(imagePath); });
    for (int i = 0; i < 2; ++i) {
        if (images[i].empty()) {
            throw std::runtime_error("failed to open " + IMAGE_PATHS[i]);
        }
    }
    std::transform(IMAGE_PATHS.begin(), IMAGE_PATHS.end(), std::back_inserter(grays),
                   [](const auto& imagePath) { return cv::imread(imagePath, 0); });

    std::vector<float> dst(Ort::SuperPoint::IMG_CHANNEL * Ort::SuperPoint::IMG_H * Ort::SuperPoint::IMG_W);

    std::vector<KeyPointAndDesc> superPointResults;
    std::transform(grays.begin(), grays.end(), std::back_inserter(superPointResults),
                   [&superPointOsh, &dst](const auto& gray) {
                       return processOneFrameSuperPoint(superPointOsh, gray, dst.data());
                   });

    for (auto& curKeyPointAndDesc : superPointResults) {
        normalizeDescriptors(&curKeyPointAndDesc.second);
    }

    // superglue: the session is first created with dummy keypoint counts, then
    // the input shapes are updated once the real counts are known
    static const int DUMMY_NUM_KEYPOINTS = 256;
    Ort::OrtSessionHandler superGlueOsh(SUPERGLUE_ONNX_MODEL_PATH, 0,
                                        std::vector<std::vector<int64_t>>{
                                            {4},
                                            {1, DUMMY_NUM_KEYPOINTS},
                                            {1, DUMMY_NUM_KEYPOINTS, 2},
                                            {1, 256, DUMMY_NUM_KEYPOINTS},
                                            {4},
                                            {1, DUMMY_NUM_KEYPOINTS},
                                            {1, DUMMY_NUM_KEYPOINTS, 2},
                                            {1, 256, DUMMY_NUM_KEYPOINTS},
                                        });

    int numKeypoints0 = superPointResults[0].first.size();
    int numKeypoints1 = superPointResults[1].first.size();
    std::vector<std::vector<int64_t>> inputShapes = {
        {4}, {1, numKeypoints0}, {1, numKeypoints0, 2}, {1, 256, numKeypoints0},
        {4}, {1, numKeypoints1}, {1, numKeypoints1, 2}, {1, 256, numKeypoints1},
    };
    superGlueOsh.updateInputShapes(inputShapes);

    std::vector<std::vector<float>> imageShapes(2);
    std::vector<std::vector<float>> scores(2);
    std::vector<std::vector<float>> keypoints(2);
    std::vector<std::vector<float>> descriptors(2);

    cv::Mat buffer;
    for (int i = 0; i < 2; ++i) {
        // FIX(review): was images[0] for both entries; each image must report its own shape
        imageShapes[i] = {1, 1, static_cast<float>(images[i].rows), static_cast<float>(images[i].cols)};
        std::transform(superPointResults[i].first.begin(), superPointResults[i].first.end(),
                       std::back_inserter(scores[i]), [](const cv::KeyPoint& keypoint) { return keypoint.response; });
        // superglue expects (row, col) keypoint coordinates
        for (const auto& k : superPointResults[i].first) {
            keypoints[i].emplace_back(k.pt.y);
            keypoints[i].emplace_back(k.pt.x);
        }

        // descriptors go in as 256 x numKeypoints
        transposeNDWrapper(superPointResults[i].second, {1, 0}, buffer);
        std::copy(buffer.begin<float>(), buffer.end<float>(), std::back_inserter(descriptors[i]));
        buffer.release();
    }
    std::vector<Ort::OrtSessionHandler::DataOutputType> superGlueOrtOutput =
        superGlueOsh({imageShapes[0].data(), scores[0].data(), keypoints[0].data(), descriptors[0].data(),
                      imageShapes[1].data(), scores[1].data(), keypoints[1].data(), descriptors[1].data()});

    // match keypoints 0 to keypoints 1; a negative index means "unmatched"
    std::vector<int64_t> matchIndices(reinterpret_cast<int64_t*>(superGlueOrtOutput[0].first),
                                      reinterpret_cast<int64_t*>(superGlueOrtOutput[0].first) + numKeypoints0);

    std::vector<cv::DMatch> goodMatches;
    for (std::size_t i = 0; i < matchIndices.size(); ++i) {
        if (matchIndices[i] < 0) {
            continue;
        }
        cv::DMatch match;
        match.imgIdx = 0;
        match.queryIdx = i;
        match.trainIdx = matchIndices[i];
        goodMatches.emplace_back(match);
    }

    cv::Mat matchesImage;
    cv::drawMatches(images[0], superPointResults[0].first, images[1], superPointResults[1].first, goodMatches,
                    matchesImage, cv::Scalar::all(-1), cv::Scalar::all(-1), std::vector<char>(),
                    cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
    cv::imwrite("super_point_super_glue_good_matches.jpg", matchesImage);
    cv::imshow("super_point_super_glue_good_matches", matchesImage);
    cv::waitKey();

    return EXIT_SUCCESS;
}

namespace
{
void normalizeDescriptors(cv::Mat* descriptors)
{
    cv::Mat rsquaredSumMat;
    cv::reduce(descriptors->mul(*descriptors), rsquaredSumMat, 1, cv::REDUCE_SUM);
    cv::sqrt(rsquaredSumMat, rsquaredSumMat);
    for (int i = 0; i < descriptors->rows; ++i) {
        // clamp to avoid division by zero for (near-)zero descriptors
        float rsquaredSum = std::max<float>(rsquaredSumMat.ptr<float>()[i], 1e-12f);
        descriptors->row(i) /= rsquaredSum;
    }
}

KeyPointAndDesc processOneFrameSuperPoint(const Ort::SuperPoint& superPointOsh, const cv::Mat& inputImg, float* dst,
                                          int borderRemove, float confidenceThresh, bool alignCorners, int distThresh)
{
    int origW = inputImg.cols, origH = inputImg.rows;
    cv::Mat scaledImg;
    cv::resize(inputImg, scaledImg, cv::Size(Ort::SuperPoint::IMG_W, Ort::SuperPoint::IMG_H), 0, 0, cv::INTER_CUBIC);
    superPointOsh.preprocess(dst, scaledImg.data, Ort::SuperPoint::IMG_W, Ort::SuperPoint::IMG_H,
                             Ort::SuperPoint::IMG_CHANNEL);
    auto inferenceOutput = superPointOsh({dst});

    std::vector<cv::KeyPoint> keyPoints = superPointOsh.getKeyPoints(inferenceOutput, borderRemove, confidenceThresh);

    std::vector<int> descriptorShape(inferenceOutput[1].second.begin(), inferenceOutput[1].second.end());
    cv::Mat coarseDescriptorMat(descriptorShape.size(), descriptorShape.data(), CV_32F,
                                inferenceOutput[1].first);  // 1 x 256 x H/8 x W/8

    std::vector<int> keepIndices =
        superPointOsh.nmsFast(keyPoints, Ort::SuperPoint::IMG_H, Ort::SuperPoint::IMG_W, distThresh);

    std::vector<cv::KeyPoint> keepKeyPoints;
    keepKeyPoints.reserve(keepIndices.size());
    std::transform(keepIndices.begin(), keepIndices.end(), std::back_inserter(keepKeyPoints),
                   [&keyPoints](int idx) { return keyPoints[idx]; });
    keyPoints = std::move(keepKeyPoints);

    cv::Mat descriptors = superPointOsh.getDescriptors(coarseDescriptorMat, keyPoints, Ort::SuperPoint::IMG_H,
                                                       Ort::SuperPoint::IMG_W, alignCorners);

    // map keypoints back to the original resolution
    for (auto& keyPoint : keyPoints) {
        keyPoint.pt.x *= static_cast<float>(origW) / Ort::SuperPoint::IMG_W;
        keyPoint.pt.y *= static_cast<float>(origH) / Ort::SuperPoint::IMG_H;
    }

    return {keyPoints, descriptors};
}
}  // namespace

--------------------------------------------------------------------------------
/examples/SuperPoint.cpp:
--------------------------------------------------------------------------------
/**
 * @file SuperPoint.cpp
 *
 * @author btran
 *
 */

#include "SuperPoint.hpp"
#include "Utility.hpp"

namespace Ort
{
// scales uint8 HWC pixels to [0, 1] floats in CHW layout
void SuperPoint::preprocess(float* dst, const unsigned char* src, const int64_t targetImgWidth,
                            const int64_t targetImgHeight, const int numChannels) const
{
    for (int i = 0; i < targetImgHeight; ++i) {
        for (int j = 0; j < targetImgWidth; ++j) {
            for (int c = 0; c < numChannels; ++c) {
                dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] =
                    (src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0);
            }
        }
    }
}

26 | std::vector SuperPoint::nmsFast(const std::vector& keyPoints, int height, int width, 27 | int distThresh) const 28 | { 29 | static const int TO_PROCESS = 1; 30 | static const int EMPTY_OR_SUPPRESSED = 0; 31 | 32 | std::vector sortedIndices(keyPoints.size()); 33 | std::iota(sortedIndices.begin(), sortedIndices.end(), 0); 34 | 35 | // sort in descending order base on confidence 36 | std::stable_sort(sortedIndices.begin(), sortedIndices.end(), 37 | [&keyPoints](int lidx, int ridx) { return keyPoints[lidx].response > keyPoints[ridx].response; }); 38 | 39 | cv::Mat grid = cv::Mat(height, width, CV_8U, TO_PROCESS); 40 | std::vector keepIndices; 41 | 42 | for (int idx : sortedIndices) { 43 | int x = keyPoints[idx].pt.x; 44 | int y = keyPoints[idx].pt.y; 45 | 46 | if (grid.at(y, x) == TO_PROCESS) { 47 | for (int i = y - distThresh; i < y + distThresh; ++i) { 48 | if (i < 0 || i >= height) { 49 | continue; 50 | } 51 | 52 | for (int j = x - distThresh; j < x + distThresh; ++j) { 53 | if (j < 0 || j >= width) { 54 | continue; 55 | } 56 | grid.at(i, j) = EMPTY_OR_SUPPRESSED; 57 | } 58 | } 59 | keepIndices.emplace_back(idx); 60 | } 61 | } 62 | 63 | return keepIndices; 64 | } 65 | 66 | std::vector 67 | SuperPoint::getKeyPoints(const std::vector& inferenceOutput, int borderRemove, 68 | float confidenceThresh) const 69 | { 70 | std::vector detectorShape(inferenceOutput[0].second.begin() + 1, inferenceOutput[0].second.end()); 71 | 72 | cv::Mat detectorMat(detectorShape.size(), detectorShape.data(), CV_32F, 73 | inferenceOutput[0].first); // 65 x H/8 x W/8 74 | cv::Mat buffer; 75 | 76 | transposeNDWrapper(detectorMat, {1, 2, 0}, buffer); 77 | buffer.copyTo(detectorMat); // H/8 x W/8 x 65 78 | 79 | for (int i = 0; i < detectorShape[1]; ++i) { 80 | for (int j = 0; j < detectorShape[2]; ++j) { 81 | Ort::softmax(detectorMat.ptr(i, j), detectorShape[0]); 82 | } 83 | } 84 | detectorMat = detectorMat({cv::Range::all(), cv::Range::all(), cv::Range(0, detectorShape[0] - 1)}) 85 | 
.clone(); // H/8 x W/8 x 64 86 | detectorMat = detectorMat.reshape(1, {detectorShape[1], detectorShape[2], 8, 8}); // H/8 x W/8 x 8 x 8 87 | transposeNDWrapper(detectorMat, {0, 2, 1, 3}, buffer); 88 | buffer.copyTo(detectorMat); // H/8 x 8 x W/8 x 8 89 | 90 | detectorMat = detectorMat.reshape(1, {detectorShape[1] * 8, detectorShape[2] * 8}); // H x W 91 | 92 | std::vector keyPoints; 93 | for (int i = borderRemove; i < detectorMat.rows - borderRemove; ++i) { 94 | auto rowPtr = detectorMat.ptr(i); 95 | for (int j = borderRemove; j < detectorMat.cols - borderRemove; ++j) { 96 | if (rowPtr[j] > confidenceThresh) { 97 | cv::KeyPoint keyPoint; 98 | keyPoint.pt.x = j; 99 | keyPoint.pt.y = i; 100 | keyPoint.response = rowPtr[j]; 101 | keyPoints.emplace_back(keyPoint); 102 | } 103 | } 104 | } 105 | 106 | return keyPoints; 107 | } 108 | 109 | cv::Mat SuperPoint::getDescriptors(const cv::Mat& coarseDescriptors, const std::vector& keyPoints, 110 | int height, int width, bool alignCorners) const 111 | { 112 | cv::Mat keyPointMat(keyPoints.size(), 2, CV_32F); 113 | 114 | for (int i = 0; i < keyPoints.size(); ++i) { 115 | auto rowPtr = keyPointMat.ptr(i); 116 | rowPtr[0] = 2 * keyPoints[i].pt.y / (height - 1) - 1; 117 | rowPtr[1] = 2 * keyPoints[i].pt.x / (width - 1) - 1; 118 | } 119 | keyPointMat = keyPointMat.reshape(1, {1, 1, static_cast(keyPoints.size()), 2}); 120 | cv::Mat descriptors = bilinearGridSample(coarseDescriptors, keyPointMat, alignCorners); 121 | descriptors = descriptors.reshape(1, {coarseDescriptors.size[1], static_cast(keyPoints.size())}); 122 | 123 | cv::Mat buffer; 124 | transposeNDWrapper(descriptors, {1, 0}, buffer); 125 | 126 | return buffer; 127 | } 128 | } // namespace Ort 129 | -------------------------------------------------------------------------------- /examples/SuperPoint.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SuperPoint.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma 
once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | namespace Ort 15 | { 16 | class SuperPoint : public OrtSessionHandler 17 | { 18 | public: 19 | static constexpr int64_t IMG_H = 480; 20 | static constexpr int64_t IMG_W = 640; 21 | static constexpr int64_t IMG_CHANNEL = 1; 22 | 23 | using OrtSessionHandler::OrtSessionHandler; 24 | 25 | void preprocess(float* dst, // 26 | const unsigned char* src, // 27 | const int64_t targetImgWidth, // 28 | const int64_t targetImgHeight, // 29 | const int numChannels) const; 30 | 31 | std::vector nmsFast(const std::vector& keyPoints, int height, int width, 32 | int distThresh = 2) const; 33 | 34 | /** 35 | * @brief detect super point 36 | * 37 | * @return vector of detected key points 38 | */ 39 | std::vector getKeyPoints(const std::vector& inferenceOutput, 40 | int borderRemove, float confidenceThresh) const; 41 | 42 | /** 43 | * @brief estimate super point's keypoint descriptor 44 | * 45 | * @return keypoint Mat of shape [num key point x 256] 46 | */ 47 | cv::Mat getDescriptors(const cv::Mat& coarseDescriptors, const std::vector& keyPoints, int height, 48 | int width, bool alignCorners) const; 49 | }; 50 | } // namespace Ort 51 | -------------------------------------------------------------------------------- /examples/SuperPointApp.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SuperPointApp.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "SuperPoint.hpp" 9 | #include "Utility.hpp" 10 | #include 11 | 12 | namespace 13 | { 14 | using KeyPointAndDesc = std::pair, cv::Mat>; 15 | 16 | KeyPointAndDesc processOneFrame(const Ort::SuperPoint& osh, const cv::Mat& inputImg, float* dst, int borderRemove = 4, 17 | float confidenceThresh = 0.015, bool alignCorners = true, int distThresh = 2); 18 | 19 | } // namespace 20 | 21 | int main(int argc, char* argv[]) 22 | { 23 | if (argc != 4) { 24 | std::cerr << "Usage: [apps] [path/to/onnx/super/point] 
[path/to/image1] [path/to/image2]" << std::endl; 25 | return EXIT_FAILURE; 26 | } 27 | 28 | const std::string ONNX_MODEL_PATH = argv[1]; 29 | const std::vector IMAGE_PATHS = {argv[2], argv[3]}; 30 | 31 | Ort::SuperPoint osh(ONNX_MODEL_PATH, 0, 32 | std::vector>{ 33 | {1, Ort::SuperPoint::IMG_CHANNEL, Ort::SuperPoint::IMG_H, Ort::SuperPoint::IMG_W}}); 34 | 35 | std::vector images; 36 | std::vector grays; 37 | std::transform(IMAGE_PATHS.begin(), IMAGE_PATHS.end(), std::back_inserter(images), 38 | [](const auto& imagePath) { return cv::imread(imagePath); }); 39 | for (int i = 0; i < 2; ++i) { 40 | if (images[i].empty()) { 41 | throw std::runtime_error("failed to open " + IMAGE_PATHS[i]); 42 | } 43 | } 44 | std::transform(IMAGE_PATHS.begin(), IMAGE_PATHS.end(), std::back_inserter(grays), 45 | [](const auto& imagePath) { return cv::imread(imagePath, 0); }); 46 | 47 | std::vector dst(Ort::SuperPoint::IMG_CHANNEL * Ort::SuperPoint::IMG_H * Ort::SuperPoint::IMG_W); 48 | 49 | std::vector results; 50 | std::transform(grays.begin(), grays.end(), std::back_inserter(results), 51 | [&osh, &dst](const auto& gray) { return processOneFrame(osh, gray, dst.data()); }); 52 | 53 | cv::BFMatcher matcher(cv::NORM_L2, true /* crossCheck */); 54 | std::vector knnMatches; 55 | matcher.match(results[0].second, results[1].second, knnMatches); 56 | 57 | cv::Mat matchesImage; 58 | cv::drawMatches(images[0], results[0].first, images[1], results[1].first, knnMatches, matchesImage, 59 | cv::Scalar::all(-1), cv::Scalar::all(-1), std::vector(), 60 | cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); 61 | cv::imwrite("super_point_good_matches.jpg", matchesImage); 62 | cv::imshow("super_point_good_matches", matchesImage); 63 | cv::waitKey(); 64 | 65 | return EXIT_SUCCESS; 66 | } 67 | 68 | namespace 69 | { 70 | KeyPointAndDesc processOneFrame(const Ort::SuperPoint& osh, const cv::Mat& inputImg, float* dst, int borderRemove, 71 | float confidenceThresh, bool alignCorners, int distThresh) 72 | { 73 | int 
origW = inputImg.cols, origH = inputImg.rows; 74 | cv::Mat scaledImg; 75 | cv::resize(inputImg, scaledImg, cv::Size(Ort::SuperPoint::IMG_W, Ort::SuperPoint::IMG_H), 0, 0, cv::INTER_CUBIC); 76 | osh.preprocess(dst, scaledImg.data, Ort::SuperPoint::IMG_W, Ort::SuperPoint::IMG_H, Ort::SuperPoint::IMG_CHANNEL); 77 | auto inferenceOutput = osh({dst}); 78 | 79 | std::vector keyPoints = osh.getKeyPoints(inferenceOutput, borderRemove, confidenceThresh); 80 | 81 | std::vector descriptorShape(inferenceOutput[1].second.begin(), inferenceOutput[1].second.end()); 82 | cv::Mat coarseDescriptorMat(descriptorShape.size(), descriptorShape.data(), CV_32F, 83 | inferenceOutput[1].first); // 1 x 256 x H/8 x W/8 84 | 85 | std::vector keepIndices = osh.nmsFast(keyPoints, Ort::SuperPoint::IMG_H, Ort::SuperPoint::IMG_W, distThresh); 86 | 87 | std::vector keepKeyPoints; 88 | keepKeyPoints.reserve(keepIndices.size()); 89 | std::transform(keepIndices.begin(), keepIndices.end(), std::back_inserter(keepKeyPoints), 90 | [&keyPoints](int idx) { return keyPoints[idx]; }); 91 | keyPoints = std::move(keepKeyPoints); 92 | 93 | cv::Mat descriptors = osh.getDescriptors(coarseDescriptorMat, keyPoints, Ort::SuperPoint::IMG_H, 94 | Ort::SuperPoint::IMG_W, alignCorners); 95 | 96 | for (auto& keyPoint : keyPoints) { 97 | keyPoint.pt.x *= static_cast(origW) / Ort::SuperPoint::IMG_W; 98 | keyPoint.pt.y *= static_cast(origH) / Ort::SuperPoint::IMG_H; 99 | } 100 | 101 | return {keyPoints, descriptors}; 102 | } 103 | } // namespace 104 | -------------------------------------------------------------------------------- /examples/TestImageClassification.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file TestImageClassification.cpp 3 | * 4 | * @author btran 5 | * 6 | * Copyright (c) organization 7 | * 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | #include 21 | 22 | 
static constexpr int64_t IMG_WIDTH = 224; 23 | static constexpr int64_t IMG_HEIGHT = 224; 24 | static constexpr int64_t IMG_CHANNEL = 3; 25 | static constexpr int64_t TEST_TIMES = 1000; 26 | 27 | int main(int argc, char* argv[]) 28 | { 29 | if (argc != 3) { 30 | std::cerr << "Usage: [apps] [path/to/onnx/model] [path/to/image]" << std::endl; 31 | return EXIT_FAILURE; 32 | } 33 | 34 | const std::string ONNX_MODEL_PATH = argv[1]; 35 | const std::string IMAGE_PATH = argv[2]; 36 | 37 | Ort::ImageClassificationOrtSessionHandler osh(Ort::IMAGENET_NUM_CLASSES, ONNX_MODEL_PATH, 0); 38 | osh.initClassNames(Ort::IMAGENET_CLASSES); 39 | 40 | cv::Mat img = cv::imread(IMAGE_PATH); 41 | 42 | if (img.empty()) { 43 | std::cerr << "Failed to read input image" << std::endl; 44 | return EXIT_FAILURE; 45 | } 46 | 47 | cv::resize(img, img, cv::Size(IMG_WIDTH, IMG_HEIGHT)); 48 | float* dst = new float[IMG_WIDTH * IMG_HEIGHT * IMG_CHANNEL]; 49 | osh.preprocess(dst, img.data, IMG_WIDTH, IMG_HEIGHT, IMG_CHANNEL, Ort::IMAGENET_MEAN, Ort::IMAGENET_STD); 50 | 51 | std::chrono::high_resolution_clock::time_point begin = std::chrono::high_resolution_clock::now(); 52 | for (int i = 0; i < TEST_TIMES; ++i) { 53 | auto inferenceOutput = osh({reinterpret_cast(dst)}); 54 | 55 | const int TOP_K = 5; 56 | // osh.topK({inferenceOutput[0].first}, TOP_K); 57 | std::cout << osh.topKToString({inferenceOutput[0].first}, TOP_K) << std::endl; 58 | } 59 | std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); 60 | auto elapsedTime = std::chrono::duration_cast(end - begin); 61 | std::cout << elapsedTime.count() / 1000. 
<< "[sec]" << std::endl; 62 | 63 | delete[] dst; 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /examples/TinyYolov2.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file TinyYolov2.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "TinyYolov2.hpp" 9 | 10 | namespace Ort 11 | { 12 | TinyYolov2::TinyYolov2(const uint16_t numClasses, // 13 | const std::string& modelPath, // 14 | const std::optional& gpuIdx, 15 | const std::optional>>& inputShapes) 16 | : ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes) 17 | { 18 | } 19 | 20 | TinyYolov2::~TinyYolov2() 21 | { 22 | } 23 | 24 | std::tuple>, std::vector, std::vector> 25 | TinyYolov2::postProcess(const std::vector& inferenceOutput, const float confidenceThresh) const 26 | { 27 | std::vector outputData(inferenceOutput.front().first, inferenceOutput.front().first + NUM_BOXES); 28 | 29 | // or 30 | // std::vector outputData(inferenceOutput.front().first, 31 | // inferenceOutput.front().first + 32 | // std::accumulate(inferenceOutput.front().second.begin(), 33 | // inferenceOutput.front().second.end(), 1, std::multiplies<>())); 34 | 35 | std::vector> bboxes; 36 | std::vector scores; 37 | std::vector classIndices; 38 | 39 | std::vector tmpScores(m_numClasses); 40 | 41 | for (uint64_t i = 0; i < FEATURE_MAP_SIZE; ++i) { 42 | for (uint64_t j = 0; j < NUM_ANCHORS; ++j) { 43 | for (uint64_t k = 0; k < m_numClasses; ++k) { 44 | tmpScores[k] = outputData[i + FEATURE_MAP_SIZE * ((m_numClasses + 5) * j + k + 5)]; 45 | } 46 | Ort::softmax(tmpScores.data(), m_numClasses); 47 | uint64_t maxIdx = 48 | std::distance(tmpScores.begin(), std::max_element(tmpScores.begin(), tmpScores.begin() + m_numClasses)); 49 | float probability = tmpScores[maxIdx]; 50 | 51 | if (Ort::sigmoid(outputData[i + FEATURE_MAP_SIZE * ((m_numClasses + 5) * j + 4)]) * probability >= 52 | confidenceThresh) { 
53 | float xcenter = 54 | (Ort::sigmoid(outputData[i + FEATURE_MAP_SIZE * (m_numClasses + 5 * j)]) + i % 13) * 32.0; 55 | float ycenter = 56 | (Ort::sigmoid(outputData[i + FEATURE_MAP_SIZE * ((m_numClasses + 5) * j + 1)]) + i % 13) * 32.0; 57 | 58 | float width = 59 | expf(outputData[i + FEATURE_MAP_SIZE * ((m_numClasses + 5) * j + 2)]) * ANCHORS[2 * j] * 32.0; 60 | float height = 61 | expf(outputData[i + FEATURE_MAP_SIZE * ((m_numClasses + 5) * j + 3)]) * ANCHORS[2 * j + 1] * 32.0; 62 | 63 | float xmin = std::max(xcenter - width / 2, 0.0); 64 | float ymin = std::max(ycenter - height / 2, 0.0); 65 | float xmax = std::min(xcenter + width / 2, IMG_WIDTH); 66 | float ymax = std::min(ycenter + height / 2, IMG_HEIGHT); 67 | 68 | bboxes.emplace_back(std::array{xmin, ymin, xmax, ymax}); 69 | scores.emplace_back(probability); 70 | classIndices.emplace_back(maxIdx); 71 | } 72 | } 73 | } 74 | 75 | return std::make_tuple(bboxes, scores, classIndices); 76 | } 77 | 78 | void TinyYolov2::preprocess(float* dst, // 79 | const unsigned char* src, // 80 | const int64_t targetImgWidth, // 81 | const int64_t targetImgHeight, // 82 | const int numChannels) const 83 | { 84 | for (int i = 0; i < targetImgHeight; ++i) { 85 | for (int j = 0; j < targetImgWidth; ++j) { 86 | for (int c = 0; c < numChannels; ++c) { 87 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 88 | src[i * targetImgWidth * numChannels + j * numChannels + c]; 89 | } 90 | } 91 | } 92 | } 93 | } // namespace Ort 94 | -------------------------------------------------------------------------------- /examples/TinyYolov2.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file TinyYolov2.hpp 3 | * 4 | * @author btran 5 | * 6 | * @date 2020-05-05 7 | * 8 | * Copyright (c) organization 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | namespace Ort 23 | { 24 | class 
TinyYolov2 : public ImageRecognitionOrtSessionHandlerBase 25 | { 26 | public: 27 | static constexpr int64_t IMG_WIDTH = 416; 28 | 29 | static constexpr int64_t IMG_HEIGHT = 416; 30 | 31 | static constexpr int64_t IMG_CHANNEL = 3; 32 | 33 | static constexpr int64_t FEATURE_MAP_SIZE = 13 * 13; 34 | 35 | static constexpr int64_t NUM_BOXES = 1 * 13 * 13 * 125; 36 | 37 | static constexpr int64_t NUM_ANCHORS = 5; 38 | 39 | static constexpr float ANCHORS[10] = {1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52}; 40 | 41 | TinyYolov2(const uint16_t numClasses, // 42 | const std::string& modelPath, // 43 | const std::optional& gpuIdx = std::nullopt, // 44 | const std::optional>>& inputShapes = std::nullopt); 45 | 46 | ~TinyYolov2(); 47 | 48 | void preprocess(float* dst, // 49 | const unsigned char* src, // 50 | const int64_t targetImgWidth, // 51 | const int64_t targetImgHeight, // 52 | const int numChannels) const; 53 | 54 | std::tuple>, std::vector, std::vector> 55 | postProcess(const std::vector& inferenceOutput, const float confidenceThresh = 0.5) const; 56 | }; 57 | } // namespace Ort 58 | -------------------------------------------------------------------------------- /examples/TinyYolov2App.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file TinyYolov2App.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | 15 | #include "TinyYolov2.hpp" 16 | #include "Utility.hpp" 17 | 18 | static constexpr float CONFIDENCE_THRESHOLD = 0.5; 19 | static constexpr float NMS_THRESHOLD = 0.6; 20 | static const std::vector COLORS = toCvScalarColors(Ort::VOC_COLOR_CHART); 21 | 22 | namespace 23 | { 24 | cv::Mat processOneFrame(const Ort::TinyYolov2& osh, const cv::Mat& inputImg, float* dst); 25 | } // namespace 26 | 27 | int main(int argc, char* argv[]) 28 | { 29 | if (argc != 3) { 30 | std::cerr << "Usage: [apps] [path/to/onnx/yolov3-tiny.onnx] [path/to/image]" 
<< std::endl; 31 | return EXIT_FAILURE; 32 | } 33 | 34 | const std::string ONNX_MODEL_PATH = argv[1]; 35 | const std::string IMAGE_PATH = argv[2]; 36 | 37 | Ort::TinyYolov2 osh(Ort::VOC_NUM_CLASSES, ONNX_MODEL_PATH, 0, 38 | std::vector>{{1, Ort::TinyYolov2::IMG_CHANNEL, Ort::TinyYolov2::IMG_WIDTH, 39 | Ort::TinyYolov2::IMG_HEIGHT}}); 40 | 41 | osh.initClassNames(Ort::VOC_CLASSES); 42 | std::array dst; 43 | 44 | cv::Mat img = cv::imread(IMAGE_PATH); 45 | 46 | if (img.empty()) { 47 | std::cerr << "Failed to read input image" << std::endl; 48 | return EXIT_FAILURE; 49 | } 50 | 51 | auto resultImg = ::processOneFrame(osh, img, dst.data()); 52 | 53 | cv::imwrite("result.jpg", resultImg); 54 | 55 | return EXIT_SUCCESS; 56 | } 57 | 58 | namespace 59 | { 60 | cv::Mat processOneFrame(const Ort::TinyYolov2& osh, const cv::Mat& inputImg, float* dst) 61 | { 62 | cv::Mat result; 63 | cv::resize(inputImg, result, cv::Size(Ort::TinyYolov2::IMG_WIDTH, Ort::TinyYolov2::IMG_HEIGHT)); 64 | 65 | osh.preprocess(dst, result.data, Ort::TinyYolov2::IMG_WIDTH, Ort::TinyYolov2::IMG_HEIGHT, 66 | Ort::TinyYolov2::IMG_CHANNEL); 67 | auto inferenceOutput = osh({dst}); 68 | assert(inferenceOutput.size() == 1); 69 | 70 | auto processedResult = osh.postProcess(inferenceOutput, CONFIDENCE_THRESHOLD); 71 | std::vector> bboxes; 72 | std::vector scores; 73 | std::vector classIndices; 74 | std::tie(bboxes, scores, classIndices) = processedResult; 75 | 76 | if (bboxes.size() == 0) { 77 | return result; 78 | } 79 | 80 | auto afterNmsIndices = Ort::nms(bboxes, scores, NMS_THRESHOLD); 81 | 82 | std::vector> afterNmsBboxes; 83 | std::vector afterNmsClassIndices; 84 | 85 | afterNmsBboxes.reserve(afterNmsIndices.size()); 86 | afterNmsClassIndices.reserve(afterNmsIndices.size()); 87 | 88 | for (const auto idx : afterNmsIndices) { 89 | afterNmsBboxes.emplace_back(bboxes[idx]); 90 | afterNmsClassIndices.emplace_back(classIndices[idx]); 91 | } 92 | 93 | result = ::visualizeOneImage(result, afterNmsBboxes, 
afterNmsClassIndices, COLORS, osh.classNames()); 94 | 95 | return result; 96 | } 97 | } // namespace 98 | -------------------------------------------------------------------------------- /examples/UltraLightFastGenericFaceDetector.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file UltraLightFastGenericFaceDetector.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "UltraLightFastGenericFaceDetector.hpp" 9 | 10 | namespace Ort 11 | { 12 | UltraLightFastGenericFaceDetector::UltraLightFastGenericFaceDetector( 13 | const std::string& modelPath, const std::optional& gpuIdx, 14 | const std::optional>>& inputShapes) 15 | : ImageRecognitionOrtSessionHandlerBase(1 /* num classes */, modelPath, gpuIdx, inputShapes) 16 | { 17 | } 18 | 19 | UltraLightFastGenericFaceDetector::~UltraLightFastGenericFaceDetector() 20 | { 21 | } 22 | 23 | // Ref: https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/detect_imgs_onnx.py#L70 24 | void UltraLightFastGenericFaceDetector::preprocess(float* dst, // 25 | const unsigned char* src, // 26 | const int64_t targetImgWidth, // 27 | const int64_t targetImgHeight, // 28 | const int numChannels) const 29 | { 30 | for (int c = 0; c < numChannels; ++c) { 31 | for (int i = 0; i < targetImgHeight; ++i) { 32 | for (int j = 0; j < targetImgWidth; ++j) { 33 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 34 | (src[i * targetImgWidth * numChannels + j * numChannels + c] - 127.) 
/ 128.; 35 | } 36 | } 37 | } 38 | } 39 | } // namespace Ort 40 | -------------------------------------------------------------------------------- /examples/UltraLightFastGenericFaceDetector.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file UltraLightFastGenericFaceDetector.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | namespace Ort 17 | { 18 | class UltraLightFastGenericFaceDetector : public ImageRecognitionOrtSessionHandlerBase 19 | { 20 | public: 21 | static constexpr int64_t IMG_H = 480; 22 | 23 | static constexpr int64_t IMG_W = 640; 24 | 25 | static constexpr int64_t IMG_CHANNEL = 3; 26 | 27 | explicit UltraLightFastGenericFaceDetector( 28 | const std::string& modelPath, const std::optional& gpuIdx = std::nullopt, 29 | const std::optional>>& inputShapes = std::nullopt); 30 | 31 | ~UltraLightFastGenericFaceDetector(); 32 | 33 | void preprocess(float* dst, // 34 | const unsigned char* src, // 35 | const int64_t targetImgWidth, // 36 | const int64_t targetImgHeight, // 37 | const int numChannels) const; 38 | }; 39 | } // namespace Ort 40 | -------------------------------------------------------------------------------- /examples/UltraLightFastGenericFaceDetectorApp.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file UltraLightFastGenericFaceDetectorApp.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "Utility.hpp" 9 | 10 | #include "UltraLightFastGenericFaceDetector.hpp" 11 | 12 | static const std::vector FACE_CLASSES = {"face"}; 13 | static constexpr int64_t FACE_NUM_CLASSES = 1; 14 | static const std::vector> FACE_COLOR_CHART = 15 | Ort::generateColorCharts(FACE_NUM_CLASSES, 2020 /* choosing awesome 2020 as seed point */); 16 | 17 | static constexpr float CONFIDENCE_THRESHOLD = 0.7; 18 | static constexpr float NMS_THRESHOLD = 0.3; 19 | static const 
std::vector COLORS = toCvScalarColors(FACE_COLOR_CHART); 20 | 21 | namespace 22 | { 23 | cv::Mat processOneFrame(const Ort::UltraLightFastGenericFaceDetector& osh, const cv::Mat& inputImg, float* dst, 24 | const float confThresh = CONFIDENCE_THRESHOLD, const float nmsThresh = NMS_THRESHOLD); 25 | } // namespace 26 | 27 | int main(int argc, char* argv[]) 28 | { 29 | if (argc != 3) { 30 | std::cerr << "Usage: [apps] [path/to/onnx/version-RFB-640.onnx] [path/to/image]" << std::endl; 31 | return EXIT_FAILURE; 32 | } 33 | 34 | const std::string ONNX_MODEL_PATH = argv[1]; 35 | const std::string IMAGE_PATH = argv[2]; 36 | 37 | cv::Mat img = cv::imread(IMAGE_PATH); 38 | 39 | if (img.empty()) { 40 | std::cerr << "Failed to read input image" << std::endl; 41 | return EXIT_FAILURE; 42 | } 43 | 44 | Ort::UltraLightFastGenericFaceDetector osh( 45 | ONNX_MODEL_PATH, 0, 46 | std::vector>{{1, Ort::UltraLightFastGenericFaceDetector::IMG_CHANNEL, 47 | Ort::UltraLightFastGenericFaceDetector::IMG_H, 48 | Ort::UltraLightFastGenericFaceDetector::IMG_W}}); 49 | 50 | osh.initClassNames(FACE_CLASSES); 51 | 52 | std::vector dst(Ort::UltraLightFastGenericFaceDetector::IMG_CHANNEL * 53 | Ort::UltraLightFastGenericFaceDetector::IMG_H * 54 | Ort::UltraLightFastGenericFaceDetector::IMG_W); 55 | auto result = processOneFrame(osh, img, dst.data()); 56 | cv::imwrite("result.jpg", result); 57 | 58 | return 0; 59 | } 60 | 61 | namespace 62 | { 63 | cv::Mat processOneFrame(const Ort::UltraLightFastGenericFaceDetector& osh, const cv::Mat& inputImg, float* dst, 64 | const float confThresh, const float nmsThresh) 65 | { 66 | int origH = inputImg.rows; 67 | int origW = inputImg.cols; 68 | 69 | cv::Mat processedImg; 70 | cv::resize(inputImg, processedImg, cv::Size(osh.IMG_W, osh.IMG_H)); 71 | 72 | osh.preprocess(dst, processedImg.data, osh.IMG_W, osh.IMG_H, 3); 73 | auto inferenceOutput = osh({dst}); 74 | 75 | // output includes two tensors: 76 | // confidences: 1 x 17640 x 2 (2 represents 2 classes of 
background and face) 77 | // bboxes: 1 x 17640 x 4 (4 represents bbox coordinates) 78 | 79 | int numAnchors = inferenceOutput[0].second[1]; 80 | 81 | std::vector> bboxes; 82 | std::vector scores; 83 | std::vector classIndices; 84 | 85 | for (auto indices = std::make_pair(0, 0); indices.first < numAnchors * 2 && indices.second < numAnchors * 4; 86 | indices.first += 2, indices.second += 4) { 87 | float conf = inferenceOutput[0].first[indices.first + 1]; 88 | if (conf < confThresh) { 89 | continue; 90 | } 91 | float xmin = inferenceOutput[1].first[indices.second + 0] * origW; 92 | float ymin = inferenceOutput[1].first[indices.second + 1] * origH; 93 | float xmax = inferenceOutput[1].first[indices.second + 2] * origW; 94 | float ymax = inferenceOutput[1].first[indices.second + 3] * origH; 95 | 96 | bboxes.emplace_back(std::array{xmin, ymin, xmax, ymax}); 97 | scores.emplace_back(conf); 98 | 99 | // only consider face 100 | classIndices.emplace_back(0); 101 | } 102 | 103 | if (bboxes.size() == 0) { 104 | return inputImg; 105 | } 106 | 107 | auto afterNmsIndices = Ort::nms(bboxes, scores, nmsThresh); 108 | 109 | std::vector> afterNmsBboxes; 110 | std::vector afterNmsClassIndices; 111 | 112 | afterNmsBboxes.reserve(afterNmsIndices.size()); 113 | afterNmsClassIndices.reserve(afterNmsIndices.size()); 114 | 115 | for (const auto idx : afterNmsIndices) { 116 | afterNmsBboxes.emplace_back(bboxes[idx]); 117 | afterNmsClassIndices.emplace_back(classIndices[idx]); 118 | } 119 | 120 | return visualizeOneImage(inputImg, afterNmsBboxes, afterNmsClassIndices, COLORS, osh.classNames()); 121 | } 122 | } // namespace 123 | -------------------------------------------------------------------------------- /examples/Utility.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Utility.hpp 3 | * 4 | * @author btran 5 | * 6 | * @date 2020-05-04 7 | * 8 | * Copyright (c) organization 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 
| #include 16 | #include 17 | #include 18 | 19 | #include 20 | 21 | namespace 22 | { 23 | inline std::vector toCvScalarColors(const std::vector>& colors) 24 | { 25 | std::vector result; 26 | result.reserve(colors.size()); 27 | std::transform(std::begin(colors), std::end(colors), std::back_inserter(result), 28 | [](const auto& elem) { return cv::Scalar(elem[0], elem[1], elem[2]); }); 29 | 30 | return result; 31 | } 32 | 33 | inline cv::Mat visualizeOneImage(const cv::Mat& img, const std::vector>& bboxes, 34 | const std::vector& classIndices, const std::vector& allColors, 35 | const std::vector& allClassNames = {}) 36 | { 37 | assert(bboxes.size() == classIndices.size()); 38 | if (!allClassNames.empty()) { 39 | assert(allClassNames.size() > *std::max_element(classIndices.begin(), classIndices.end())); 40 | assert(allColors.size() == allClassNames.size()); 41 | } 42 | 43 | cv::Mat result = img.clone(); 44 | 45 | for (size_t i = 0; i < bboxes.size(); ++i) { 46 | const auto& curBbox = bboxes[i]; 47 | const uint64_t classIdx = classIndices[i]; 48 | const cv::Scalar& curColor = allColors[classIdx]; 49 | const std::string curLabel = allClassNames.empty() ? 
std::to_string(classIdx) : allClassNames[classIdx]; 50 | 51 | cv::rectangle(result, cv::Point(curBbox[0], curBbox[1]), cv::Point(curBbox[2], curBbox[3]), curColor, 2); 52 | 53 | int baseLine = 0; 54 | cv::Size labelSize = cv::getTextSize(curLabel, cv::FONT_HERSHEY_COMPLEX, 0.35, 1, &baseLine); 55 | cv::rectangle(result, cv::Point(curBbox[0], curBbox[1]), 56 | cv::Point(curBbox[0] + labelSize.width, curBbox[1] + static_cast(1.3 * labelSize.height)), 57 | curColor, -1); 58 | cv::putText(result, curLabel, cv::Point(curBbox[0], curBbox[1] + labelSize.height), cv::FONT_HERSHEY_COMPLEX, 59 | 0.35, cv::Scalar(255, 255, 255)); 60 | } 61 | 62 | return result; 63 | } 64 | 65 | inline cv::Mat visualizeOneImageWithMask(const cv::Mat& img, const std::vector>& bboxes, 66 | const std::vector& classIndices, const std::vector& masks, 67 | const std::vector& allColors, 68 | const std::vector& allClassNames = {}, 69 | const float maskThreshold = 0.5) 70 | { 71 | assert(bboxes.size() == classIndices.size()); 72 | if (!allClassNames.empty()) { 73 | assert(allClassNames.size() > *std::max_element(classIndices.begin(), classIndices.end())); 74 | assert(allColors.size() == allClassNames.size()); 75 | } 76 | 77 | cv::Mat result = img.clone(); 78 | 79 | for (size_t i = 0; i < bboxes.size(); ++i) { 80 | const auto& curBbox = bboxes[i]; 81 | const uint64_t classIdx = classIndices[i]; 82 | cv::Mat curMask = masks[i].clone(); 83 | const cv::Scalar& curColor = allColors[classIdx]; 84 | const std::string curLabel = allClassNames.empty() ? 
std::to_string(classIdx) : allClassNames[classIdx]; 85 | 86 | cv::rectangle(result, cv::Point(curBbox[0], curBbox[1]), cv::Point(curBbox[2], curBbox[3]), curColor, 2); 87 | 88 | int baseLine = 0; 89 | cv::Size labelSize = cv::getTextSize(curLabel, cv::FONT_HERSHEY_COMPLEX, 0.35, 1, &baseLine); 90 | cv::rectangle(result, cv::Point(curBbox[0], curBbox[1]), 91 | cv::Point(curBbox[0] + labelSize.width, curBbox[1] + static_cast(1.3 * labelSize.height)), 92 | curColor, -1); 93 | cv::putText(result, curLabel, cv::Point(curBbox[0], curBbox[1] + labelSize.height), cv::FONT_HERSHEY_COMPLEX, 94 | 0.35, cv::Scalar(255, 255, 255)); 95 | 96 | // ---------------------------------------------------------------------// 97 | // Visualize masks 98 | 99 | const cv::Rect curBoxRect(cv::Point(curBbox[0], curBbox[1]), cv::Point(curBbox[2], curBbox[3])); 100 | 101 | cv::resize(curMask, curMask, curBoxRect.size()); 102 | 103 | cv::Mat finalMask = (curMask > maskThreshold); 104 | 105 | cv::Mat coloredRoi = (0.3 * curColor + 0.7 * result(curBoxRect)); 106 | 107 | coloredRoi.convertTo(coloredRoi, CV_8UC3); 108 | 109 | std::vector contours; 110 | cv::Mat hierarchy; 111 | finalMask.convertTo(finalMask, CV_8U); 112 | 113 | cv::findContours(finalMask, contours, hierarchy, cv::RETR_TREE, cv::CHAIN_APPROX_SIMPLE); 114 | cv::drawContours(coloredRoi, contours, -1, curColor, 5, cv::LINE_8, hierarchy, 100); 115 | coloredRoi.copyTo(result(curBoxRect), finalMask); 116 | } 117 | 118 | return result; 119 | } 120 | 121 | inline cv::Mat drawColorChart(const std::vector& classes, const std::vector& colors) 122 | { 123 | cv::Mat legend = cv::Mat::zeros((classes.size() * 25) + 25, 300, CV_8UC(3)); 124 | for (std::size_t i = 0; i < classes.size(); ++i) { 125 | cv::putText(legend, classes[i], cv::Point(5, (i * 25) + 17), cv::FONT_HERSHEY_SIMPLEX, 0.5, 126 | cv::Scalar(0, 0, 255), 2); 127 | cv::rectangle(legend, cv::Point(100, (i * 25)), cv::Point(300, (i * 25) + 25), colors[i], -1); 128 | } 129 | 130 | return 
legend; 131 | } 132 | 133 | inline void transposeNDWrapper(cv::InputArray src_, const std::vector& order, cv::OutputArray dst_) 134 | { 135 | #if (CV_MAJOR_VERSION > 4 || (CV_MAJOR_VERSION == 4 && CV_MINOR_VERSION >= 6)) 136 | cv::transposeND(src_, order, dst_); 137 | #else 138 | cv::Mat inp = src_.getMat(); 139 | CV_Assert(inp.isContinuous()); 140 | CV_CheckEQ(inp.channels(), 1, "Input array should be single-channel"); 141 | CV_CheckEQ(order.size(), static_cast(inp.dims), "Number of dimensions shouldn't change"); 142 | 143 | auto order_ = order; 144 | std::sort(order_.begin(), order_.end()); 145 | for (size_t i = 0; i < order_.size(); ++i) { 146 | CV_CheckEQ(static_cast(order_[i]), i, "New order should be a valid permutation of the old one"); 147 | } 148 | 149 | std::vector newShape(order.size()); 150 | for (size_t i = 0; i < order.size(); ++i) { 151 | newShape[i] = inp.size[order[i]]; 152 | } 153 | 154 | dst_.create(static_cast(newShape.size()), newShape.data(), inp.type()); 155 | cv::Mat out = dst_.getMat(); 156 | CV_Assert(out.isContinuous()); 157 | CV_Assert(inp.data != out.data); 158 | 159 | int continuous_idx = 0; 160 | for (int i = static_cast(order.size()) - 1; i >= 0; --i) { 161 | if (order[i] != i) { 162 | continuous_idx = i + 1; 163 | break; 164 | } 165 | } 166 | 167 | size_t continuous_size = continuous_idx == 0 ? 
out.total() : out.step1(continuous_idx - 1); 168 | size_t outer_size = out.total() / continuous_size; 169 | 170 | std::vector steps(order.size()); 171 | for (int i = 0; i < static_cast(steps.size()); ++i) { 172 | steps[i] = inp.step1(order[i]); 173 | } 174 | 175 | auto* src = inp.ptr(); 176 | auto* dst = out.ptr(); 177 | 178 | size_t src_offset = 0; 179 | size_t es = out.elemSize(); 180 | for (size_t i = 0; i < outer_size; ++i) { 181 | std::memcpy(dst, src + es * src_offset, es * continuous_size); 182 | dst += es * continuous_size; 183 | for (int j = continuous_idx - 1; j >= 0; --j) { 184 | src_offset += steps[j]; 185 | if ((src_offset / steps[j]) % out.size[j] != 0) { 186 | break; 187 | } 188 | src_offset -= steps[j] * out.size[j]; 189 | } 190 | } 191 | #endif 192 | } 193 | 194 | /** 195 | * @brief https://mmcv.readthedocs.io/en/latest/_modules/mmcv/ops/point_sample.html 196 | */ 197 | inline cv::Mat bilinearGridSample(const cv::Mat& input, const cv::Mat& grid, bool alignCorners) 198 | { 199 | // input: B x C x Hi x Wi 200 | // grid: B x Hg x Wg x 2 201 | 202 | if (input.size[0] != grid.size[0]) { 203 | throw std::runtime_error("input and grid need to have the same batch size"); 204 | } 205 | int batch = input.size[0]; 206 | int channel = input.size[1]; 207 | int height = input.size[2]; 208 | int width = input.size[3]; 209 | 210 | int numKeyPoints = grid.size[2]; 211 | cv::Mat yMat = grid({cv::Range::all(), cv::Range::all(), cv::Range::all(), cv::Range(0, 1)}) 212 | .clone() 213 | .reshape(1, {batch, grid.size[1] * grid.size[2]}); 214 | cv::Mat xMat = grid({cv::Range::all(), cv::Range::all(), cv::Range::all(), cv::Range(1, 2)}) 215 | .clone() 216 | .reshape(1, {batch, grid.size[1] * grid.size[2]}); 217 | 218 | if (alignCorners) { 219 | xMat = ((xMat + 1) / 2) * (width - 1); 220 | yMat = ((yMat + 1) / 2) * (height - 1); 221 | } else { 222 | xMat = ((xMat + 1) * width - 1) / 2; 223 | yMat = ((yMat + 1) * height - 1) / 2; 224 | } 225 | 226 | // floor 227 | cv::Mat 
x0Mat = xMat - 0.5; 228 | cv::Mat y0Mat = yMat - 0.5; 229 | x0Mat.convertTo(x0Mat, CV_32S); 230 | y0Mat.convertTo(y0Mat, CV_32S); 231 | 232 | x0Mat.convertTo(x0Mat, CV_32F); 233 | y0Mat.convertTo(y0Mat, CV_32F); 234 | 235 | cv::Mat x1Mat = x0Mat + 1; 236 | /* FIX: y1Mat was derived from x0Mat (copy-paste bug); the upper y-neighbor of the 2x2 bilinear cell must come from y0Mat, otherwise both the interpolation weights and the {x, y1} sample pairs below are wrong */ cv::Mat y1Mat = y0Mat + 1; 237 | 238 | std::vector weights = {(x1Mat - xMat).mul(y1Mat - yMat), (x1Mat - xMat).mul(yMat - y0Mat), 239 | (xMat - x0Mat).mul(y1Mat - yMat), (xMat - x0Mat).mul(yMat - y0Mat)}; 240 | 241 | cv::Mat result = cv::Mat::zeros(3, std::vector{batch, channel, grid.size[1] * grid.size[2]}.data(), CV_32F); 242 | 243 | /* FIX: coordinate 0 addresses a valid pixel; 'size > 0' silently zeroed every sample touching the first row/column */ auto isCoordSafe = [](int size, int maxSize) -> bool { return size >= 0 && size < maxSize; }; 244 | 245 | for (int b = 0; b < batch; ++b) { 246 | for (int i = 0; i < grid.size[1] * grid.size[2]; ++i) { 247 | int x0 = x0Mat.at(b, i); 248 | int y0 = y0Mat.at(b, i); 249 | int x1 = x1Mat.at(b, i); 250 | int y1 = y1Mat.at(b, i); 251 | 252 | std::vector> pairs = {{x0, y0}, {x0, y1}, {x1, y0}, {x1, y1}}; 253 | std::vector Is(4, cv::Mat::zeros(channel, 1, CV_32F)); 254 | 255 | for (int k = 0; k < 4; ++k) { 256 | if (isCoordSafe(pairs[k].first, width) && isCoordSafe(pairs[k].second, height)) { 257 | Is[k] = 258 | input({cv::Range(b, b + 1), cv::Range::all(), cv::Range(pairs[k].second, pairs[k].second + 1), 259 | cv::Range(pairs[k].first, pairs[k].first + 1)}) 260 | .clone() 261 | .reshape(1, channel); 262 | } 263 | } 264 | 265 | cv::Mat curDescriptor = Is[0] * weights[0].at(i) + Is[1] * weights[1].at(i) + 266 | Is[2] * weights[2].at(i) + Is[3] * weights[3].at(i); 267 | 268 | for (int c = 0; c < channel; ++c) { 269 | result.at(b, c, i) = curDescriptor.at(c); 270 | } 271 | } 272 | } 273 | 274 | return result.reshape(1, {batch, channel, grid.size[1], grid.size[2]}); 275 | } 276 | } // namespace 277 | -------------------------------------------------------------------------------- /examples/YoloX.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * 
@file YoloX.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "YoloX.hpp" 9 | 10 | namespace Ort 11 | { 12 | struct YoloX::GridAndStride { 13 | int grid0; 14 | int grid1; 15 | int stride; 16 | }; 17 | 18 | YoloX::YoloX(const uint16_t numClasses, // 19 | const std::string& modelPath, // 20 | const std::optional& gpuIdx, const std::optional>>& inputShapes) 21 | : ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes) 22 | { 23 | } 24 | 25 | YoloX::~YoloX() 26 | { 27 | } 28 | 29 | void YoloX::preprocess(float* dst, // 30 | const unsigned char* src, // 31 | const int64_t targetImgWidth, // 32 | const int64_t targetImgHeight, // 33 | const int numChannels) const 34 | { 35 | for (int c = 0; c < numChannels; ++c) { 36 | for (int i = 0; i < targetImgHeight; ++i) { 37 | for (int j = 0; j < targetImgWidth; ++j) { 38 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 39 | src[i * targetImgWidth * numChannels + j * numChannels + c]; 40 | } 41 | } 42 | } 43 | } 44 | 45 | std::vector YoloX::decodeOutputs(const float* prob, float confThresh) const 46 | { 47 | std::vector gridStrides = this->genereateGridsAndStrides(IMG_W, m_strides); 48 | return this->generateYoloXProposals(prob, gridStrides, confThresh); 49 | } 50 | 51 | /** 52 | * @brief https://github.com/Megvii-BaseDetection/YOLOX/blob/main/demo/MegEngine/cpp/yolox.cpp#L64 53 | */ 54 | std::vector YoloX::genereateGridsAndStrides(int targetSize, const std::vector& strides) const 55 | { 56 | std::vector gridStrides; 57 | for (auto stride : strides) { 58 | int numGrid = targetSize / stride; 59 | for (int g1 = 0; g1 < numGrid; g1++) { 60 | for (int g0 = 0; g0 < numGrid; g0++) { 61 | gridStrides.push_back((GridAndStride){g0, g1, stride}); 62 | } 63 | } 64 | } 65 | return gridStrides; 66 | } 67 | 68 | /** 69 | * @brief https://github.com/Megvii-BaseDetection/YOLOX/blob/main/demo/MegEngine/cpp/yolox.cpp#L76 70 | */ 71 | std::vector 72 | YoloX::generateYoloXProposals(const 
float* prob, const std::vector& gridStrides, float confThresh) const 73 | { 74 | std::vector objects; 75 | 76 | const int numAnchors = gridStrides.size(); 77 | const int numClasses = m_numClasses; 78 | for (int anchorIdx = 0; anchorIdx < numAnchors; ++anchorIdx) { 79 | const int grid0 = gridStrides[anchorIdx].grid0; 80 | const int grid1 = gridStrides[anchorIdx].grid1; 81 | const int stride = gridStrides[anchorIdx].stride; 82 | 83 | // 4 parameters defining the bounding boxes and 1 parameter defining the confidence 84 | const int basicPos = anchorIdx * (numClasses + 5); 85 | float xCenter = (prob[basicPos + 0] + grid0) * stride; 86 | float yCenter = (prob[basicPos + 1] + grid1) * stride; 87 | float w = exp(prob[basicPos + 2]) * stride; 88 | float h = exp(prob[basicPos + 3]) * stride; 89 | float x0 = xCenter - w * 0.5f; 90 | float y0 = yCenter - h * 0.5f; 91 | 92 | float boxObjectness = prob[basicPos + 4]; 93 | for (int classIdx = 0; classIdx < numClasses; ++classIdx) { 94 | float boxClsScore = prob[basicPos + 5 + classIdx]; 95 | float boxProb = boxObjectness * boxClsScore; 96 | if (boxProb > confThresh) { 97 | Object obj; 98 | obj.pos.x = x0; 99 | obj.pos.y = y0; 100 | obj.pos.width = w; 101 | obj.pos.height = h; 102 | obj.label = classIdx; 103 | obj.prob = boxProb; 104 | objects.push_back(obj); 105 | } 106 | } 107 | } 108 | 109 | return objects; 110 | } 111 | } // namespace Ort 112 | -------------------------------------------------------------------------------- /examples/YoloX.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file YoloX.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include 17 | 18 | namespace Ort 19 | { 20 | class YoloX : public ImageRecognitionOrtSessionHandlerBase 21 | { 22 | public: 23 | static constexpr int64_t IMG_H = 640; 24 | static constexpr int64_t IMG_W = 640; 25 | static constexpr int64_t 
IMG_CHANNEL = 3; 26 | 27 | struct Object { 28 | cv::Rect_ pos; 29 | int label; 30 | float prob; 31 | }; 32 | 33 | YoloX(const uint16_t numClasses, // 34 | const std::string& modelPath, // 35 | const std::optional& gpuIdx = std::nullopt, // 36 | const std::optional>>& inputShapes = std::nullopt); 37 | 38 | ~YoloX(); 39 | 40 | void preprocess(float* dst, // 41 | const unsigned char* src, // 42 | const int64_t targetImgWidth, // 43 | const int64_t targetImgHeight, // 44 | const int numChannels) const; 45 | 46 | std::vector decodeOutputs(const float* prob, float confThresh) const; 47 | 48 | /** 49 | * @brief update strides 50 | * this method gives users the flexibility to use other yolox models 51 | */ 52 | void updateStrides(const std::vector& strides) 53 | { 54 | if (strides.empty()) { 55 | throw std::runtime_error("cannot update empty strides"); 56 | } 57 | m_strides = strides; 58 | } 59 | 60 | private: 61 | std::vector m_strides = {8, 16, 32}; 62 | 63 | struct GridAndStride; 64 | 65 | std::vector genereateGridsAndStrides(int targetSize, const std::vector& strides) const; 66 | 67 | std::vector generateYoloXProposals(const float* prob, const std::vector& gridStrides, 68 | float confThresh) const; 69 | }; 70 | } // namespace Ort 71 | -------------------------------------------------------------------------------- /examples/YoloXApp.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file YoloXApp.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | 10 | #include "Utility.hpp" 11 | #include "YoloX.hpp" 12 | 13 | static const std::vector MSCOCO_WITHOUT_BG_CLASSES(Ort::MSCOCO_CLASSES.begin() + 1, 14 | Ort::MSCOCO_CLASSES.end()); 15 | static constexpr int64_t NUM_CLASSES = 80; 16 | static const std::vector> COLOR_CHART = Ort::generateColorCharts(NUM_CLASSES); 17 | 18 | static constexpr float CONFIDENCE_THRESHOLD = 0.1; 19 | static const std::vector COLORS = toCvScalarColors(COLOR_CHART); 20 | 21 | namespace 22 | 
{ 23 | cv::Mat processOneFrame(const Ort::YoloX& osh, const cv::Mat& inputImg, float* dst, const float confThresh); 24 | } // namespace 25 | 26 | int main(int argc, char* argv[]) 27 | { 28 | if (argc != 3) { 29 | std::cerr << "Usage: [apps] [path/to/onnx/yolox] [path/to/image]" << std::endl; 30 | return EXIT_FAILURE; 31 | } 32 | 33 | const std::string ONNX_MODEL_PATH = argv[1]; 34 | const std::string IMAGE_PATH = argv[2]; 35 | 36 | cv::Mat img = cv::imread(IMAGE_PATH); 37 | 38 | if (img.empty()) { 39 | std::cerr << "Failed to read input image" << std::endl; 40 | return EXIT_FAILURE; 41 | } 42 | 43 | Ort::YoloX osh( 44 | NUM_CLASSES, ONNX_MODEL_PATH, 0, 45 | std::vector>{{1, Ort::YoloX::IMG_CHANNEL, Ort::YoloX::IMG_H, Ort::YoloX::IMG_W}}); 46 | 47 | osh.initClassNames(MSCOCO_WITHOUT_BG_CLASSES); 48 | 49 | std::vector dst(Ort::YoloX::IMG_CHANNEL * Ort::YoloX::IMG_H * Ort::YoloX::IMG_W); 50 | auto result = processOneFrame(osh, img, dst.data(), CONFIDENCE_THRESHOLD); 51 | cv::imwrite("result.jpg", result); 52 | 53 | return EXIT_SUCCESS; 54 | } 55 | 56 | namespace 57 | { 58 | cv::Mat processOneFrame(const Ort::YoloX& osh, const cv::Mat& inputImg, float* dst, const float confThresh) 59 | { 60 | int origW = inputImg.cols, origH = inputImg.rows; 61 | std::vector originImageSize{static_cast(origH), static_cast(origW)}; 62 | cv::Mat scaledImg; 63 | cv::resize(inputImg, scaledImg, cv::Size(Ort::YoloX::IMG_W, Ort::YoloX::IMG_H), 0, 0, cv::INTER_CUBIC); 64 | osh.preprocess(dst, scaledImg.data, Ort::YoloX::IMG_W, Ort::YoloX::IMG_H, 3); 65 | auto inferenceOutput = osh({dst}); 66 | 67 | std::vector objects = osh.decodeOutputs(inferenceOutput[0].first, confThresh); 68 | 69 | std::vector> bboxes; 70 | std::vector scores; 71 | std::vector classIndices; 72 | 73 | float scaleW = 1. * origW / Ort::YoloX::IMG_W; 74 | float scaleH = 1. 
* origH / Ort::YoloX::IMG_H; 75 | 76 | for (const auto& object : objects) { 77 | float xmin = object.pos.x * scaleW; 78 | float ymin = object.pos.y * scaleH; 79 | float xmax = (object.pos.x + object.pos.width) * scaleW; 80 | float ymax = (object.pos.y + object.pos.height) * scaleH; 81 | 82 | /* FIX: std::max/std::min require both operands to be the same type; mixing float with int fails template deduction, so clamp with float operands */ xmin = std::max(xmin, 0.f); 83 | ymin = std::max(ymin, 0.f); 84 | xmax = std::min(xmax, inputImg.cols - 1.f); 85 | ymax = std::min(ymax, inputImg.rows - 1.f); 86 | 87 | bboxes.emplace_back(std::array{xmin, ymin, xmax, ymax}); 88 | scores.emplace_back(object.prob); 89 | classIndices.emplace_back(object.label); 90 | } 91 | 92 | auto afterNmsIndices = Ort::nms(bboxes, scores, confThresh); /* NOTE(review): NMS reuses the confidence threshold as its IoU threshold — presumably a dedicated nmsThresh was intended; confirm against the sibling apps which pass a separate NMS_THRESHOLD */ 93 | 94 | std::vector> afterNmsBboxes; 95 | std::vector afterNmsClassIndices; 96 | 97 | afterNmsBboxes.reserve(afterNmsIndices.size()); 98 | afterNmsClassIndices.reserve(afterNmsIndices.size()); 99 | 100 | for (const auto idx : afterNmsIndices) { 101 | afterNmsBboxes.emplace_back(bboxes[idx]); 102 | afterNmsClassIndices.emplace_back(classIndices[idx]); 103 | } 104 | 105 | return afterNmsBboxes.empty() 106 | ? 
inputImg 107 | : visualizeOneImage(inputImg, afterNmsBboxes, afterNmsClassIndices, COLORS, osh.classNames()); 108 | } 109 | } // namespace 110 | -------------------------------------------------------------------------------- /examples/Yolov3.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Yolov3.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "Yolov3.hpp" 9 | #include 10 | 11 | namespace Ort 12 | { 13 | Yolov3::Yolov3(const uint16_t numClasses, // 14 | const std::string& modelPath, // 15 | const std::optional& gpuIdx, const std::optional>>& inputShapes) 16 | : ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes) 17 | { 18 | } 19 | 20 | Yolov3::~Yolov3() 21 | { 22 | } 23 | 24 | void Yolov3::preprocess(float* dst, // 25 | const unsigned char* src, // 26 | const int64_t targetImgWidth, // 27 | const int64_t targetImgHeight, // 28 | const int numChannels) const 29 | { 30 | for (int c = 0; c < numChannels; ++c) { 31 | for (int i = 0; i < targetImgHeight; ++i) { 32 | for (int j = 0; j < targetImgWidth; ++j) { 33 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 34 | src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0; 35 | } 36 | } 37 | } 38 | } 39 | } // namespace Ort 40 | -------------------------------------------------------------------------------- /examples/Yolov3.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Yolov3.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include 17 | 18 | namespace Ort 19 | { 20 | class Yolov3 : public ImageRecognitionOrtSessionHandlerBase 21 | { 22 | public: 23 | static constexpr int64_t IMG_H = 416; 24 | 25 | static constexpr int64_t IMG_W = 416; 26 | 27 | static constexpr int64_t IMG_CHANNEL = 3; 28 | 29 | Yolov3(const uint16_t numClasses, // 30 | const 
std::string& modelPath, // 31 | const std::optional& gpuIdx = std::nullopt, // 32 | const std::optional>>& inputShapes = std::nullopt); 33 | 34 | ~Yolov3(); 35 | 36 | void preprocess(float* dst, // 37 | const unsigned char* src, // 38 | const int64_t targetImgWidth, // 39 | const int64_t targetImgHeight, // 40 | const int numChannels) const; 41 | }; 42 | } // namespace Ort 43 | -------------------------------------------------------------------------------- /examples/Yolov3App.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Yolov3App.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | 10 | #include "Utility.hpp" 11 | #include "Yolov3.hpp" 12 | 13 | static const std::vector MSCOCO_WITHOUT_BG_CLASSES(Ort::MSCOCO_CLASSES.begin() + 1, 14 | Ort::MSCOCO_CLASSES.end()); 15 | static constexpr int64_t NUM_CLASSES = 80; 16 | static const std::vector> COLOR_CHART = Ort::generateColorCharts(NUM_CLASSES); 17 | 18 | static constexpr float CONFIDENCE_THRESHOLD = 0.2; 19 | static const std::vector COLORS = toCvScalarColors(COLOR_CHART); 20 | 21 | namespace 22 | { 23 | cv::Mat processOneFrame(const Ort::Yolov3& osh, const cv::Mat& inputImg, float* dst, const float confThresh = 0.15); 24 | } // namespace 25 | 26 | int main(int argc, char* argv[]) 27 | { 28 | if (argc != 3) { 29 | std::cerr << "Usage: [apps] [path/to/onnx/yolov3.onnx] [path/to/image]" << std::endl; 30 | return EXIT_FAILURE; 31 | } 32 | 33 | const std::string ONNX_MODEL_PATH = argv[1]; 34 | const std::string IMAGE_PATH = argv[2]; 35 | 36 | cv::Mat img = cv::imread(IMAGE_PATH); 37 | 38 | if (img.empty()) { 39 | std::cerr << "Failed to read input image" << std::endl; 40 | return EXIT_FAILURE; 41 | } 42 | 43 | Ort::Yolov3 osh(NUM_CLASSES, ONNX_MODEL_PATH, 0, 44 | std::vector>{ 45 | {1, Ort::Yolov3::IMG_CHANNEL, Ort::Yolov3::IMG_H, Ort::Yolov3::IMG_W}, {1, 2}}); 46 | 47 | osh.initClassNames(MSCOCO_WITHOUT_BG_CLASSES); 48 | 49 | std::vector 
dst(Ort::Yolov3::IMG_CHANNEL * Ort::Yolov3::IMG_H * Ort::Yolov3::IMG_W); 50 | auto result = processOneFrame(osh, img, dst.data()); 51 | cv::imwrite("result.jpg", result); 52 | 53 | return 0; 54 | } 55 | 56 | namespace 57 | { 58 | cv::Mat processOneFrame(const Ort::Yolov3& osh, const cv::Mat& inputImg, float* dst, const float confThresh) 59 | { 60 | int origW = inputImg.cols, origH = inputImg.rows; 61 | std::vector originImageSize{static_cast(origH), static_cast(origW)}; 62 | 63 | float scale = std::min(1.0 * Ort::Yolov3::IMG_W / origW, 1.0 * Ort::Yolov3::IMG_H / origH); 64 | 65 | cv::Mat scaledImg; 66 | cv::resize(inputImg, scaledImg, cv::Size(), scale, scale, cv::INTER_CUBIC); 67 | cv::Mat processedImg(Ort::Yolov3::IMG_H, Ort::Yolov3::IMG_W, CV_8UC3, cv::Scalar(128, 128, 128)); 68 | 69 | scaledImg.copyTo(processedImg(cv::Rect((Ort::Yolov3::IMG_W - scaledImg.cols) / 2, 70 | (Ort::Yolov3::IMG_H - scaledImg.rows) / 2, scaledImg.cols, scaledImg.rows))); 71 | 72 | osh.preprocess(dst, processedImg.data, Ort::Yolov3::IMG_W, Ort::Yolov3::IMG_H, 3); 73 | auto inferenceOutput = osh({dst, originImageSize.data()}); 74 | int numAnchors = inferenceOutput[0].second[1]; 75 | int numOutputBboxes = inferenceOutput[2].second[0]; 76 | DEBUG_LOG("number anchor candidates: %d", numAnchors); 77 | DEBUG_LOG("number output bboxes: %d", numOutputBboxes); 78 | 79 | const float* candidateBboxes = inferenceOutput[0].first; 80 | const float* candidateScores = inferenceOutput[1].first; 81 | const int32_t* outputIndices = reinterpret_cast(inferenceOutput[2].first); 82 | 83 | std::vector> bboxes; 84 | std::vector scores; 85 | std::vector classIndices; 86 | 87 | for (int i = 0; i < numOutputBboxes; ++i) { 88 | int curClassIdx = outputIndices[i * 3 + 1]; 89 | int curCandidateIdx = outputIndices[i * 3 + 2]; 90 | 91 | float curScore = candidateScores[curClassIdx * numAnchors + curCandidateIdx]; 92 | 93 | if (curScore < confThresh) { 94 | continue; 95 | } 96 | 97 | float ymin = 
candidateBboxes[curCandidateIdx * 4 + 0]; 98 | float xmin = candidateBboxes[curCandidateIdx * 4 + 1]; 99 | float ymax = candidateBboxes[curCandidateIdx * 4 + 2]; 100 | float xmax = candidateBboxes[curCandidateIdx * 4 + 3]; 101 | 102 | /* FIX: std::max/std::min require both operands to be the same type; mixing float with int fails template deduction, so clamp with float operands */ xmin = std::max(xmin, 0.f); 103 | ymin = std::max(ymin, 0.f); 104 | xmax = std::min(xmax, inputImg.cols - 1.f); 105 | ymax = std::min(ymax, inputImg.rows - 1.f); 106 | 107 | bboxes.emplace_back(std::array{xmin, ymin, xmax, ymax}); 108 | scores.emplace_back(curScore); 109 | classIndices.emplace_back(curClassIdx); 110 | } 111 | 112 | return bboxes.empty() ? inputImg : visualizeOneImage(inputImg, bboxes, classIndices, COLORS, osh.classNames()); 113 | } 114 | } // namespace 115 | -------------------------------------------------------------------------------- /include/ort_utility/Constants.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Constants.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | 13 | #include "Utility.hpp" 14 | 15 | namespace Ort 16 | { 17 | static const std::vector IMAGENET_CLASSES = { 18 | "tench, Tinca tinca", 19 | "goldfish, Carassius auratus", 20 | "great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias", 21 | "tiger shark, Galeocerdo cuvieri", 22 | "hammerhead, hammerhead shark", 23 | "electric ray, crampfish, numbfish, torpedo", 24 | "stingray", 25 | "cock", 26 | "hen", 27 | "ostrich, Struthio camelus", 28 | "brambling, Fringilla montifringilla", 29 | "goldfinch, Carduelis carduelis", 30 | "house finch, linnet, Carpodacus mexicanus", 31 | "junco, snowbird", 32 | "indigo bunting, indigo finch, indigo bird, Passerina cyanea", 33 | "robin, American robin, Turdus migratorius", 34 | "bulbul", 35 | "jay", 36 | "magpie", 37 | "chickadee", 38 | "water ouzel, dipper", 39 | "kite", 40 | "bald eagle, American eagle, Haliaeetus leucocephalus", 41 | "vulture", 42 | "great grey owl, great gray owl, 
Strix nebulosa", 43 | "European fire salamander, Salamandra salamandra", 44 | "common newt, Triturus vulgaris", 45 | "eft", 46 | "spotted salamander, Ambystoma maculatum", 47 | "axolotl, mud puppy, Ambystoma mexicanum", 48 | "bullfrog, Rana catesbeiana", 49 | "tree frog, tree-frog", 50 | "tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui", 51 | "loggerhead, loggerhead turtle, Caretta caretta", 52 | "leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea", 53 | "mud turtle", 54 | "terrapin", 55 | "box turtle, box tortoise", 56 | "banded gecko", 57 | "common iguana, iguana, Iguana iguana", 58 | "American chameleon, anole, Anolis carolinensis", 59 | "whiptail, whiptail lizard", 60 | "agama", 61 | "frilled lizard, Chlamydosaurus kingi", 62 | "alligator lizard", 63 | "Gila monster, Heloderma suspectum", 64 | "green lizard, Lacerta viridis", 65 | "African chameleon, Chamaeleo chamaeleon", 66 | "Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis", 67 | "African crocodile, Nile crocodile, Crocodylus niloticus", 68 | "American alligator, Alligator mississipiensis", 69 | "triceratops", 70 | "thunder snake, worm snake, Carphophis amoenus", 71 | "ringneck snake, ring-necked snake, ring snake", 72 | "hognose snake, puff adder, sand viper", 73 | "green snake, grass snake", 74 | "king snake, kingsnake", 75 | "garter snake, grass snake", 76 | "water snake", 77 | "vine snake", 78 | "night snake, Hypsiglena torquata", 79 | "boa constrictor, Constrictor constrictor", 80 | "rock python, rock snake, Python sebae", 81 | "Indian cobra, Naja naja", 82 | "green mamba", 83 | "sea snake", 84 | "horned viper, cerastes, sand viper, horned asp, Cerastes cornutus", 85 | "diamondback, diamondback rattlesnake, Crotalus adamanteus", 86 | "sidewinder, horned rattlesnake, Crotalus cerastes", 87 | "trilobite", 88 | "harvestman, daddy longlegs, Phalangium opilio", 89 | "scorpion", 90 | "black and gold garden spider, Argiope aurantia", 91 | "barn 
spider, Araneus cavaticus", 92 | "garden spider, Aranea diademata", 93 | "black widow, Latrodectus mactans", 94 | "tarantula", 95 | "wolf spider, hunting spider", 96 | "tick", 97 | "centipede", 98 | "black grouse", 99 | "ptarmigan", 100 | "ruffed grouse, partridge, Bonasa umbellus", 101 | "prairie chicken, prairie grouse, prairie fowl", 102 | "peacock", 103 | "quail", 104 | "partridge", 105 | "African grey, African gray, Psittacus erithacus", 106 | "macaw", 107 | "sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita", 108 | "lorikeet", 109 | "coucal", 110 | "bee eater", 111 | "hornbill", 112 | "hummingbird", 113 | "jacamar", 114 | "toucan", 115 | "drake", 116 | "red-breasted merganser, Mergus serrator", 117 | "goose", 118 | "black swan, Cygnus atratus", 119 | "tusker", 120 | "echidna, spiny anteater, anteater", 121 | "platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus", 122 | "wallaby, brush kangaroo", 123 | "koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus", 124 | "wombat", 125 | "jellyfish", 126 | "sea anemone, anemone", 127 | "brain coral", 128 | "flatworm, platyhelminth", 129 | "nematode, nematode worm, roundworm", 130 | "conch", 131 | "snail", 132 | "slug", 133 | "sea slug, nudibranch", 134 | "chiton, coat-of-mail shell, sea cradle, polyplacophore", 135 | "chambered nautilus, pearly nautilus, nautilus", 136 | "Dungeness crab, Cancer magister", 137 | "rock crab, Cancer irroratus", 138 | "fiddler crab", 139 | "king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica", 140 | "American lobster, Northern lobster, Maine lobster, Homarus americanus", 141 | "spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish", 142 | "crayfish, crawfish, crawdad, crawdaddy", 143 | "hermit crab", 144 | "isopod", 145 | "white stork, Ciconia ciconia", 146 | "black stork, Ciconia nigra", 147 | "spoonbill", 148 | "flamingo", 149 | "little blue heron, Egretta caerulea", 150 | 
"American egret, great white heron, Egretta albus", 151 | "bittern", 152 | "crane", 153 | "limpkin, Aramus pictus", 154 | "European gallinule, Porphyrio porphyrio", 155 | "American coot, marsh hen, mud hen, water hen, Fulica americana", 156 | "bustard", 157 | "ruddy turnstone, Arenaria interpres", 158 | "red-backed sandpiper, dunlin, Erolia alpina", 159 | "redshank, Tringa totanus", 160 | "dowitcher", 161 | "oystercatcher, oyster catcher", 162 | "pelican", 163 | "king penguin, Aptenodytes patagonica", 164 | "albatross, mollymawk", 165 | "grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus", 166 | "killer whale, killer, orca, grampus, sea wolf, Orcinus orca", 167 | "dugong, Dugong dugon", 168 | "sea lion", 169 | "Chihuahua", 170 | "Japanese spaniel", 171 | "Maltese dog, Maltese terrier, Maltese", 172 | "Pekinese, Pekingese, Peke", 173 | "Shih-Tzu", 174 | "Blenheim spaniel", 175 | "papillon", 176 | "toy terrier", 177 | "Rhodesian ridgeback", 178 | "Afghan hound, Afghan", 179 | "basset, basset hound", 180 | "beagle", 181 | "bloodhound, sleuthhound", 182 | "bluetick", 183 | "black-and-tan coonhound", 184 | "Walker hound, Walker foxhound", 185 | "English foxhound", 186 | "redbone", 187 | "borzoi, Russian wolfhound", 188 | "Irish wolfhound", 189 | "Italian greyhound", 190 | "whippet", 191 | "Ibizan hound, Ibizan Podenco", 192 | "Norwegian elkhound, elkhound", 193 | "otterhound, otter hound", 194 | "Saluki, gazelle hound", 195 | "Scottish deerhound, deerhound", 196 | "Weimaraner", 197 | "Staffordshire bullterrier, Staffordshire bull terrier", 198 | "American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier", 199 | "Bedlington terrier", 200 | "Border terrier", 201 | "Kerry blue terrier", 202 | "Irish terrier", 203 | "Norfolk terrier", 204 | "Norwich terrier", 205 | "Yorkshire terrier", 206 | "wire-haired fox terrier", 207 | "Lakeland terrier", 208 | "Sealyham terrier, Sealyham", 209 | "Airedale, Airedale 
terrier", 210 | "cairn, cairn terrier", 211 | "Australian terrier", 212 | "Dandie Dinmont, Dandie Dinmont terrier", 213 | "Boston bull, Boston terrier", 214 | "miniature schnauzer", 215 | "giant schnauzer", 216 | "standard schnauzer", 217 | "Scotch terrier, Scottish terrier, Scottie", 218 | "Tibetan terrier, chrysanthemum dog", 219 | "silky terrier, Sydney silky", 220 | "soft-coated wheaten terrier", 221 | "West Highland white terrier", 222 | "Lhasa, Lhasa apso", 223 | "flat-coated retriever", 224 | "curly-coated retriever", 225 | "golden retriever", 226 | "Labrador retriever", 227 | "Chesapeake Bay retriever", 228 | "German short-haired pointer", 229 | "vizsla, Hungarian pointer", 230 | "English setter", 231 | "Irish setter, red setter", 232 | "Gordon setter", 233 | "Brittany spaniel", 234 | "clumber, clumber spaniel", 235 | "English springer, English springer spaniel", 236 | "Welsh springer spaniel", 237 | "cocker spaniel, English cocker spaniel, cocker", 238 | "Sussex spaniel", 239 | "Irish water spaniel", 240 | "kuvasz", 241 | "schipperke", 242 | "groenendael", 243 | "malinois", 244 | "briard", 245 | "kelpie", 246 | "komondor", 247 | "Old English sheepdog, bobtail", 248 | "Shetland sheepdog, Shetland sheep dog, Shetland", 249 | "collie", 250 | "Border collie", 251 | "Bouvier des Flandres, Bouviers des Flandres", 252 | "Rottweiler", 253 | "German shepherd, German shepherd dog, German police dog, alsatian", 254 | "Doberman, Doberman pinscher", 255 | "miniature pinscher", 256 | "Greater Swiss Mountain dog", 257 | "Bernese mountain dog", 258 | "Appenzeller", 259 | "EntleBucher", 260 | "boxer", 261 | "bull mastiff", 262 | "Tibetan mastiff", 263 | "French bulldog", 264 | "Great Dane", 265 | "Saint Bernard, St Bernard", 266 | "Eskimo dog, husky", 267 | "malamute, malemute, Alaskan malamute", 268 | "Siberian husky", 269 | "dalmatian, coach dog, carriage dog", 270 | "affenpinscher, monkey pinscher, monkey dog", 271 | "basenji", 272 | "pug, pug-dog", 273 | "Leonberg", 
274 | "Newfoundland, Newfoundland dog", 275 | "Great Pyrenees", 276 | "Samoyed, Samoyede", 277 | "Pomeranian", 278 | "chow, chow chow", 279 | "keeshond", 280 | "Brabancon griffon", 281 | "Pembroke, Pembroke Welsh corgi", 282 | "Cardigan, Cardigan Welsh corgi", 283 | "toy poodle", 284 | "miniature poodle", 285 | "standard poodle", 286 | "Mexican hairless", 287 | "timber wolf, grey wolf, gray wolf, Canis lupus", 288 | "white wolf, Arctic wolf, Canis lupus tundrarum", 289 | "red wolf, maned wolf, Canis rufus, Canis niger", 290 | "coyote, prairie wolf, brush wolf, Canis latrans", 291 | "dingo, warrigal, warragal, Canis dingo", 292 | "dhole, Cuon alpinus", 293 | "African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus", 294 | "hyena, hyaena", 295 | "red fox, Vulpes vulpes", 296 | "kit fox, Vulpes macrotis", 297 | "Arctic fox, white fox, Alopex lagopus", 298 | "grey fox, gray fox, Urocyon cinereoargenteus", 299 | "tabby, tabby cat", 300 | "tiger cat", 301 | "Persian cat", 302 | "Siamese cat, Siamese", 303 | "Egyptian cat", 304 | "cougar, puma, catamount, mountain lion, painter, panther, Felis concolor", 305 | "lynx, catamount", 306 | "leopard, Panthera pardus", 307 | "snow leopard, ounce, Panthera uncia", 308 | "jaguar, panther, Panthera onca, Felis onca", 309 | "lion, king of beasts, Panthera leo", 310 | "tiger, Panthera tigris", 311 | "cheetah, chetah, Acinonyx jubatus", 312 | "brown bear, bruin, Ursus arctos", 313 | "American black bear, black bear, Ursus americanus, Euarctos americanus", 314 | "ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus", 315 | "sloth bear, Melursus ursinus, Ursus ursinus", 316 | "mongoose", 317 | "meerkat, mierkat", 318 | "tiger beetle", 319 | "ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle", 320 | "ground beetle, carabid beetle", 321 | "long-horned beetle, longicorn, longicorn beetle", 322 | "leaf beetle, chrysomelid", 323 | "dung beetle", 324 | "rhinoceros beetle", 325 | "weevil", 326 | "fly", 327 | "bee", 328 
| "ant, emmet, pismire", 329 | "grasshopper, hopper", 330 | "cricket", 331 | "walking stick, walkingstick, stick insect", 332 | "cockroach, roach", 333 | "mantis, mantid", 334 | "cicada, cicala", 335 | "leafhopper", 336 | "lacewing, lacewing fly", 337 | "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, " 338 | "skeeter hawk", 339 | "damselfly", 340 | "admiral", 341 | "ringlet, ringlet butterfly", 342 | "monarch, monarch butterfly, milkweed butterfly, Danaus plexippus", 343 | "cabbage butterfly", 344 | "sulphur butterfly, sulfur butterfly", 345 | "lycaenid, lycaenid butterfly", 346 | "starfish, sea star", 347 | "sea urchin", 348 | "sea cucumber, holothurian", 349 | "wood rabbit, cottontail, cottontail rabbit", 350 | "hare", 351 | "Angora, Angora rabbit", 352 | "hamster", 353 | "porcupine, hedgehog", 354 | "fox squirrel, eastern fox squirrel, Sciurus niger", 355 | "marmot", 356 | "beaver", 357 | "guinea pig, Cavia cobaya", 358 | "sorrel", 359 | "zebra", 360 | "hog, pig, grunter, squealer, Sus scrofa", 361 | "wild boar, boar, Sus scrofa", 362 | "warthog", 363 | "hippopotamus, hippo, river horse, Hippopotamus amphibius", 364 | "ox", 365 | "water buffalo, water ox, Asiatic buffalo, Bubalus bubalis", 366 | "bison", 367 | "ram, tup", 368 | "bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis", 369 | "ibex, Capra ibex", 370 | "hartebeest", 371 | "impala, Aepyceros melampus", 372 | "gazelle", 373 | "Arabian camel, dromedary, Camelus dromedarius", 374 | "llama", 375 | "weasel", 376 | "mink", 377 | "polecat, fitch, foulmart, foumart, Mustela putorius", 378 | "black-footed ferret, ferret, Mustela nigripes", 379 | "otter", 380 | "skunk, polecat, wood pussy", 381 | "badger", 382 | "armadillo", 383 | "three-toed sloth, ai, Bradypus tridactylus", 384 | "orangutan, orang, orangutang, Pongo pygmaeus", 385 | "gorilla, Gorilla gorilla", 386 | "chimpanzee, chimp, Pan troglodytes", 
387 | "gibbon, Hylobates lar", 388 | "siamang, Hylobates syndactylus, Symphalangus syndactylus", 389 | "guenon, guenon monkey", 390 | "patas, hussar monkey, Erythrocebus patas", 391 | "baboon", 392 | "macaque", 393 | "langur", 394 | "colobus, colobus monkey", 395 | "proboscis monkey, Nasalis larvatus", 396 | "marmoset", 397 | "capuchin, ringtail, Cebus capucinus", 398 | "howler monkey, howler", 399 | "titi, titi monkey", 400 | "spider monkey, Ateles geoffroyi", 401 | "squirrel monkey, Saimiri sciureus", 402 | "Madagascar cat, ring-tailed lemur, Lemur catta", 403 | "indri, indris, Indri indri, Indri brevicaudatus", 404 | "Indian elephant, Elephas maximus", 405 | "African elephant, Loxodonta africana", 406 | "lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens", 407 | "giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca", 408 | "barracouta, snoek", 409 | "eel", 410 | "coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch", 411 | "rock beauty, Holocanthus tricolor", 412 | "anemone fish", 413 | "sturgeon", 414 | "gar, garfish, garpike, billfish, Lepisosteus osseus", 415 | "lionfish", 416 | "puffer, pufferfish, blowfish, globefish", 417 | "abacus", 418 | "abaya", 419 | "academic gown, academic robe, judge's robe", 420 | "accordion, piano accordion, squeeze box", 421 | "acoustic guitar", 422 | "aircraft carrier, carrier, flattop, attack aircraft carrier", 423 | "airliner", 424 | "airship, dirigible", 425 | "altar", 426 | "ambulance", 427 | "amphibian, amphibious vehicle", 428 | "analog clock", 429 | "apiary, bee house", 430 | "apron", 431 | "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin", 432 | "assault rifle, assault gun", 433 | "backpack, back pack, knapsack, packsack, rucksack, haversack", 434 | "bakery, bakeshop, bakehouse", 435 | "balance beam, beam", 436 | "balloon", 437 | "ballpoint, ballpoint pen, ballpen, Biro", 438 | "Band Aid", 439 | "banjo", 440 | 
"bannister, banister, balustrade, balusters, handrail", 441 | "barbell", 442 | "barber chair", 443 | "barbershop", 444 | "barn", 445 | "barometer", 446 | "barrel, cask", 447 | "barrow, garden cart, lawn cart, wheelbarrow", 448 | "baseball", 449 | "basketball", 450 | "bassinet", 451 | "bassoon", 452 | "bathing cap, swimming cap", 453 | "bath towel", 454 | "bathtub, bathing tub, bath, tub", 455 | "beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon", 456 | "beacon, lighthouse, beacon light, pharos", 457 | "beaker", 458 | "bearskin, busby, shako", 459 | "beer bottle", 460 | "beer glass", 461 | "bell cote, bell cot", 462 | "bib", 463 | "bicycle-built-for-two, tandem bicycle, tandem", 464 | "bikini, two-piece", 465 | "binder, ring-binder", 466 | "binoculars, field glasses, opera glasses", 467 | "birdhouse", 468 | "boathouse", 469 | "bobsled, bobsleigh, bob", 470 | "bolo tie, bolo, bola tie, bola", 471 | "bonnet, poke bonnet", 472 | "bookcase", 473 | "bookshop, bookstore, bookstall", 474 | "bottlecap", 475 | "bow", 476 | "bow tie, bow-tie, bowtie", 477 | "brass, memorial tablet, plaque", 478 | "brassiere, bra, bandeau", 479 | "breakwater, groin, groyne, mole, bulwark, seawall, jetty", 480 | "breastplate, aegis, egis", 481 | "broom", 482 | "bucket, pail", 483 | "buckle", 484 | "bulletproof vest", 485 | "bullet train, bullet", 486 | "butcher shop, meat market", 487 | "cab, hack, taxi, taxicab", 488 | "caldron, cauldron", 489 | "candle, taper, wax light", 490 | "cannon", 491 | "canoe", 492 | "can opener, tin opener", 493 | "cardigan", 494 | "car mirror", 495 | "carousel, carrousel, merry-go-round, roundabout, whirligig", 496 | "carpenter's kit, tool kit", 497 | "carton", 498 | "car wheel", 499 | "cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic " 500 | "teller, ATM", 501 | "cassette", 502 | "cassette player", 503 | "castle", 504 | "catamaran", 505 | "CD player", 506 | "cello, 
violoncello", 507 | "cellular telephone, cellular phone, cellphone, cell, mobile phone", 508 | "chain", 509 | "chainlink fence", 510 | "chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour", 511 | "chain saw, chainsaw", 512 | "chest", 513 | "chiffonier, commode", 514 | "chime, bell, gong", 515 | "china cabinet, china closet", 516 | "Christmas stocking", 517 | "church, church building", 518 | "cinema, movie theater, movie theatre, movie house, picture palace", 519 | "cleaver, meat cleaver, chopper", 520 | "cliff dwelling", 521 | "cloak", 522 | "clog, geta, patten, sabot", 523 | "cocktail shaker", 524 | "coffee mug", 525 | "coffeepot", 526 | "coil, spiral, volute, whorl, helix", 527 | "combination lock", 528 | "computer keyboard, keypad", 529 | "confectionery, confectionary, candy store", 530 | "container ship, containership, container vessel", 531 | "convertible", 532 | "corkscrew, bottle screw", 533 | "cornet, horn, trumpet, trump", 534 | "cowboy boot", 535 | "cowboy hat, ten-gallon hat", 536 | "cradle", 537 | "crane", 538 | "crash helmet", 539 | "crate", 540 | "crib, cot", 541 | "Crock Pot", 542 | "croquet ball", 543 | "crutch", 544 | "cuirass", 545 | "dam, dike, dyke", 546 | "desk", 547 | "desktop computer", 548 | "dial telephone, dial phone", 549 | "diaper, nappy, napkin", 550 | "digital clock", 551 | "digital watch", 552 | "dining table, board", 553 | "dishrag, dishcloth", 554 | "dishwasher, dish washer, dishwashing machine", 555 | "disk brake, disc brake", 556 | "dock, dockage, docking facility", 557 | "dogsled, dog sled, dog sleigh", 558 | "dome", 559 | "doormat, welcome mat", 560 | "drilling platform, offshore rig", 561 | "drum, membranophone, tympan", 562 | "drumstick", 563 | "dumbbell", 564 | "Dutch oven", 565 | "electric fan, blower", 566 | "electric guitar", 567 | "electric locomotive", 568 | "entertainment center", 569 | "envelope", 570 | "espresso maker", 571 | "face powder", 572 | "feather boa, boa", 573 | "file, file 
cabinet, filing cabinet", 574 | "fireboat", 575 | "fire engine, fire truck", 576 | "fire screen, fireguard", 577 | "flagpole, flagstaff", 578 | "flute, transverse flute", 579 | "folding chair", 580 | "football helmet", 581 | "forklift", 582 | "fountain", 583 | "fountain pen", 584 | "four-poster", 585 | "freight car", 586 | "French horn, horn", 587 | "frying pan, frypan, skillet", 588 | "fur coat", 589 | "garbage truck, dustcart", 590 | "gasmask, respirator, gas helmet", 591 | "gas pump, gasoline pump, petrol pump, island dispenser", 592 | "goblet", 593 | "go-kart", 594 | "golf ball", 595 | "golfcart, golf cart", 596 | "gondola", 597 | "gong, tam-tam", 598 | "gown", 599 | "grand piano, grand", 600 | "greenhouse, nursery, glasshouse", 601 | "grille, radiator grille", 602 | "grocery store, grocery, food market, market", 603 | "guillotine", 604 | "hair slide", 605 | "hair spray", 606 | "half track", 607 | "hammer", 608 | "hamper", 609 | "hand blower, blow dryer, blow drier, hair dryer, hair drier", 610 | "hand-held computer, hand-held microcomputer", 611 | "handkerchief, hankie, hanky, hankey", 612 | "hard disc, hard disk, fixed disk", 613 | "harmonica, mouth organ, harp, mouth harp", 614 | "harp", 615 | "harvester, reaper", 616 | "hatchet", 617 | "holster", 618 | "home theater, home theatre", 619 | "honeycomb", 620 | "hook, claw", 621 | "hoopskirt, crinoline", 622 | "horizontal bar, high bar", 623 | "horse cart, horse-cart", 624 | "hourglass", 625 | "iPod", 626 | "iron, smoothing iron", 627 | "jack-o'-lantern", 628 | "jean, blue jean, denim", 629 | "jeep, landrover", 630 | "jersey, T-shirt, tee shirt", 631 | "jigsaw puzzle", 632 | "jinrikisha, ricksha, rickshaw", 633 | "joystick", 634 | "kimono", 635 | "knee pad", 636 | "knot", 637 | "lab coat, laboratory coat", 638 | "ladle", 639 | "lampshade, lamp shade", 640 | "laptop, laptop computer", 641 | "lawn mower, mower", 642 | "lens cap, lens cover", 643 | "letter opener, paper knife, paperknife", 644 | "library", 645 | 
"lifeboat", 646 | "lighter, light, igniter, ignitor", 647 | "limousine, limo", 648 | "liner, ocean liner", 649 | "lipstick, lip rouge", 650 | "Loafer", 651 | "lotion", 652 | "loudspeaker, speaker, speaker unit, loudspeaker system, speaker system", 653 | "loupe, jeweler's loupe", 654 | "lumbermill, sawmill", 655 | "magnetic compass", 656 | "mailbag, postbag", 657 | "mailbox, letter box", 658 | "maillot", 659 | "maillot, tank suit", 660 | "manhole cover", 661 | "maraca", 662 | "marimba, xylophone", 663 | "mask", 664 | "matchstick", 665 | "maypole", 666 | "maze, labyrinth", 667 | "measuring cup", 668 | "medicine chest, medicine cabinet", 669 | "megalith, megalithic structure", 670 | "microphone, mike", 671 | "microwave, microwave oven", 672 | "military uniform", 673 | "milk can", 674 | "minibus", 675 | "miniskirt, mini", 676 | "minivan", 677 | "missile", 678 | "mitten", 679 | "mixing bowl", 680 | "mobile home, manufactured home", 681 | "Model T", 682 | "modem", 683 | "monastery", 684 | "monitor", 685 | "moped", 686 | "mortar", 687 | "mortarboard", 688 | "mosque", 689 | "mosquito net", 690 | "motor scooter, scooter", 691 | "mountain bike, all-terrain bike, off-roader", 692 | "mountain tent", 693 | "mouse, computer mouse", 694 | "mousetrap", 695 | "moving van", 696 | "muzzle", 697 | "nail", 698 | "neck brace", 699 | "necklace", 700 | "nipple", 701 | "notebook, notebook computer", 702 | "obelisk", 703 | "oboe, hautboy, hautbois", 704 | "ocarina, sweet potato", 705 | "odometer, hodometer, mileometer, milometer", 706 | "oil filter", 707 | "organ, pipe organ", 708 | "oscilloscope, scope, cathode-ray oscilloscope, CRO", 709 | "overskirt", 710 | "oxcart", 711 | "oxygen mask", 712 | "packet", 713 | "paddle, boat paddle", 714 | "paddlewheel, paddle wheel", 715 | "padlock", 716 | "paintbrush", 717 | "pajama, pyjama, pj's, jammies", 718 | "palace", 719 | "panpipe, pandean pipe, syrinx", 720 | "paper towel", 721 | "parachute, chute", 722 | "parallel bars, bars", 723 | "park 
bench", 724 | "parking meter", 725 | "passenger car, coach, carriage", 726 | "patio, terrace", 727 | "pay-phone, pay-station", 728 | "pedestal, plinth, footstall", 729 | "pencil box, pencil case", 730 | "pencil sharpener", 731 | "perfume, essence", 732 | "Petri dish", 733 | "photocopier", 734 | "pick, plectrum, plectron", 735 | "pickelhaube", 736 | "picket fence, paling", 737 | "pickup, pickup truck", 738 | "pier", 739 | "piggy bank, penny bank", 740 | "pill bottle", 741 | "pillow", 742 | "ping-pong ball", 743 | "pinwheel", 744 | "pirate, pirate ship", 745 | "pitcher, ewer", 746 | "plane, carpenter's plane, woodworking plane", 747 | "planetarium", 748 | "plastic bag", 749 | "plate rack", 750 | "plow, plough", 751 | "plunger, plumber's helper", 752 | "Polaroid camera, Polaroid Land camera", 753 | "pole", 754 | "police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria", 755 | "poncho", 756 | "pool table, billiard table, snooker table", 757 | "pop bottle, soda bottle", 758 | "pot, flowerpot", 759 | "potter's wheel", 760 | "power drill", 761 | "prayer rug, prayer mat", 762 | "printer", 763 | "prison, prison house", 764 | "projectile, missile", 765 | "projector", 766 | "puck, hockey puck", 767 | "punching bag, punch bag, punching ball, punchball", 768 | "purse", 769 | "quill, quill pen", 770 | "quilt, comforter, comfort, puff", 771 | "racer, race car, racing car", 772 | "racket, racquet", 773 | "radiator", 774 | "radio, wireless", 775 | "radio telescope, radio reflector", 776 | "rain barrel", 777 | "recreational vehicle, RV, R.V.", 778 | "reel", 779 | "reflex camera", 780 | "refrigerator, icebox", 781 | "remote control, remote", 782 | "restaurant, eating house, eating place, eatery", 783 | "revolver, six-gun, six-shooter", 784 | "rifle", 785 | "rocking chair, rocker", 786 | "rotisserie", 787 | "rubber eraser, rubber, pencil eraser", 788 | "rugby ball", 789 | "rule, ruler", 790 | "running shoe", 791 | "safe", 792 | "safety pin", 793 | "saltshaker, salt 
shaker", 794 | "sandal", 795 | "sarong", 796 | "sax, saxophone", 797 | "scabbard", 798 | "scale, weighing machine", 799 | "school bus", 800 | "schooner", 801 | "scoreboard", 802 | "screen, CRT screen", 803 | "screw", 804 | "screwdriver", 805 | "seat belt, seatbelt", 806 | "sewing machine", 807 | "shield, buckler", 808 | "shoe shop, shoe-shop, shoe store", 809 | "shoji", 810 | "shopping basket", 811 | "shopping cart", 812 | "shovel", 813 | "shower cap", 814 | "shower curtain", 815 | "ski", 816 | "ski mask", 817 | "sleeping bag", 818 | "slide rule, slipstick", 819 | "sliding door", 820 | "slot, one-armed bandit", 821 | "snorkel", 822 | "snowmobile", 823 | "snowplow, snowplough", 824 | "soap dispenser", 825 | "soccer ball", 826 | "sock", 827 | "solar dish, solar collector, solar furnace", 828 | "sombrero", 829 | "soup bowl", 830 | "space bar", 831 | "space heater", 832 | "space shuttle", 833 | "spatula", 834 | "speedboat", 835 | "spider web, spider's web", 836 | "spindle", 837 | "sports car, sport car", 838 | "spotlight, spot", 839 | "stage", 840 | "steam locomotive", 841 | "steel arch bridge", 842 | "steel drum", 843 | "stethoscope", 844 | "stole", 845 | "stone wall", 846 | "stopwatch, stop watch", 847 | "stove", 848 | "strainer", 849 | "streetcar, tram, tramcar, trolley, trolley car", 850 | "stretcher", 851 | "studio couch, day bed", 852 | "stupa, tope", 853 | "submarine, pigboat, sub, U-boat", 854 | "suit, suit of clothes", 855 | "sundial", 856 | "sunglass", 857 | "sunglasses, dark glasses, shades", 858 | "sunscreen, sunblock, sun blocker", 859 | "suspension bridge", 860 | "swab, swob, mop", 861 | "sweatshirt", 862 | "swimming trunks, bathing trunks", 863 | "swing", 864 | "switch, electric switch, electrical switch", 865 | "syringe", 866 | "table lamp", 867 | "tank, army tank, armored combat vehicle, armoured combat vehicle", 868 | "tape player", 869 | "teapot", 870 | "teddy, teddy bear", 871 | "television, television system", 872 | "tennis ball", 873 | "thatch, 
thatched roof", 874 | "theater curtain, theatre curtain", 875 | "thimble", 876 | "thresher, thrasher, threshing machine", 877 | "throne", 878 | "tile roof", 879 | "toaster", 880 | "tobacco shop, tobacconist shop, tobacconist", 881 | "toilet seat", 882 | "torch", 883 | "totem pole", 884 | "tow truck, tow car, wrecker", 885 | "toyshop", 886 | "tractor", 887 | "trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi", 888 | "tray", 889 | "trench coat", 890 | "tricycle, trike, velocipede", 891 | "trimaran", 892 | "tripod", 893 | "triumphal arch", 894 | "trolleybus, trolley coach, trackless trolley", 895 | "trombone", 896 | "tub, vat", 897 | "turnstile", 898 | "typewriter keyboard", 899 | "umbrella", 900 | "unicycle, monocycle", 901 | "upright, upright piano", 902 | "vacuum, vacuum cleaner", 903 | "vase", 904 | "vault", 905 | "velvet", 906 | "vending machine", 907 | "vestment", 908 | "viaduct", 909 | "violin, fiddle", 910 | "volleyball", 911 | "waffle iron", 912 | "wall clock", 913 | "wallet, billfold, notecase, pocketbook", 914 | "wardrobe, closet, press", 915 | "warplane, military plane", 916 | "washbasin, handbasin, washbowl, lavabo, wash-hand basin", 917 | "washer, automatic washer, washing machine", 918 | "water bottle", 919 | "water jug", 920 | "water tower", 921 | "whiskey jug", 922 | "whistle", 923 | "wig", 924 | "window screen", 925 | "window shade", 926 | "Windsor tie", 927 | "wine bottle", 928 | "wing", 929 | "wok", 930 | "wooden spoon", 931 | "wool, woolen, woollen", 932 | "worm fence, snake fence, snake-rail fence, Virginia fence", 933 | "wreck", 934 | "yawl", 935 | "yurt", 936 | "web site, website, internet site, site", 937 | "comic book", 938 | "crossword puzzle, crossword", 939 | "street sign", 940 | "traffic light, traffic signal, stoplight", 941 | "book jacket, dust cover, dust jacket, dust wrapper", 942 | "menu", 943 | "plate", 944 | "guacamole", 945 | "consomme", 946 | "hot pot, hotpot", 947 | "trifle", 948 | "ice cream, icecream", 
949 | "ice lolly, lolly, lollipop, popsicle", 950 | "French loaf", 951 | "bagel, beigel", 952 | "pretzel", 953 | "cheeseburger", 954 | "hotdog, hot dog, red hot", 955 | "mashed potato", 956 | "head cabbage", 957 | "broccoli", 958 | "cauliflower", 959 | "zucchini, courgette", 960 | "spaghetti squash", 961 | "acorn squash", 962 | "butternut squash", 963 | "cucumber, cuke", 964 | "artichoke, globe artichoke", 965 | "bell pepper", 966 | "cardoon", 967 | "mushroom", 968 | "Granny Smith", 969 | "strawberry", 970 | "orange", 971 | "lemon", 972 | "fig", 973 | "pineapple, ananas", 974 | "banana", 975 | "jackfruit, jak, jack", 976 | "custard apple", 977 | "pomegranate", 978 | "hay", 979 | "carbonara", 980 | "chocolate sauce, chocolate syrup", 981 | "dough", 982 | "meat loaf, meatloaf", 983 | "pizza, pizza pie", 984 | "potpie", 985 | "burrito", 986 | "red wine", 987 | "espresso", 988 | "cup", 989 | "eggnog", 990 | "alp", 991 | "bubble", 992 | "cliff, drop, drop-off", 993 | "coral reef", 994 | "geyser", 995 | "lakeside, lakeshore", 996 | "promontory, headland, head, foreland", 997 | "sandbar, sand bar", 998 | "seashore, coast, seacoast, sea-coast", 999 | "valley, vale", 1000 | "volcano", 1001 | "ballplayer, baseball player", 1002 | "groom, bridegroom", 1003 | "scuba diver", 1004 | "rapeseed", 1005 | "daisy", 1006 | "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", 1007 | "corn", 1008 | "acorn", 1009 | "hip, rose hip, rosehip", 1010 | "buckeye, horse chestnut, conker", 1011 | "coral fungus", 1012 | "agaric", 1013 | "gyromitra", 1014 | "stinkhorn, carrion fungus", 1015 | "earthstar", 1016 | "hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa", 1017 | "bolete", 1018 | "ear, spike, capitulum", 1019 | "toilet tissue, toilet paper, bathroom tissue", 1020 | }; 1021 | 1022 | static constexpr int64_t IMAGENET_NUM_CLASSES = 1000; 1023 | 1024 | // BGR 1025 | static const std::vector IMAGENET_MEAN = {0.406, 0.456, 0.485}; 
1026 | 1027 | static const std::vector IMAGENET_STD = {0.225, 0.224, 0.229}; 1028 | 1029 | static const std::vector MSCOCO_CLASSES = { 1030 | "background", "person", "bicycle", "car", "motorbike", "aeroplane", "bus", 1031 | "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", 1032 | "bench", "bird", "cat", "dog", "horse", "sheep", "cow", 1033 | "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", 1034 | "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", 1035 | "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", 1036 | "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", 1037 | "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", 1038 | "cake", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", 1039 | "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", 1040 | "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", 1041 | "scissors", "teddy bear", "hair drier", "toothbrush"}; 1042 | 1043 | static constexpr int64_t MSCOCO_NUM_CLASSES = 81; 1044 | 1045 | static const std::vector> MSCOCO_COLOR_CHART = generateColorCharts(MSCOCO_NUM_CLASSES); 1046 | 1047 | static const std::vector VOC_CLASSES = { 1048 | "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", 1049 | "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; 1050 | 1051 | static constexpr int64_t VOC_NUM_CLASSES = 20; 1052 | 1053 | static const std::vector> VOC_COLOR_CHART = generateColorCharts(VOC_NUM_CLASSES); 1054 | 1055 | static const std::vector CITY_SCAPES_CLASSES = { 1056 | "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light", "traffic sign", "vegetation", "terrain", 1057 | "sky", "person", "rider", "car", "truck", "bus", "train", "motorcycle", "bicycle"}; 1058 | 1059 | static constexpr 
int64_t CITY_SCAPES_NUM_CLASSES = 19; 1060 | 1061 | static const std::vector> CITY_SCAPES_COLOR_CHART = { 1062 | {128, 64, 128}, {244, 35, 232}, {70, 70, 70}, {102, 102, 156}, {190, 153, 153}, {153, 153, 153}, {250, 170, 30}, 1063 | {220, 220, 0}, {107, 142, 35}, {152, 251, 152}, {70, 130, 180}, {220, 20, 60}, {255, 0, 0}, {0, 0, 142}, 1064 | {0, 0, 70}, {0, 60, 100}, {0, 80, 100}, {0, 0, 230}, {119, 11, 32}}; 1065 | } // namespace Ort 1066 | -------------------------------------------------------------------------------- /include/ort_utility/ImageClassificationOrtSessionHandler.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ImageClassificationOrtSessionHandler.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include "ImageRecognitionOrtSessionHandlerBase.hpp" 15 | 16 | namespace Ort 17 | { 18 | class ImageClassificationOrtSessionHandler : public ImageRecognitionOrtSessionHandlerBase 19 | { 20 | public: 21 | ImageClassificationOrtSessionHandler( 22 | const uint16_t numClasses, // 23 | const std::string& modelPath, // 24 | const std::optional& gpuIdx = std::nullopt, // 25 | const std::optional>>& inputShapes = std::nullopt); 26 | 27 | ~ImageClassificationOrtSessionHandler(); 28 | 29 | std::vector> topK(const std::vector& inferenceOutput, // 30 | const uint16_t k = 1, // 31 | const bool useSoftmax = true) const; 32 | 33 | std::string topKToString(const std::vector& inferenceOutput, // 34 | const uint16_t k = 1, // 35 | const bool useSoftmax = true) const; 36 | }; 37 | } // namespace Ort 38 | -------------------------------------------------------------------------------- /include/ort_utility/ImageRecognitionOrtSessionHandlerBase.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ImageRecognitionOrtSessionHandlerBase.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 
10 | #include 11 | #include 12 | #include 13 | 14 | #include "OrtSessionHandler.hpp" 15 | 16 | namespace Ort 17 | { 18 | class ImageRecognitionOrtSessionHandlerBase : public OrtSessionHandler 19 | { 20 | public: 21 | ImageRecognitionOrtSessionHandlerBase( 22 | const uint16_t numClasses, // 23 | const std::string& modelPath, // 24 | const std::optional& gpuIdx = std::nullopt, // 25 | const std::optional>>& inputShapes = std::nullopt); 26 | 27 | ~ImageRecognitionOrtSessionHandlerBase(); 28 | 29 | void initClassNames(const std::vector& classNames); 30 | 31 | virtual void preprocess(float* dst, // 32 | const unsigned char* src, // 33 | const int64_t targetImgWidth, // 34 | const int64_t targetImgHeight, // 35 | const int numChanels, // 36 | const std::vector& meanVal = {}, // 37 | const std::vector& stdVal = {}) const; 38 | 39 | uint16_t numClasses() const 40 | { 41 | return m_numClasses; 42 | } 43 | 44 | const std::vector& classNames() const 45 | { 46 | return m_classNames; 47 | } 48 | 49 | protected: 50 | const uint16_t m_numClasses; 51 | std::vector m_classNames; 52 | }; 53 | } // namespace Ort 54 | -------------------------------------------------------------------------------- /include/ort_utility/ObjectDetectionOrtSessionHandler.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ObjectDetectionOrtSessionHandler.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | 13 | #include "ImageRecognitionOrtSessionHandlerBase.hpp" 14 | 15 | namespace Ort 16 | { 17 | class ObjectDetectionOrtSessionHandler : public ImageRecognitionOrtSessionHandlerBase 18 | { 19 | public: 20 | ObjectDetectionOrtSessionHandler( 21 | const uint16_t numClasses, // 22 | const std::string& modelPath, // 23 | const std::optional& gpuIdx = std::nullopt, // 24 | const std::optional>>& inputShapes = std::nullopt); 25 | 26 | ~ObjectDetectionOrtSessionHandler(); 27 | }; 28 | } // namespace Ort 29 | 
// --------------------------------------------------------------------------------
// /include/ort_utility/OrtSessionHandler.hpp
// --------------------------------------------------------------------------------
/**
 * @file OrtSessionHandler.hpp
 *
 * @author btran
 *
 */

#pragma once

// NOTE(review): the extraction stripped all <...> tokens; template arguments and
// include targets below are reconstructed from usage -- confirm against upstream.
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>

namespace Ort
{
// Pimpl wrapper around an onnxruntime inference session; the implementation class
// (and all onnxruntime headers) stay out of this public header.
class OrtSessionHandler
{
 public:
    // DataOutputType -> (pointer to output data, shape of output data)
    using DataOutputType = std::pair<float*, std::vector<int64_t>>;

    explicit OrtSessionHandler(const std::string& modelPath,                        //
                               const std::optional<size_t>& gpuIdx = std::nullopt,  //
                               const std::optional<std::vector<std::vector<int64_t>>>& inputShapes = std::nullopt);
    ~OrtSessionHandler();

    // multiple inputs, multiple outputs
    std::vector<DataOutputType> operator()(const std::vector<float*>& inputImgData) const;

    void updateInputShapes(const std::vector<std::vector<int64_t>>& inputShapes);

 private:
    class OrtSessionHandlerIml;  // defined in OrtSessionHandler.cpp
    std::unique_ptr<OrtSessionHandlerIml> m_piml;
};
}  // namespace Ort

// --------------------------------------------------------------------------------
// /include/ort_utility/Utility.hpp
// --------------------------------------------------------------------------------
/**
 * @file Utility.hpp
 *
 * @author btran
 *
 */

#pragma once

#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <deque>
#include <limits>
#include <numeric>
#include <ostream>
#include <vector>

#if ENABLE_DEBUG
#include <iostream>
#endif

// Streams any single-type-parameter standard container as space-separated elements.
template <typename T, template <typename, typename = std::allocator<T>> class Container>
std::ostream& operator<<(std::ostream& os, const Container<T>& container)
{
    using ContainerType = Container<T>;
    for (typename ContainerType::const_iterator it = container.begin(); it != container.end(); ++it) {
        os << *it << " ";
    }

    return os;
}

namespace
{
// Returns the indices of v ordered so that v[result.front()] is the smallest value;
// stable for equal elements.
template <typename T> std::deque<size_t> sortIndexes(const std::vector<T>& v)
{
    std::deque<size_t> indices(v.size());
    std::iota(indices.begin(), indices.end(), 0);

    std::stable_sort(std::begin(indices), std::end(indices), [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; });

    return indices;
}
}  // namespace

namespace Ort
{
#if ENABLE_DEBUG
#define DEBUG_LOG(...)                                                                                                 \
    {                                                                                                                  \
        char str[100];                                                                                                 \
        snprintf(str, sizeof(str), __VA_ARGS__);                                                                       \
        std::cout << "[" << __FILE__ << "][" << __FUNCTION__ << "][Line " << __LINE__ << "] >>> " << str << std::endl; \
    }
#else
#define DEBUG_LOG(...)
#endif

// In-place numerically-stable softmax over input[0, inputLen).
inline void softmax(float* input, const size_t inputLen)
{
    // FIX: guard empty input -- max_element on an empty range must not be dereferenced.
    if (inputLen == 0) {
        return;
    }

    const float maxVal = *std::max_element(input, input + inputLen);

    // FIX: accumulate in float (init was a double 0.0) and drop the pointless
    // std::move on a scalar.
    const float sum = std::accumulate(input, input + inputLen, 0.0f,
                                      [maxVal](const float acc, const float v) { return acc + expf(v - maxVal); });

    // log-sum-exp trick: exp(x - max - log(sum)) == exp(x - max) / sum
    const float offset = maxVal + logf(sum);
    for (auto it = input; it != (input + inputLen); ++it) {
        *it = expf(*it - offset);
    }
}

inline float sigmoid(const float x)
{
    // FIX: compute in float; the previous 1.0 / (1.0 + ...) promoted to double
    // and narrowed back on return.
    return 1.0f / (1.0f + expf(-x));
}

// Greedy non-maximum suppression on [xmin, ymin, xmax, ymax] boxes.
// Keeps at most topK boxes, highest score first; boxes overlapping a kept box
// with IoU > overlapThresh are discarded. Returns indices into bboxes.
inline std::vector<uint64_t> nms(const std::vector<std::array<float, 4>>& bboxes,             //
                                 const std::vector<float>& scores,                            //
                                 const float overlapThresh = 0.45,                            //
                                 const uint64_t topK = std::numeric_limits<uint64_t>::max()   //
)
{
    assert(bboxes.size() == scores.size());

    // FIX: the old assert(bboxes.size() > 0) compiled away in release builds and
    // (boxesLength - realK) then underflowed for empty input; return empty instead.
    if (bboxes.empty()) {
        return {};
    }

    const uint64_t boxesLength = bboxes.size();
    const uint64_t realK = std::max(std::min(boxesLength, topK), static_cast<uint64_t>(1));

    std::vector<uint64_t> keepIndices;
    keepIndices.reserve(realK);

    // ascending by score, so the best box is at the back
    std::deque<size_t> sortedIndices = ::sortIndexes(scores);

    // keep only the topK highest-scored boxes
    for (uint64_t i = 0; i < boxesLength - realK; ++i) {
        sortedIndices.pop_front();
    }

    std::vector<float> areas;
    areas.reserve(boxesLength);
    std::transform(std::begin(bboxes), std::end(bboxes), std::back_inserter(areas),
                   [](const auto& elem) { return (elem[2] - elem[0]) * (elem[3] - elem[1]); });

    while (!sortedIndices.empty()) {
        const uint64_t currentIdx = sortedIndices.back();
        keepIndices.emplace_back(currentIdx);

        if (sortedIndices.size() == 1) {
            break;
        }

        sortedIndices.pop_back();
        // FIX: removed the `ious` vector that was reserved but never used.

        const auto& curBbox = bboxes[currentIdx];
        const float curArea = areas[currentIdx];

        std::deque<size_t> newSortedIndices;

        for (const size_t elem : sortedIndices) {
            const auto& bbox = bboxes[elem];
            const float tmpXmin = std::max(curBbox[0], bbox[0]);
            const float tmpYmin = std::max(curBbox[1], bbox[1]);
            const float tmpXmax = std::min(curBbox[2], bbox[2]);
            const float tmpYmax = std::min(curBbox[3], bbox[3]);

            // FIX: compare against 0.0f -- std::max(float, double) does not deduce.
            const float tmpW = std::max(tmpXmax - tmpXmin, 0.0f);
            const float tmpH = std::max(tmpYmax - tmpYmin, 0.0f);

            const float intersection = tmpW * tmpH;
            const float unionArea = areas[elem] + curArea - intersection;
            const float iou = intersection / unionArea;

            // survivors: boxes that do NOT overlap the kept box too much
            if (iou <= overlapThresh) {
                newSortedIndices.emplace_back(elem);
            }
        }

        sortedIndices = newSortedIndices;
    }

    return keepIndices;
}

// Generates numClasses pseudo-random RGB triplets in [0, 255]. Deterministic for a
// fixed seed on a given platform (std::rand's sequence is implementation-defined).
inline std::vector<std::array<int, 3>> generateColorCharts(const uint16_t numClasses = 1000, const uint16_t seed = 255)
{
    std::srand(seed);
    std::vector<std::array<int, 3>> colors;
    colors.reserve(numClasses);
    for (uint16_t i = 0; i < numClasses; ++i) {
        colors.emplace_back(std::array<int, 3>{std::rand() % 256, std::rand() % 256, std::rand() % 256});
    }

    return colors;
}
}  // namespace Ort

// --------------------------------------------------------------------------------
// /include/ort_utility/ort_utility.hpp  (umbrella header -- truncated in this chunk)
// --------------------------------------------------------------------------------
/**
 * @file ort_utility.hpp
 *
 * @author btran
 *
 */
#include "Constants.hpp"

#include "ImageClassificationOrtSessionHandler.hpp"

#include "ImageRecognitionOrtSessionHandlerBase.hpp"

#include "OrtSessionHandler.hpp"

#include "ObjectDetectionOrtSessionHandler.hpp"

#include "Utility.hpp"
--------------------------------------------------------------------------------
/scripts/get_cuda_environment_variables.bash:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Export CUDA_HOME / CUDA_VERSION / CUDNN_HOME when both CUDA (nvcc) and cuDNN
# are found; otherwise print a message and export nothing.
# Meant to be sourced (exports variables into the calling shell).

function command_exists
{
    type "$1" &> /dev/null;
}

if ! command_exists nvcc; then
    echo "Cuda package not found"
else
    if [ ! "$(dpkg -S cudnn)" ];then
        echo "cudnn not found"
    else
        # second `whereis` field is the first match, e.g. /usr/local/cuda
        export CUDA_HOME=`whereis cuda | awk '{print $2}'`
        # NOTE(review): takes chars 2-4 of the release field (e.g. "V11.4" -> "11.4");
        # verify this cut range against the installed nvcc's output format.
        export CUDA_VERSION=`nvcc --version | grep release | awk '{print $6}' | cut -c 2-4`
        export CUDNN_HOME=$(dirname `whereis cudnn.h | awk '{print $2}'`)
    fi
fi
--------------------------------------------------------------------------------
/scripts/get_tensorrt_environment_variables.bash:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Directory containing libnvinfer -> TENSORRT_HOME (empty when TensorRT is absent).
# Meant to be sourced.

export TENSORRT_HOME="$(dirname $(whereis libnvinfer | awk '{print $2}'))"
--------------------------------------------------------------------------------
/scripts/install_apps_dependencies.bash:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Install the OpenCV development package required by the example apps.

sudo -l

sudo apt-get install -y --no-install-recommends libopencv-dev
--------------------------------------------------------------------------------
/scripts/install_latest_cmake.bash:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Ensure cmake >= CMAKE_VERSION_TO_INSTALL is available; install via pip if not.

readonly CMAKE_VERSION_TO_INSTALL="3.24.0"

function command_exists
{
    type "$1" &> /dev/null;
}

# Returns success when $1 >= $2 (version-sorted comparison).
function version_greater_equal
{
    printf '%s\n%s\n' "$2" "$1" | sort -V -C
}

# Skip installation if an adequate cmake is already present.
if command_exists cmake; then
    readonly CMAKE_VERSION=`cmake --version | head -n1 | cut -d" " -f3`
    if version_greater_equal "${CMAKE_VERSION}" "${CMAKE_VERSION_TO_INSTALL}"; then
        exit 0
    fi
fi

echo "Need cmake ${CMAKE_VERSION_TO_INSTALL} or above. Install now..."

sudo -l

sudo apt-get update
sudo apt-get -y purge cmake
sudo apt-get -y --no-install-recommends install python3-pip
python3 -m pip install cmake==$CMAKE_VERSION_TO_INSTALL
# restart the shell so the pip-installed cmake is picked up on PATH
exec bash
--------------------------------------------------------------------------------
/scripts/install_onnx_runtime.bash:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Build and install onnxruntime from source, enabling CUDA and/or TensorRT
# execution providers when the corresponding toolkits are detected.

# reference: https://github.com/microsoft/onnxruntime#installation

readonly CURRENT_DIR=$(dirname $(realpath $0))

sudo -l

sudo apt-get update
sudo apt install -y --no-install-recommends zlib1g-dev

readonly ONNXRUNTIME_VERSION="v1.14.1"
git clone --recursive -b ${ONNXRUNTIME_VERSION} https://github.com/Microsoft/onnxruntime
cd onnxruntime

INSTALL_PREFIX="/usr/local"
BUILDTYPE=Release
BUILDARGS="--config ${BUILDTYPE}"
BUILDARGS="${BUILDARGS} --build_shared_lib --skip_tests"
BUILDARGS="${BUILDARGS} --parallel"
BUILDARGS="${BUILDARGS} --cmake_extra_defines CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}"

# enable the CUDA execution provider only when all CUDA variables were exported
source $CURRENT_DIR/get_cuda_environment_variables.bash
if [ ! -z "$CUDA_HOME" -a ! -z "$CUDA_VERSION" -a ! -z "$CUDNN_HOME" ]; then
    BUILDARGS="${BUILDARGS} --use_cuda --cuda_version=${CUDA_VERSION} --cuda_home=${CUDA_HOME} --cudnn_home=${CUDNN_HOME}"
fi

source $CURRENT_DIR/get_tensorrt_environment_variables.bash
if [ ! -z "$TENSORRT_HOME" ]; then
    # NOTE(review): this comment previously referenced "onnxruntime v1.10.0 is
    # compatible with tensorrt 8"; ONNXRUNTIME_VERSION is now v1.14.1 — verify
    # TensorRT compatibility for the pinned version.
    BUILDARGS="${BUILDARGS} --use_tensorrt --tensorrt_home=${TENSORRT_HOME}"
fi

./build.sh ${BUILDARGS}
cd ./build/Linux/${BUILDTYPE}
sudo make install
--------------------------------------------------------------------------------
/scripts/loftr/README.md:
--------------------------------------------------------------------------------
# convert pre-trained loftr pytorch weights to onnx format

---

## dependencies

---

- python: 3x

-

```bash
git submodule update --init --recursive

python3 -m pip install -r requirements.txt
```

## :running: how to run

---

- download [LoFTR](https://github.com/zju3dv/LoFTR) weights indoor_ds_new.ckpt from [HERE](https://drive.google.com/drive/folders/1xu2Pq6mZT5hmFgiYMBT9Zt8h1yO-3SIp)

- export onnx weights

```
python3 convert_to_onnx.py --model_path /path/to/indoor_ds_new.ckpt
```

## Note

- The LoFTR's [latest commit](b4ee7eb0359d0062e794c99f73e27639d7c7ac9f) seems to be only compatible with the new weights (Ref: https://github.com/zju3dv/LoFTR/issues/48). Hence, this onnx cpp application is only compatible with _indoor_ds_new.ckpt_ weights.
34 | -------------------------------------------------------------------------------- /scripts/loftr/convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import onnxruntime 4 | import torch 5 | 6 | from loftr_wrapper import LoFTRWrapper as LoFTR 7 | 8 | 9 | def get_args(): 10 | import argparse 11 | 12 | parser = argparse.ArgumentParser("convert loftr torch weights to onnx format") 13 | parser.add_argument("--model_path", type=str, required=True) 14 | 15 | return parser.parse_args() 16 | 17 | 18 | def main(): 19 | args = get_args() 20 | model_path = args.model_path 21 | model = LoFTR() 22 | model.load_state_dict(torch.load(model_path)["state_dict"]) 23 | model.eval() 24 | 25 | batch_size = 1 26 | height = 480 27 | width = 640 28 | 29 | data = {} 30 | data["image0"] = torch.randn(batch_size, 1, height, width) 31 | data["image1"] = torch.randn(batch_size, 1, height, width) 32 | 33 | torch.onnx.export( 34 | model, 35 | data, 36 | "loftr.onnx", 37 | export_params=True, 38 | opset_version=12, 39 | do_constant_folding=True, 40 | input_names=list(data.keys()), 41 | output_names=["keypoints0", "keypoints1", "confidence"], 42 | dynamic_axes={ 43 | "image0": {2: "height", 3: "width"}, 44 | "image1": {2: "height", 3: "width"}, 45 | "keypoints0": {0: "num_keypoints"}, 46 | "keypoints1": {0: "num_keypoints"}, 47 | "confidence": {0: "num_keypoints"}, 48 | }, 49 | ) 50 | 51 | print(f"\nonnx model is saved to: {os.getcwd()}/loftr.onnx") 52 | 53 | print("\ntest inference using onnxruntime") 54 | sess = onnxruntime.InferenceSession("loftr.onnx") 55 | for input in sess.get_inputs(): 56 | print("input: ", input) 57 | 58 | print("\n") 59 | for output in sess.get_outputs(): 60 | print("output: ", output) 61 | 62 | 63 | if __name__ == "__main__": 64 | import warnings 65 | 66 | warnings.filterwarnings("ignore") 67 | 68 | main() 69 | -------------------------------------------------------------------------------- 
/scripts/loftr/loftr_wrapper.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import copy
import os
import sys
from typing import Any, Dict

import torch
from einops.einops import rearrange

# make the vendored LoFTR submodule importable
_CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(_CURRENT_DIR, "LoFTR"))

from src.loftr import LoFTR, default_cfg

# temp_bug_fix is required for the new (indoor_ds_new.ckpt) weights
DEFAULT_CFG = copy.deepcopy(default_cfg)
DEFAULT_CFG["coarse"]["temp_bug_fix"] = True


class LoFTRWrapper(LoFTR):
    # Export-friendly wrapper: upstream LoFTR.forward mutates a dict in place,
    # which torch.onnx.export cannot trace; this wrapper takes plain tensors
    # and returns a dict of tensors instead.
    def __init__(
        self,
        config: Dict[str, Any] = DEFAULT_CFG,
    ):
        LoFTR.__init__(self, config)

    def forward(
        self,
        image0: torch.Tensor,
        image1: torch.Tensor,
    ) -> Dict[str, torch.Tensor]:
        """Match a grayscale image pair.

        Mirrors upstream LoFTR.forward step by step, but returns
        keypoints0/keypoints1/confidence tensors instead of mutating `data`.
        Assumes image0/image1 are (batch, 1, H, W) — TODO confirm against the
        export script.
        """
        data = {
            "image0": image0,
            "image1": image1,
        }
        del image0, image1

        data.update(
            {
                "bs": data["image0"].size(0),
                "hw0_i": data["image0"].shape[2:],
                "hw1_i": data["image1"].shape[2:],
            }
        )

        # 1. extract coarse + fine CNN features (batched when shapes match)
        if data["hw0_i"] == data["hw1_i"]:  # faster & better BN convergence
            feats_c, feats_f = self.backbone(
                torch.cat([data["image0"], data["image1"]], dim=0)
            )
            (feat_c0, feat_c1), (feat_f0, feat_f1) = feats_c.split(
                data["bs"]
            ), feats_f.split(data["bs"])
        else:  # handle different input shapes
            (feat_c0, feat_f0), (feat_c1, feat_f1) = self.backbone(
                data["image0"]
            ), self.backbone(data["image1"])

        data.update(
            {
                "hw0_c": feat_c0.shape[2:],
                "hw1_c": feat_c1.shape[2:],
                "hw0_f": feat_f0.shape[2:],
                "hw1_f": feat_f1.shape[2:],
            }
        )

        # 2. coarse-level loftr module
        # add featmap with positional encoding, then flatten it to sequence [N, HW, C]
        feat_c0 = rearrange(self.pos_encoding(feat_c0), "n c h w -> n (h w) c")
        feat_c1 = rearrange(self.pos_encoding(feat_c1), "n c h w -> n (h w) c")

        mask_c0 = mask_c1 = None  # mask is useful in training
        if "mask0" in data:
            mask_c0, mask_c1 = data["mask0"].flatten(-2), data["mask1"].flatten(-2)
        feat_c0, feat_c1 = self.loftr_coarse(feat_c0, feat_c1, mask_c0, mask_c1)

        # 3. match coarse-level
        self.coarse_matching(feat_c0, feat_c1, data, mask_c0=mask_c0, mask_c1=mask_c1)

        # 4. fine-level refinement
        feat_f0_unfold, feat_f1_unfold = self.fine_preprocess(
            feat_f0, feat_f1, feat_c0, feat_c1, data
        )
        if feat_f0_unfold.size(0) != 0:  # at least one coarse level predicted
            feat_f0_unfold, feat_f1_unfold = self.loftr_fine(
                feat_f0_unfold, feat_f1_unfold
            )

        # 5. match fine-level
        self.fine_matching(feat_f0_unfold, feat_f1_unfold, data)

        # expose only the final matches under ONNX-friendly output names
        rename_keys: Dict[str, str] = {
            "mkpts0_f": "keypoints0",
            "mkpts1_f": "keypoints1",
            "mconf": "confidence",
        }
        out: Dict[str, torch.Tensor] = {}
        for k, v in rename_keys.items():
            _d = data[k]
            if isinstance(_d, torch.Tensor):
                out[v] = _d
            else:
                raise TypeError(
                    f"Expected torch.Tensor for item `{k}`. Gotcha {type(_d)}"
                )
        del data

        return out
--------------------------------------------------------------------------------
/scripts/loftr/requirements.txt:
--------------------------------------------------------------------------------
kornia==0.6.10
onnx==1.13.1
onnxruntime
einops==0.3.0
yacs>=0.1.8
--------------------------------------------------------------------------------
/scripts/superglue/README.md:
--------------------------------------------------------------------------------
# convert pre-trained superglue pytorch weights to onnx format

---

## dependencies

---

- python: 3x

-

```bash
git submodule update --init --recursive

python3 -m pip install -r requirements.txt
python3 -m pip install -r SuperGluePretrainedNetwork/requirements.txt
```

## :running: how to run

---

- export onnx weights

```
python3 convert_to_onnx.py
```
--------------------------------------------------------------------------------
/scripts/superglue/convert_to_onnx.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import os

import torch

import onnxruntime
from superglue_wrapper import SuperGlueWrapper as SuperGlue


def main():
    """Export the pretrained SuperGlue (indoor weights) to super_glue.onnx."""
    config = {
        "descriptor_dim": 256,
        "weights": "indoor",
        "keypoint_encoder": [32, 64, 128, 256],
        "GNN_layers": ["self", "cross"] * 9,
        "sinkhorn_iterations": 100,
        "match_threshold": 0.2,
    }

    model = SuperGlue(config=config)
    model.eval()

    # dummy inputs used only for tracing; actual keypoint counts become dynamic axes
    batch_size = 1
    height = 480
    width = 640
    num_keypoints = 382
    data = {}
    for i in range(2):
        data[f"image{i}_shape"] = torch.tensor(
            [batch_size, 1, height, width], dtype=torch.float32
        )
        data[f"scores{i}"] = torch.randn(batch_size, num_keypoints)
33 | data[f"keypoints{i}"] = torch.randn(batch_size, num_keypoints, 2) 34 | data[f"descriptors{i}"] = torch.randn(batch_size, 256, num_keypoints) 35 | 36 | torch.onnx.export( 37 | model, 38 | data, 39 | "super_glue.onnx", 40 | export_params=True, 41 | opset_version=12, 42 | do_constant_folding=True, 43 | input_names=list(data.keys()), 44 | output_names=["matches0", "matches1", "matching_scores0", "matching_scores1"], 45 | dynamic_axes={ 46 | "keypoints0": {0: "batch_size", 1: "num_keypoints0"}, 47 | "scores0": {0: "batch_size", 1: "num_keypoints0"}, 48 | "descriptors0": {0: "batch_size", 2: "num_keypoints0"}, 49 | "keypoints1": {0: "batch_size", 1: "num_keypoints1"}, 50 | "scores1": {0: "batch_size", 1: "num_keypoints1"}, 51 | "descriptors1": {0: "batch_size", 2: "num_keypoints1"}, 52 | "matches0": {0: "batch_size", 1: "num_keypoints0"}, 53 | "matches1": {0: "batch_size", 1: "num_keypoints1"}, 54 | "matching_scores0": {0: "batch_size", 1: "num_keypoints0"}, 55 | "matching_scores1": {0: "batch_size", 1: "num_keypoints1"}, 56 | }, 57 | ) 58 | print(f"\nonnx model is saved to: {os.getcwd()}/super_glue.onnx") 59 | 60 | print("\ntest inference using onnxruntime") 61 | sess = onnxruntime.InferenceSession("super_glue.onnx") 62 | for input in sess.get_inputs(): 63 | print("input: ", input) 64 | 65 | print("\n") 66 | for output in sess.get_outputs(): 67 | print("output: ", output) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /scripts/superglue/requirements.txt: -------------------------------------------------------------------------------- 1 | onnxruntime 2 | -------------------------------------------------------------------------------- /scripts/superglue/superglue_wrapper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from SuperGluePretrainedNetwork.models.superglue import ( 4 | SuperGlue, 5 | arange_like, 6 | 
log_optimal_transport, 7 | normalize_keypoints, 8 | ) 9 | 10 | 11 | class SuperGlueWrapper(SuperGlue): 12 | default_config = { 13 | "descriptor_dim": 256, 14 | "weights": "outdor", 15 | "keypoint_encoder": [32, 64, 128, 256], 16 | "GNN_layers": ["self", "cross"] * 9, 17 | "sinkhorn_iterations": 100, 18 | "match_threshold": 0.2, 19 | } 20 | 21 | def __init__(self, config): 22 | SuperGlue.__init__(self, config) 23 | 24 | def forward( 25 | self, 26 | image0_shape, 27 | scores0, 28 | keypoints0, 29 | descriptors0, 30 | image1_shape, 31 | scores1, 32 | keypoints1, 33 | descriptors1, 34 | ): 35 | data = { 36 | "image0_shape": image0_shape, 37 | "scores0": scores0, 38 | "keypoints0": keypoints0, 39 | "descriptors0": descriptors0, 40 | "image1_shape": image1_shape, 41 | "scores1": scores1, 42 | "keypoints1": keypoints1, 43 | "descriptors1": descriptors1, 44 | } 45 | 46 | """Run SuperGlue on a pair of keypoints and descriptors""" 47 | desc0, desc1 = data["descriptors0"], data["descriptors1"] 48 | kpts0, kpts1 = data["keypoints0"], data["keypoints1"] 49 | 50 | # Keypoint normalization. 51 | kpts0 = normalize_keypoints(kpts0, data["image0_shape"]) 52 | kpts1 = normalize_keypoints(kpts1, data["image1_shape"]) 53 | 54 | # Keypoint MLP encoder. 55 | desc0 = desc0 + self.kenc(kpts0, data["scores0"]) 56 | desc1 = desc1 + self.kenc(kpts1, data["scores1"]) 57 | 58 | # Multi-layer Transformer network. 59 | desc0, desc1 = self.gnn(desc0, desc1) 60 | 61 | # Final MLP projection. 62 | mdesc0, mdesc1 = self.final_proj(desc0), self.final_proj(desc1) 63 | 64 | # Compute matching descriptor distance. 65 | scores = torch.einsum("bdn,bdm->bnm", mdesc0, mdesc1) 66 | scores = scores / self.config["descriptor_dim"] ** 0.5 67 | 68 | # Run the optimal transport. 69 | scores = log_optimal_transport( 70 | scores, self.bin_score, iters=self.config["sinkhorn_iterations"] 71 | ) 72 | 73 | # Get the matches with score above "match_threshold". 
74 | max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1) 75 | indices0, indices1 = max0.indices, max1.indices 76 | mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0) 77 | mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1) 78 | zero = scores.new_tensor(0) 79 | mscores0 = torch.where(mutual0, max0.values.exp(), zero) 80 | mscores1 = torch.where(mutual1, mscores0.gather(1, indices1), zero) 81 | valid0 = mutual0 & (mscores0 > self.config["match_threshold"]) 82 | valid1 = mutual1 & valid0.gather(1, indices1) 83 | indices0 = torch.where(valid0, indices0, indices0.new_tensor(-1)) 84 | indices1 = torch.where(valid1, indices1, indices1.new_tensor(-1)) 85 | 86 | return { 87 | "matches0": indices0, # use -1 for invalid match 88 | "matches1": indices1, # use -1 for invalid match 89 | "matching_scores0": mscores0, 90 | "matching_scores1": mscores1, 91 | } 92 | -------------------------------------------------------------------------------- /scripts/superpoint/README.md: -------------------------------------------------------------------------------- 1 | # convert pre-trained superpoint pytorch weights to onnx format 2 | 3 | --- 4 | 5 | ## dependencies 6 | 7 | --- 8 | 9 | - python: 3x 10 | 11 | - 12 | 13 | ```bash 14 | python3 -m pip install -r requirements.txt 15 | ``` 16 | 17 | ## :running: how to run 18 | 19 | --- 20 | 21 | - update submodule 22 | 23 | ```bash 24 | git submodule update --init --recursive 25 | ``` 26 | 27 | - export onnx weights 28 | 29 | ``` 30 | python3 convert_to_onnx.py 31 | ``` 32 | -------------------------------------------------------------------------------- /scripts/superpoint/convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | 4 | import torch 5 | import torch.onnx 6 | 7 | from SuperPointPretrainedNetwork.demo_superpoint import SuperPointNet 8 | 9 | _CURRENT_DIR = 
os.path.dirname(os.path.realpath(__file__)) 10 | _WEIGHTS_PATH = os.path.join( 11 | _CURRENT_DIR, "SuperPointPretrainedNetwork/superpoint_v1.pth" 12 | ) 13 | 14 | 15 | def main(): 16 | assert os.path.isfile(_WEIGHTS_PATH) 17 | model = SuperPointNet() 18 | model.load_state_dict(torch.load(_WEIGHTS_PATH)) 19 | model.eval() 20 | 21 | batch_size = 1 22 | height = 16 23 | width = 16 24 | x = torch.randn(batch_size, 1, height, width) 25 | 26 | torch.onnx.export( 27 | model, 28 | x, 29 | "super_point.onnx", 30 | export_params=True, 31 | opset_version=10, 32 | do_constant_folding=True, 33 | input_names=["input"], 34 | output_names=["output"], 35 | dynamic_axes={ 36 | "input": {0: "batch_size", 2: "height", 3: "width"}, 37 | "output": {0: "batch_size"}, 38 | }, 39 | ) 40 | print(f"\nonnx model is saved to: {os.getcwd()}/super_point.onnx") 41 | 42 | 43 | if __name__ == "__main__": 44 | main() 45 | -------------------------------------------------------------------------------- /scripts/superpoint/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | torch>=0.4 3 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | set(LIBRARY_NAME ${PROJECT_NAME}) 4 | 5 | file(GLOB SOURCE_FILES 6 | ${PROJECT_SOURCE_DIR}/src/ImageClassificationOrtSessionHandler.cpp 7 | ${PROJECT_SOURCE_DIR}/src/ImageRecognitionOrtSessionHandlerBase.cpp 8 | ${PROJECT_SOURCE_DIR}/src/ObjectDetectionOrtSessionHandler.cpp 9 | ${PROJECT_SOURCE_DIR}/src/OrtSessionHandler.cpp 10 | ) 11 | 12 | add_library(${LIBRARY_NAME} 13 | SHARED 14 | ${SOURCE_FILES} 15 | ) 16 | 17 | target_include_directories(${LIBRARY_NAME} 18 | PUBLIC 19 | $ 20 | $ 21 | PRIVATE 22 | ${onnxruntime_INCLUDE_DIRS} 23 | ) 24 | 25 | target_compile_options(${LIBRARY_NAME} 26 | PRIVATE 27 | $<$:-O0 -g -Wall 
-Werror>
    $<$:-O3>
)

list(APPEND PRIVATE_LIBS ${onnxruntime_LIBS})

target_link_libraries(${LIBRARY_NAME}
    PRIVATE
    ${PRIVATE_LIBS}
)
--------------------------------------------------------------------------------
/src/ImageClassificationOrtSessionHandler.cpp:
--------------------------------------------------------------------------------
/**
 * @file ImageClassificationOrtSessionHandler.cpp
 *
 * @author btran
 *
 */

// NOTE(review): header names and template arguments throughout this file were
// stripped by extraction (e.g. std::vector<...> appears as std::vector>) —
// the code below is reproduced byte-for-byte; confirm against the repository.
#include
#include
#include

#include "ort_utility/ort_utility.hpp"

namespace Ort
{
// Forwards straight to the base: classification adds no extra construction state.
ImageClassificationOrtSessionHandler::ImageClassificationOrtSessionHandler(
    const uint16_t numClasses,  //
    const std::string& modelPath,  //
    const std::optional& gpuIdx, const std::optional>>& inputShapes)
    : ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes)
{
}

ImageClassificationOrtSessionHandler::~ImageClassificationOrtSessionHandler()
{
}

// Returns the top-k (classId, score) pairs sorted by descending score.
// When useSoftmax is set, the raw logits in inferenceOutput[0] are converted
// to probabilities IN PLACE before ranking.
std::vector> ImageClassificationOrtSessionHandler::topK(const std::vector& inferenceOutput,  //
                                                        const uint16_t k,  //
                                                        const bool useSoftmax) const
{
    // clamp k into [1, m_numClasses]
    const uint16_t realK = std::max(std::min(k, m_numClasses), static_cast(1));

    // classification models are expected to have exactly one output tensor
    assert(inferenceOutput.size() == 1);
    float* processData = inferenceOutput[0];
    if (useSoftmax) {
        softmax(processData, m_numClasses);
    }

    std::vector> ps;
    ps.reserve(m_numClasses);

    // NOTE(review): signed `int i` compared against the uint16_t member; fine
    // for <= 65535 classes, but a signed/unsigned mix nonetheless.
    for (int i = 0; i < m_numClasses; ++i) {
        ps.emplace_back(std::make_pair(i, processData[i]));
    }

    // full sort of all classes; only the first realK entries are returned
    std::sort(ps.begin(), ps.end(), [](const auto& elem1, const auto& elem2) { return elem1.second > elem2.second; });

    return std::vector>(ps.begin(), ps.begin() + realK);
}

// Human-readable dump of topK(): "<id> : <name> : <score>" per line, falling
// back to "<id> : <score>" when no class names were registered.
std::string ImageClassificationOrtSessionHandler::topKToString(const std::vector& inferenceOutput,  //
                                                               const uint16_t k,  //
                                                               const bool useSoftmax) const
{
    auto ps = this->topK(inferenceOutput, k, useSoftmax);

    std::stringstream ss;

    if (m_classNames.size() == 0) {
        for (const auto& elem : ps) {
            ss << elem.first << " : " << elem.second << std::endl;
        }
    } else {
        for (const auto& elem : ps) {
            ss << elem.first << " : " << m_classNames[elem.first] << " : " << elem.second << std::endl;
        }
    }

    return ss.str();
}
}  // namespace Ort
--------------------------------------------------------------------------------
/src/ImageRecognitionOrtSessionHandlerBase.cpp:
--------------------------------------------------------------------------------
/**
 * @file ImageRecognitionOrtSessionHandlerBase.cpp
 *
 * @author btran
 *
 * Copyright (c) organization
 *
 */

// NOTE(review): header names below were stripped by extraction.
#include
#include
#include

#include "ort_utility/ort_utility.hpp"

namespace Ort
{
// Seeds m_classNames with "0".."numClasses-1" placeholders; real names can be
// installed later via initClassNames().
ImageRecognitionOrtSessionHandlerBase::ImageRecognitionOrtSessionHandlerBase(
    const uint16_t numClasses,  //
    const std::string& modelPath,  //
    const std::optional& gpuIdx, const std::optional>>& inputShapes)
    : OrtSessionHandler(modelPath, gpuIdx, inputShapes)
    , m_numClasses(numClasses)
    , m_classNames()
{
    // NOTE(review): numClasses is unsigned, so `<= 0` only catches 0.
    if (numClasses <= 0) {
        throw std::runtime_error("Number of classes must be more than 0\n");
    }

    m_classNames.reserve(m_numClasses);
    for (uint16_t i = 0; i < m_numClasses; ++i) {
        m_classNames.emplace_back(std::to_string(i));
    }
}

ImageRecognitionOrtSessionHandlerBase::~ImageRecognitionOrtSessionHandlerBase()
{
}

// Replace the placeholder class names; the caller must supply exactly
// m_numClasses entries.
void ImageRecognitionOrtSessionHandlerBase::initClassNames(const std::vector& classNames)
{
    if (classNames.size() != m_numClasses) {
        throw std::runtime_error("Mismatch number of classes\n");
    }

    m_classNames = classNames;
}

// Convert an interleaved (HWC) uint8 image into planar (CHW) float, optionally
// scaling to [0,1] and normalizing per channel with (x/255 - mean) / std.
void ImageRecognitionOrtSessionHandlerBase::preprocess(float* dst,  //
                                                       const unsigned char* src,  //
                                                       const int64_t targetImgWidth,  //
                                                       const int64_t targetImgHeight,  //
                                                       const int numChannels,  //
                                                       const std::vector& meanVal,  //
                                                       const std::vector& stdVal) const
{
    if (!meanVal.empty() && !stdVal.empty()) {
        assert(meanVal.size() == stdVal.size() && meanVal.size() == static_cast(numChannels));
    }

    int64_t dataLength = targetImgHeight * targetImgWidth * numChannels;

    // NOTE(review): this memcpy copies dataLength BYTES of uchar data into a
    // float buffer, and every element of dst is overwritten by one of the two
    // branches below — this looks like dead/incorrect code; confirm and remove.
    memcpy(dst, reinterpret_cast(src), dataLength);

    if (!meanVal.empty() && !stdVal.empty()) {
        for (int i = 0; i < targetImgHeight; ++i) {
            for (int j = 0; j < targetImgWidth; ++j) {
                for (int c = 0; c < numChannels; ++c) {
                    // dst index: channel plane first (CHW); src index: pixel-interleaved (HWC)
                    dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] =
                        (src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0 - meanVal[c]) / stdVal[c];
                }
            }
        }
    } else {
        for (int i = 0; i < targetImgHeight; ++i) {
            for (int j = 0; j < targetImgWidth; ++j) {
                for (int c = 0; c < numChannels; ++c) {
                    dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] =
                        src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0;
                }
            }
        }
    }
}
}  // namespace Ort
--------------------------------------------------------------------------------
/src/ObjectDetectionOrtSessionHandler.cpp:
--------------------------------------------------------------------------------
/**
 * @file ObjectDetectionOrtSessionHandler.cpp
 *
 * @author btran
 *
 */

#include "ort_utility/ort_utility.hpp"

namespace Ort
{
ObjectDetectionOrtSessionHandler::ObjectDetectionOrtSessionHandler(
    const uint16_t numClasses,  //
    const std::string& modelPath,  //
    const std::optional& gpuIdx,  //
    const std::optional>>& inputShapes)
    : ImageRecognitionOrtSessionHandlerBase(numClasses,
modelPath, gpuIdx, inputShapes) 18 | { 19 | } 20 | 21 | ObjectDetectionOrtSessionHandler::~ObjectDetectionOrtSessionHandler() 22 | { 23 | } 24 | 25 | } // namespace Ort 26 | -------------------------------------------------------------------------------- /src/OrtSessionHandler.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file OrtSessionHandler.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #if ENABLE_TENSORRT 12 | #include 13 | #endif 14 | 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace 23 | { 24 | std::string toString(const ONNXTensorElementDataType dataType) 25 | { 26 | switch (dataType) { 27 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: { 28 | return "float"; 29 | } 30 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: { 31 | return "uint8_t"; 32 | } 33 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: { 34 | return "int8_t"; 35 | } 36 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16: { 37 | return "uint16_t"; 38 | } 39 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16: { 40 | return "int16_t"; 41 | } 42 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: { 43 | return "int32_t"; 44 | } 45 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: { 46 | return "int64_t"; 47 | } 48 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING: { 49 | return "string"; 50 | } 51 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: { 52 | return "bool"; 53 | } 54 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: { 55 | return "float16"; 56 | } 57 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: { 58 | return "double"; 59 | } 60 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32: { 61 | return "uint32_t"; 62 | } 63 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64: { 64 | return "uint64_t"; 65 | } 66 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64: { 67 | return "complex with float32 real and imaginary components"; 68 | } 69 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128: { 70 | return "complex with float64 real and imaginary 
components"; 71 | } 72 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16: { 73 | return "complex with float64 real and imaginary components"; 74 | } 75 | default: 76 | return "undefined"; 77 | } 78 | } 79 | } // namespace 80 | 81 | namespace Ort 82 | { 83 | //-----------------------------------------------------------------------------// 84 | // OrtSessionHandlerIml Definition 85 | //-----------------------------------------------------------------------------// 86 | 87 | class OrtSessionHandler::OrtSessionHandlerIml 88 | { 89 | public: 90 | OrtSessionHandlerIml(const std::string& modelPath, // 91 | const std::optional& gpuIdx, // 92 | const std::optional>>& inputShapes); 93 | ~OrtSessionHandlerIml(); 94 | 95 | std::vector operator()(const std::vector& inputData) const; 96 | 97 | void updateInputShapes(const std::vector>& inputShapes) 98 | { 99 | if (inputShapes.size() != m_numInputs) { 100 | DEBUG_LOG("inputShapes must be of size: %d", m_numInputs); 101 | return; 102 | } 103 | m_inputShapes = inputShapes; 104 | 105 | for (int i = 0; i < m_numInputs; i++) { 106 | const auto& curInputShape = m_inputShapes[i]; 107 | m_inputTensorSizes[i] = 108 | std::accumulate(std::begin(curInputShape), std::end(curInputShape), 1, std::multiplies()); 109 | } 110 | } 111 | 112 | private: 113 | void initSession(); 114 | void initModelInfo(); 115 | 116 | private: 117 | std::string m_modelPath; 118 | 119 | mutable Ort::Session m_session; 120 | Ort::Env m_env; 121 | Ort::AllocatorWithDefaultOptions m_ortAllocator; 122 | 123 | std::optional m_gpuIdx; 124 | 125 | std::vector> m_inputShapes; 126 | std::vector> m_outputShapes; 127 | 128 | std::vector m_inputTensorSizes; 129 | std::vector m_outputTensorSizes; 130 | 131 | uint8_t m_numInputs; 132 | uint8_t m_numOutputs; 133 | 134 | std::vector m_inputNodeNames; 135 | std::vector m_outputNodeNames; 136 | 137 | bool m_inputShapesProvided = false; 138 | }; 139 | 140 | //-----------------------------------------------------------------------------// 
141 | // OrtSessionHandler 142 | //-----------------------------------------------------------------------------// 143 | 144 | OrtSessionHandler::OrtSessionHandler(const std::string& modelPath, // 145 | const std::optional& gpuIdx, // 146 | const std::optional>>& inputShapes) 147 | : m_piml(std::make_unique(modelPath, // 148 | gpuIdx, // 149 | inputShapes)) 150 | { 151 | } 152 | 153 | OrtSessionHandler::~OrtSessionHandler() = default; 154 | 155 | std::vector 156 | OrtSessionHandler::operator()(const std::vector& inputImgData) const 157 | { 158 | return this->m_piml->operator()(inputImgData); 159 | } 160 | 161 | //-----------------------------------------------------------------------------// 162 | // piml class implementation 163 | //-----------------------------------------------------------------------------// 164 | 165 | OrtSessionHandler::OrtSessionHandlerIml::OrtSessionHandlerIml( 166 | const std::string& modelPath, // 167 | const std::optional& gpuIdx, // 168 | const std::optional>>& inputShapes) 169 | : m_modelPath(modelPath) 170 | , m_session(nullptr) 171 | , m_env(nullptr) 172 | , m_ortAllocator() 173 | , m_gpuIdx(gpuIdx) 174 | , m_inputShapes() 175 | , m_outputShapes() 176 | , m_numInputs(0) 177 | , m_numOutputs(0) 178 | , m_inputNodeNames() 179 | , m_outputNodeNames() 180 | { 181 | this->initSession(); 182 | 183 | if (inputShapes.has_value()) { 184 | m_inputShapesProvided = true; 185 | m_inputShapes = inputShapes.value(); 186 | } 187 | 188 | this->initModelInfo(); 189 | } 190 | 191 | OrtSessionHandler::OrtSessionHandlerIml::~OrtSessionHandlerIml() 192 | { 193 | for (auto& elem : this->m_inputNodeNames) { 194 | free(elem); 195 | elem = nullptr; 196 | } 197 | this->m_inputNodeNames.clear(); 198 | 199 | for (auto& elem : this->m_outputNodeNames) { 200 | free(elem); 201 | elem = nullptr; 202 | } 203 | this->m_outputNodeNames.clear(); 204 | } 205 | 206 | void OrtSessionHandler::OrtSessionHandlerIml::initSession() 207 | { 208 | #if ENABLE_DEBUG 209 | m_env = 
Ort::Env(ORT_LOGGING_LEVEL_WARNING, "test"); 210 | #else 211 | m_env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "test"); 212 | #endif 213 | Ort::SessionOptions sessionOptions; 214 | 215 | sessionOptions.SetIntraOpNumThreads(1); 216 | // tensorrt options can be customized into sessionOptions 217 | // https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html 218 | 219 | #if ENABLE_GPU 220 | if (m_gpuIdx.has_value()) { 221 | Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, m_gpuIdx.value())); 222 | #if ENABLE_TENSORRT 223 | Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(sessionOptions, m_gpuIdx.value())); 224 | #endif 225 | } 226 | #endif 227 | 228 | sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); 229 | m_session = Ort::Session(m_env, m_modelPath.c_str(), sessionOptions); 230 | m_numInputs = m_session.GetInputCount(); 231 | DEBUG_LOG("Model number of inputs: %d\n", m_numInputs); 232 | 233 | m_inputNodeNames.reserve(m_numInputs); 234 | m_inputTensorSizes.reserve(m_numInputs); 235 | 236 | m_numOutputs = m_session.GetOutputCount(); 237 | DEBUG_LOG("Model number of outputs: %d\n", m_numOutputs); 238 | 239 | m_outputNodeNames.reserve(m_numOutputs); 240 | m_outputTensorSizes.reserve(m_numOutputs); 241 | } 242 | 243 | void OrtSessionHandler::OrtSessionHandlerIml::initModelInfo() 244 | { 245 | for (int i = 0; i < m_numInputs; i++) { 246 | if (!m_inputShapesProvided) { 247 | Ort::TypeInfo typeInfo = m_session.GetInputTypeInfo(i); 248 | auto tensorInfo = typeInfo.GetTensorTypeAndShapeInfo(); 249 | 250 | m_inputShapes.emplace_back(tensorInfo.GetShape()); 251 | } 252 | 253 | const auto& curInputShape = m_inputShapes[i]; 254 | 255 | m_inputTensorSizes.emplace_back( 256 | std::accumulate(std::begin(curInputShape), std::end(curInputShape), 1, std::multiplies())); 257 | 258 | #if ORT_API_VERSION > 12 259 | m_inputNodeNames.emplace_back(strdup(m_session.GetInputNameAllocated(i, 
m_ortAllocator).get())); 260 | #else 261 | char* inputName = m_session.GetInputName(i, m_ortAllocator); 262 | m_inputNodeNames.emplace_back(strdup(inputName)); 263 | m_ortAllocator.Free(inputName); 264 | #endif 265 | } 266 | 267 | { 268 | #if ENABLE_DEBUG 269 | std::stringstream ssInputs; 270 | ssInputs << "Model input shapes: "; 271 | ssInputs << m_inputShapes << std::endl; 272 | ssInputs << "Model input node names: "; 273 | ssInputs << m_inputNodeNames << std::endl; 274 | DEBUG_LOG("%s\n", ssInputs.str().c_str()); 275 | #endif 276 | } 277 | 278 | for (int i = 0; i < m_numOutputs; ++i) { 279 | Ort::TypeInfo typeInfo = m_session.GetOutputTypeInfo(i); 280 | auto tensorInfo = typeInfo.GetTensorTypeAndShapeInfo(); 281 | 282 | m_outputShapes.emplace_back(tensorInfo.GetShape()); 283 | 284 | #if ORT_API_VERSION > 12 285 | m_outputNodeNames.emplace_back(strdup(m_session.GetOutputNameAllocated(i, m_ortAllocator).get())); 286 | #else 287 | char* outputName = m_session.GetOutputName(i, m_ortAllocator); 288 | m_outputNodeNames.emplace_back(strdup(outputName)); 289 | m_ortAllocator.Free(outputName); 290 | #endif 291 | } 292 | 293 | { 294 | #if ENABLE_DEBUG 295 | std::stringstream ssOutputs; 296 | ssOutputs << "Model output shapes: "; 297 | ssOutputs << m_outputShapes << std::endl; 298 | ssOutputs << "Model output node names: "; 299 | ssOutputs << m_outputNodeNames << std::endl; 300 | DEBUG_LOG("%s\n", ssOutputs.str().c_str()); 301 | #endif 302 | } 303 | } 304 | 305 | std::vector 306 | OrtSessionHandler::OrtSessionHandlerIml::operator()(const std::vector& inputData) const 307 | { 308 | if (m_numInputs != inputData.size()) { 309 | DEBUG_LOG("m_numInputs:%d, input size:%ld", m_numInputs, inputData.size()); 310 | throw std::runtime_error("Mismatch size of input data"); 311 | } 312 | 313 | Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); 314 | 315 | std::vector inputTensors; 316 | inputTensors.reserve(m_numInputs); 317 | 318 | for (int 
i = 0; i < m_numInputs; ++i) { 319 | inputTensors.emplace_back(std::move( 320 | Ort::Value::CreateTensor(memoryInfo, const_cast(inputData[i]), m_inputTensorSizes[i], 321 | m_inputShapes[i].data(), m_inputShapes[i].size()))); 322 | } 323 | 324 | auto outputTensors = m_session.Run(Ort::RunOptions{nullptr}, m_inputNodeNames.data(), inputTensors.data(), 325 | m_numInputs, m_outputNodeNames.data(), m_numOutputs); 326 | 327 | assert(outputTensors.size() == m_numOutputs); 328 | std::vector outputData; 329 | outputData.reserve(m_numOutputs); 330 | 331 | int count = 1; 332 | for (auto& elem : outputTensors) { 333 | DEBUG_LOG("type of input %d: %s", count++, toString(elem.GetTensorTypeAndShapeInfo().GetElementType()).c_str()); 334 | outputData.emplace_back( 335 | std::make_pair(std::move(elem.GetTensorMutableData()), elem.GetTensorTypeAndShapeInfo().GetShape())); 336 | } 337 | 338 | return outputData; 339 | } 340 | 341 | void OrtSessionHandler::updateInputShapes(const std::vector>& inputShapes) 342 | { 343 | m_piml->updateInputShapes(inputShapes); 344 | } 345 | } // namespace Ort 346 | --------------------------------------------------------------------------------