├── .clang-format ├── .github └── workflows │ └── build.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CMakeLists.txt ├── CPPLINT.cfg ├── LICENSE ├── Makefile ├── README.md ├── cmake ├── cmake_utility.cmake └── tensorrt.cmake ├── data ├── images │ ├── bird_detection.jpg │ ├── dog.jpg │ ├── dogs.jpg │ ├── endgame.jpg │ ├── indoor.jpg │ ├── matrix.jpg │ ├── no_way_home.jpg │ ├── odaiba.jpg │ └── sample_city_scapes.png ├── squeezenet1.1.onnx └── version-RFB-640.onnx ├── dockerfiles ├── ubuntu2004.dockerfile ├── ubuntu2004_gpu.dockerfile └── ubuntu2004_tensorrt.dockerfile ├── docs ├── images │ ├── ComputerVision_VN_30.jpg │ ├── ComputerVision_VisionCS_0.jpg │ ├── ComputerVision_VisionDN_1.jpg │ ├── RemoteSensing_CS1.jpg │ ├── RemoteSensing_CS5.jpg │ ├── RetinaFix.jpg │ ├── bird_detection_result.jpg │ ├── cityscapes_legend.jpg │ ├── dogs_maskrcnn_result.jpg │ ├── endgame_result.jpg │ ├── indoor_maskrcnn_result.jpg │ ├── loftr.jpg │ ├── matrix_result.jpg │ ├── no_way_home_result.jpg │ ├── odaiba_result.jpg │ ├── sample_city_scapes_result.jpg │ ├── street_result.jpg │ ├── super_point_good_matches.jpg │ └── tiny_yolov2_result.jpg └── onnxruntime_tensorrt.md ├── examples ├── CMakeLists.txt ├── LoFTR.cpp ├── LoFTR.hpp ├── LoFTRApp.cpp ├── MaskRCNN.cpp ├── MaskRCNN.hpp ├── MaskRCNNApp.cpp ├── PrimitiveTest.cpp ├── SemanticSegmentationPaddleSegBisenetv2.cpp ├── SemanticSegmentationPaddleSegBisenetv2.hpp ├── SemanticSegmentationPaddleSegBisenetv2App.cpp ├── SuperGlueApp.cpp ├── SuperPoint.cpp ├── SuperPoint.hpp ├── SuperPointApp.cpp ├── TestImageClassification.cpp ├── TinyYolov2.cpp ├── TinyYolov2.hpp ├── TinyYolov2App.cpp ├── UltraLightFastGenericFaceDetector.cpp ├── UltraLightFastGenericFaceDetector.hpp ├── UltraLightFastGenericFaceDetectorApp.cpp ├── Utility.hpp ├── YoloX.cpp ├── YoloX.hpp ├── YoloXApp.cpp ├── Yolov3.cpp ├── Yolov3.hpp └── Yolov3App.cpp ├── include └── ort_utility │ ├── Constants.hpp │ ├── ImageClassificationOrtSessionHandler.hpp │ ├── 
ImageRecognitionOrtSessionHandlerBase.hpp │ ├── ObjectDetectionOrtSessionHandler.hpp │ ├── OrtSessionHandler.hpp │ ├── Utility.hpp │ └── ort_utility.hpp ├── scripts ├── get_cuda_environment_variables.bash ├── get_tensorrt_environment_variables.bash ├── install_apps_dependencies.bash ├── install_latest_cmake.bash ├── install_onnx_runtime.bash ├── loftr │ ├── README.md │ ├── convert_to_onnx.py │ ├── loftr_wrapper.py │ └── requirements.txt ├── superglue │ ├── README.md │ ├── convert_to_onnx.py │ ├── requirements.txt │ └── superglue_wrapper.py └── superpoint │ ├── README.md │ ├── convert_to_onnx.py │ └── requirements.txt └── src ├── CMakeLists.txt ├── ImageClassificationOrtSessionHandler.cpp ├── ImageRecognitionOrtSessionHandlerBase.cpp ├── ObjectDetectionOrtSessionHandler.cpp └── OrtSessionHandler.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: LLVM 4 | AccessModifierOffset: -3 5 | AlignAfterOpenBracket: true 6 | AlignEscapedNewlinesLeft: false 7 | AlignOperands: true 8 | AlignTrailingComments: true 9 | AllowAllParametersOfDeclarationOnNextLine: true 10 | AllowShortBlocksOnASingleLine: false 11 | AllowShortCaseLabelsOnASingleLine: false 12 | AllowShortIfStatementsOnASingleLine: false 13 | AllowShortLoopsOnASingleLine: false 14 | AllowShortFunctionsOnASingleLine: None 15 | AlwaysBreakAfterDefinitionReturnType: false 16 | AlwaysBreakTemplateDeclarations: false 17 | AlwaysBreakBeforeMultilineStrings: false 18 | BreakBeforeBinaryOperators: None 19 | BreakBeforeTernaryOperators: true 20 | BreakConstructorInitializersBeforeComma: true 21 | BinPackParameters: true 22 | BinPackArguments: true 23 | ColumnLimit: 120 24 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 25 | ConstructorInitializerIndentWidth: 4 26 | DerivePointerAlignment: false 27 | ExperimentalAutoDetectBinPacking: false 28 | IndentCaseLabels: true 29 | IndentWrappedFunctionNames: false 30 | 
IndentFunctionDeclarationAfterType: false 31 | MaxEmptyLinesToKeep: 1 32 | KeepEmptyLinesAtTheStartOfBlocks: false 33 | NamespaceIndentation: None 34 | ObjCBlockIndentWidth: 2 35 | ObjCSpaceAfterProperty: false 36 | ObjCSpaceBeforeProtocolList: true 37 | PenaltyBreakBeforeFirstCallParameter: 19 38 | PenaltyBreakComment: 300 39 | PenaltyBreakString: 1000 40 | PenaltyBreakFirstLessLess: 120 41 | PenaltyExcessCharacter: 1000000 42 | PenaltyReturnTypeOnItsOwnLine: 60 43 | PointerAlignment: Left 44 | SpacesBeforeTrailingComments: 2 45 | Cpp11BracedListStyle: true 46 | Standard: Cpp11 47 | IndentWidth: 4 48 | TabWidth: 8 49 | UseTab: Never 50 | BreakBeforeBraces: Linux # the position of the braces 51 | SpacesInParentheses: false 52 | SpacesInSquareBrackets: false 53 | SpacesInAngles: false 54 | SpaceInEmptyParentheses: false 55 | SpacesInCStyleCastParentheses: false 56 | SpaceAfterCStyleCast: false 57 | SpacesInContainerLiterals: true 58 | SpaceBeforeAssignmentOperators: true 59 | ContinuationIndentWidth: 4 60 | CommentPragmas: ".*" 61 | ForEachMacros: [foreach, Q_FOREACH, BOOST_FOREACH] 62 | SpaceBeforeParens: ControlStatements 63 | DisableFormat: false 64 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: ["master"] 6 | pull_request: 7 | 8 | jobs: 9 | linting: 10 | runs-on: ubuntu-20.04 11 | steps: 12 | - uses: actions/checkout@v3 13 | 14 | - name: Setup python 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: "3.x" 18 | 19 | - name: Apply pre-commit 20 | uses: pre-commit/action@v3.0.0 21 | with: 22 | extra_args: --all-files 23 | 24 | test-production: 25 | runs-on: ubuntu-20.04 26 | container: 27 | image: xmba15/onnx_runtime_cpp:v1.14.1-ubuntu20.04 28 | 29 | steps: 30 | - uses: actions/checkout@v3 31 | 32 | - name: Setup environment 33 | run: | 34 | sudo 
apt-get update 35 | bash ./scripts/install_apps_dependencies.bash 36 | 37 | - name: Test building 38 | run: | 39 | make apps -j`nproc` 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | *.pyc 10 | .ipynb_checkpoints 11 | *~ 12 | *# 13 | build* 14 | .cache* 15 | 16 | # Packages # 17 | ################### 18 | # it's better to unpack these files and commit the raw source 19 | # git has its own built in compression methods 20 | *.7z 21 | *.dmg 22 | *.gz 23 | *.iso 24 | *.jar 25 | *.rar 26 | *.tar 27 | *.zip 28 | 29 | # Logs and databases # 30 | ###################### 31 | *.log 32 | *.sql 33 | *.sqlite 34 | 35 | # OS generated files # 36 | ###################### 37 | .DS_Store 38 | .DS_Store? 39 | ._* 40 | .Spotlight-V100 41 | .Trashes 42 | ehthumbs.db 43 | Thumbs.db 44 | 45 | # Images 46 | ###################### 47 | *.jpg 48 | *.gif 49 | *.png 50 | *.svg 51 | *.ico 52 | 53 | # Video 54 | ###################### 55 | *.wmv 56 | *.mpg 57 | *.mpeg 58 | *.mp4 59 | *.mov 60 | *.flv 61 | *.avi 62 | *.ogv 63 | *.ogg 64 | *.webm 65 | 66 | # Audio 67 | ###################### 68 | *.wav 69 | *.mp3 70 | *.wma 71 | 72 | # Fonts 73 | ###################### 74 | Fonts 75 | *.eot 76 | *.ttf 77 | *.woff 78 | 79 | # Format 80 | ###################### 81 | CPPLINT.cfg 82 | .clang-format 83 | 84 | # Gtags 85 | ###################### 86 | GPATH 87 | GRTAGS 88 | GSYMS 89 | GTAGS 90 | 91 | data* 92 | *onnx* 93 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "scripts/superpoint/SuperPointPretrainedNetwork"] 2 | path = scripts/superpoint/SuperPointPretrainedNetwork 3 | url = 
https://github.com/magicleap/SuperPointPretrainedNetwork 4 | [submodule "scripts/superglue/SuperGluePretrainedNetwork"] 5 | path = scripts/superglue/SuperGluePretrainedNetwork 6 | url = https://github.com/magicleap/SuperGluePretrainedNetwork.git 7 | [submodule "scripts/loftr/LoFTR"] 8 | path = scripts/loftr/LoFTR 9 | url = https://github.com/xmba15/LoFTR 10 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/mirrors-clang-format 3 | rev: v14.0.6 4 | hooks: 5 | - id: clang-format 6 | 7 | - repo: https://github.com/cpplint/cpplint 8 | rev: 1.6.0 9 | hooks: 10 | - id: cpplint 11 | 12 | - repo: https://github.com/pre-commit/pre-commit-hooks 13 | rev: v4.3.0 14 | hooks: 15 | - id: end-of-file-fixer 16 | - id: trailing-whitespace 17 | 18 | - repo: https://github.com/pre-commit/mirrors-prettier 19 | rev: v2.7.1 20 | hooks: 21 | - id: prettier 22 | types_or: [json, markdown, yaml] 23 | 24 | - repo: https://github.com/lovesegfault/beautysh 25 | rev: v6.2.1 26 | hooks: 27 | - id: beautysh 28 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | project(ort_utility VERSION 0.0.2) 4 | set (CMAKE_CXX_STANDARD 17) 5 | 6 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) 7 | 8 | set(onnxruntime_INSTALL_PREFIX /usr/local) 9 | set(onnxruntime_INCLUDE_DIRS 10 | ${onnxruntime_INSTALL_PREFIX}/include/onnxruntime 11 | ${onnxruntime_INSTALL_PREFIX}/include/onnxruntime/core/session 12 | ) 13 | 14 | find_library(onnxruntime_LIBS NAMES onnxruntime PATHS /usr/local/lib) 15 | 16 | find_package(CUDA QUIET) 17 | 18 | if(CUDA_FOUND AND USE_GPU) 19 | add_definitions(-DENABLE_GPU=1) 20 | include(tensorrt) 21 | if 
(TENSORRT_FOUND) 22 | add_definitions(-DENABLE_TENSORRT=1) 23 | endif() 24 | else() 25 | add_definitions(-DENABLE_GPU=0) 26 | add_definitions(-DENABLE_TENSORRT=0) 27 | endif() 28 | 29 | 30 | add_compile_options( 31 | "$<$:-DENABLE_DEBUG=1>" 32 | "$<$:-DENABLE_DEBUG=0>" 33 | ) 34 | 35 | add_subdirectory(src) 36 | 37 | if(BUILD_EXAMPLES) 38 | add_subdirectory(examples) 39 | endif(BUILD_EXAMPLES) 40 | -------------------------------------------------------------------------------- /CPPLINT.cfg: -------------------------------------------------------------------------------- 1 | set noparent 2 | 3 | filter=-legal/copyright 4 | filter=-whitespace/braces 5 | filter=-build/header_guard 6 | filter=+build/pragma_once 7 | filter=-build/include_order 8 | filter=-build/include_subdir 9 | filter=-build/c++11 10 | linelength=120 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-2021 Ba Tran 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BUILD_EXAMPLES=OFF 2 | BUILD_TYPE=Release 3 | CMAKE_ARGS:=$(CMAKE_ARGS) 4 | USE_GPU=OFF 5 | 6 | default: 7 | @mkdir -p build 8 | @cd build && cmake .. -DBUILD_EXAMPLES=$(BUILD_EXAMPLES) \ 9 | -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ 10 | -DUSE_GPU=$(USE_GPU) \ 11 | -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ 12 | $(CMAKE_ARGS) 13 | @cd build && make 14 | 15 | gpu_default: 16 | @make default USE_GPU=ON 17 | 18 | debug: 19 | @make default BUILD_TYPE=Debug 20 | 21 | apps: 22 | @make default BUILD_EXAMPLES=ON 23 | 24 | gpu_apps: 25 | @make apps USE_GPU=ON 26 | 27 | debug_apps: 28 | @make debug BUILD_EXAMPLES=ON 29 | 30 | clean: 31 | @rm -rf build* 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build](https://github.com/xmba15/onnx_runtime_cpp/actions/workflows/build.yml/badge.svg)](https://github.com/xmba15/onnx_runtime_cpp/actions/workflows/build.yml) 2 | 3 | If this work somehow makes your day, maybe you can consider :) 4 | 5 | [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/xmba15) 6 | 7 | # small c++ library to quickly use [onnxruntime](https://github.com/microsoft/onnxruntime) to deploy deep learning models 8 | 9 | Thanks to [cardboardcode](https://github.com/cardboardcode), we have [the documentation](https://onnx-runtime-cpp.readthedocs.io/en/latest/index.html) for this small library. 
10 | Hope that they both are helpful for your work. 11 | 12 |
13 | Table of Contents 14 |
    15 |
  1. TODO
  2. 16 |
  3. Installation
  4. 17 |
  5. 18 | How to Build 19 | 22 |
  6. 23 |
  7. How to test apps
  8. 24 |
25 |
26 | 27 | ## TODO 28 | 29 | - [x] Support inference of multi-inputs, multi-outputs 30 | - [x] Examples for famous models, like yolov3, mask-rcnn, [ultra-light-weight face detector](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB), [yolox](https://github.com/Megvii-BaseDetection/YOLOX), [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3), [SuperPoint](https://github.com/magicleap/SuperPointPretrainedNetwork), [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork/tree/ddcf11f42e7e0732a0c4607648f9448ea8d73590), [LoFTR](https://zju3dv.github.io/loftr/). Might consider supporting more if requested. 31 | - [x] (Minimal^^) Support for TensorRT backend 32 | - [ ] Batch-inference 33 | 34 | ## Installation 35 | 36 | - build onnxruntime from source with the following script 37 | 38 | ```bash 39 | # onnxruntime needs newer cmake version to build 40 | bash ./scripts/install_latest_cmake.bash 41 | 42 | 43 | bash ./scripts/install_onnx_runtime.bash 44 | 45 | # dependencies to build apps 46 | bash ./scripts/install_apps_dependencies.bash 47 | ``` 48 | 49 | ## How to build 50 | 51 | --- 52 | 53 |
54 | CPU 55 | 56 | ```bash 57 | make default 58 | 59 | # build examples 60 | make apps 61 | ``` 62 | 63 |
64 | 65 |
66 | GPU with CUDA 67 | 68 | ```bash 69 | make gpu_default 70 | 71 | make gpu_apps 72 | ``` 73 | 74 |
75 | 76 | ### How to Run with Docker 77 | 78 |
79 | CPU 80 | 81 | ```bash 82 | # build 83 | docker build -f ./dockerfiles/ubuntu2004.dockerfile -t onnx_runtime . 84 | 85 | # run 86 | docker run -it --rm -v `pwd`:/workspace onnx_runtime 87 | ``` 88 | 89 |
90 | 91 |
92 | GPU with CUDA 93 | 94 | ```bash 95 | # build 96 | # change the cuda version to match your local cuda version before build the docker 97 | 98 | docker build -f ./dockerfiles/ubuntu2004_gpu.dockerfile -t onnx_runtime_gpu . 99 | 100 | # run 101 | docker run -it --rm --gpus all -v `pwd`:/workspace onnx_runtime_gpu 102 | ``` 103 | 104 | - Onnxruntime will be built with TensorRT support if the environment has TensorRT. Check [this memo](./docs/onnxruntime_tensorrt.md) for useful URLs related to building with TensorRT. 105 | - Be careful to choose TensorRT version compatible with onnxruntime. A good guess can be inferred from [HERE](https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.tensorrt). 106 | - Also it is not possible to use models whose input shapes are dynamic with TensorRT backend, according to [this](https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#shape-inference-for-tensorrt-subgraphs) 107 | 108 |
109 | 110 | ## How to test apps 111 | 112 | --- 113 | 114 | ### Image Classification With Squeezenet 115 | 116 | --- 117 | 118 |
119 | Usage 120 | 121 | ```bash 122 | # after make apps 123 | ./build/examples/TestImageClassification ./data/squeezenet1.1.onnx ./data/images/dog.jpg 124 | ``` 125 | 126 | the following result can be obtained 127 | 128 | ``` 129 | 264 : Cardigan, Cardigan Welsh corgi : 0.391365 130 | 263 : Pembroke, Pembroke Welsh corgi : 0.376214 131 | 227 : kelpie : 0.0314975 132 | 158 : toy terrier : 0.0223435 133 | 230 : Shetland sheepdog, Shetland sheep dog, Shetland : 0.020529 134 | ``` 135 | 136 |
137 | 138 |

(back to top)

139 | 140 | ### Object Detection With Tiny-Yolov2 trained on VOC dataset (with 20 classes) 141 | 142 | --- 143 | 144 |

145 | 146 |

147 | 148 |
149 | Usage 150 | 151 | - Download model from onnx model zoo: [HERE](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov2) 152 | 153 | - The shape of the output would be 154 | 155 | ```text 156 | OUTPUT_FEATUREMAP_SIZE X OUTPUT_FEATUREMAP_SIZE * NUM_ANCHORS * (NUM_CLASSES + 4 + 1) 157 | where OUTPUT_FEATUREMAP_SIZE = 13; NUM_ANCHORS = 5; NUM_CLASSES = 20 for the tiny-yolov2 model from onnx model zoo 158 | ``` 159 | 160 | - Test tiny-yolov2 inference apps 161 | 162 | ```bash 163 | # after make apps 164 | ./build/examples/tiny_yolo_v2 [path/to/tiny_yolov2/onnx/model] ./data/images/dog.jpg 165 | ``` 166 | 167 |
168 | 169 |

(back to top)

170 | 171 | ### Object Instance Segmentation With MaskRCNN trained on MS CoCo Dataset (80 + 1(background) clasess) 172 | 173 | --- 174 | 175 |

176 | 177 | 178 |

179 | 180 |
181 | Usage 182 | 183 | - Download model from onnx model zoo: [HERE](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/mask-rcnn) 184 | 185 | - As also stated in the url above, there are four outputs: boxes(nboxes x 4), labels(nboxes), scores(nboxes), masks(nboxesx1x28x28) 186 | - Test mask-rcnn inference apps 187 | 188 | ```bash 189 | # after make apps 190 | ./build/examples/mask_rcnn [path/to/mask_rcnn/onnx/model] ./data/images/dogs.jpg 191 | ``` 192 | 193 |
194 | 195 |

(back to top)

196 | 197 | ### Yolo V3 trained on Ms CoCo Dataset 198 | 199 | --- 200 | 201 |

202 | 203 |

204 | 205 |
206 | Usage 207 | 208 | - Download model from onnx model zoo: [HERE](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/yolov3) 209 | 210 | - Test yolo-v3 inference apps 211 | 212 | ```bash 213 | # after make apps 214 | ./build/examples/yolov3 [path/to/yolov3/onnx/model] ./data/images/no_way_home.jpg 215 | ``` 216 | 217 |
218 | 219 |

(back to top)

220 | 221 | ### [Ultra-Light-Fast-Generic-Face-Detector-1MB](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB) 222 | 223 | --- 224 | 225 |

226 | 227 |

228 | 229 |
230 | Usage 231 | 232 | - App to use onnx model trained with famous light-weight [Ultra-Light-Fast-Generic-Face-Detector-1MB](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB) 233 | - Sample weight has been saved [./data/version-RFB-640.onnx](./data/version-RFB-640.onnx) 234 | - Test inference apps 235 | 236 | ```bash 237 | # after make apps 238 | ./build/examples/ultra_light_face_detector ./data/version-RFB-640.onnx ./data/images/endgame.jpg 239 | ``` 240 | 241 |
242 | 243 |

(back to top)

244 | 245 | ### [YoloX: high-performance anchor-free YOLO by Megvii](https://github.com/Megvii-BaseDetection/YOLOX) 246 | 247 | --- 248 | 249 |

250 | 251 |

252 | 253 |
254 | Usage 255 | 256 | - Download onnx model trained on COCO dataset from [HERE](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime) 257 | 258 | ```bash 259 | # this app tests yolox_l model but you can try with other yolox models also. 260 | wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l.onnx -O ./data/yolox_l.onnx 261 | ``` 262 | 263 | - Test inference apps 264 | 265 | ```bash 266 | # after make apps 267 | ./build/examples/yolox ./data/yolox_l.onnx ./data/images/matrix.jpg 268 | ``` 269 | 270 |
271 | 272 |

(back to top)

273 | 274 | ### [Semantic Segmentation Paddle Seg](https://github.com/PaddlePaddle/PaddleSeg) 275 | 276 | --- 277 | 278 |

279 | 280 |

281 | 282 |

283 | 284 | 285 |

286 | 287 |
288 | Usage 289 | 290 | - Download PaddleSeg's bisenetv2 trained on cityscapes dataset that has been converted to onnx [HERE](https://drive.google.com/file/d/1e-anuWG_ppDXmoy0sQ0sgrdutCTGlk95/view?usp=sharing) and copy to [./data directory](./data) 291 | 292 |
293 | You can also convert your own PaddleSeg with following procedures 294 | 295 | - [export PaddleSeg model](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.3/docs/model_export.md) 296 | - convert exported model to onnx format with [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) 297 | 298 |
299 | 300 | - Test inference apps 301 | 302 | ```bash 303 | ./build/examples/semantic_segmentation_paddleseg_bisenetv2 ./data/bisenetv2_cityscapes.onnx ./data/images/sample_city_scapes.png 304 | ./build/examples/semantic_segmentation_paddleseg_bisenetv2 ./data/bisenetv2_cityscapes.onnx ./data/images/odaiba.jpg 305 | ``` 306 | 307 |
308 | 309 |

(back to top)

310 | 311 | ### [SuperPoint](https://arxiv.org/pdf/1712.07629.pdf) 312 | 313 | --- 314 | 315 |

316 | 317 |

318 | 319 |
320 | Usage 321 | 322 | - Convert SuperPoint's pretrained weights to onnx format 323 | 324 | ```bash 325 | git submodule update --init --recursive 326 | python3 -m pip install -r scripts/superpoint/requirements.txt 327 | python3 scripts/superpoint/convert_to_onnx.py 328 | ``` 329 | 330 | - Download test images from [this dataset](https://github.com/StaRainJ/Multi-modality-image-matching-database-metrics-methods) 331 | 332 | ```bash 333 | wget https://raw.githubusercontent.com/StaRainJ/Multi-modality-image-matching-database-metrics-methods/master/Multimodal_Image_Matching_Datasets/ComputerVision/CrossSeason/VisionCS_0a.png -P data 334 | 335 | wget https://raw.githubusercontent.com/StaRainJ/Multi-modality-image-matching-database-metrics-methods/master/Multimodal_Image_Matching_Datasets/ComputerVision/CrossSeason/VisionCS_0b.png -P data 336 | ``` 337 | 338 | - Test inference apps 339 | 340 | ```bash 341 | ./build/examples/super_point /path/to/super_point.onnx data/VisionCS_0a.png data/VisionCS_0b.png 342 | ``` 343 | 344 |
345 | 346 |

(back to top)

347 | 348 | ### [SuperGlue](https://arxiv.org/pdf/1911.11763.pdf) 349 | 350 | --- 351 | 352 |

353 | 354 | 355 |

356 | 357 |

358 | 359 | 360 |

361 | 362 |

363 | 364 | 365 |

366 | 367 |
368 | Usage 369 | 370 | - Convert SuperPoint's pretrained weights to onnx format: Follow the above instruction 371 | 372 | - Convert SuperGlue's pretrained weights to onnx format 373 | 374 | ```bash 375 | git submodule update --init --recursive 376 | python3 -m pip install -r scripts/superglue/requirements.txt 377 | python3 -m pip install -r scripts/superglue/SuperGluePretrainedNetwork/requirements.txt 378 | python3 scripts/superglue/convert_to_onnx.py 379 | ``` 380 | 381 | - Download test images from [this dataset](https://github.com/StaRainJ/Multi-modality-image-matching-database-metrics-methods): Or prepare some pairs of your own images 382 | 383 | - Test inference apps 384 | 385 | ```bash 386 | ./build/examples/super_glue /path/to/super_point.onnx /path/to/super_glue.onnx /path/to/1st/image /path/to/2nd/image 387 | ``` 388 | 389 |
390 | 391 |

(back to top)

392 | 393 | ### [LoFTR](https://zju3dv.github.io/loftr/) 394 | 395 | --- 396 | 397 |

398 | 399 |

400 | 401 |
402 | Usage 403 | 404 | - Download [LoFTR](https://github.com/zju3dv/LoFTR) weights indoor*ds_new.ckpt from [HERE](https://drive.google.com/drive/folders/1xu2Pq6mZT5hmFgiYMBT9Zt8h1yO-3SIp). (LoFTR's [latest commit](b4ee7eb0359d0062e794c99f73e27639d7c7ac9f) seems to be only compatible with the new weights (Ref: https://github.com/zju3dv/LoFTR/issues/48). Hence, this onnx cpp application is only compatible with \_indoor_ds_new.ckpt* weights) 405 | 406 | - Convert LoFTR's pretrained weights to onnx format 407 | 408 | ```bash 409 | git submodule update --init --recursive 410 | python3 -m pip install -r scripts/loftr/requirements.txt 411 | python3 scripts/loftr/convert_to_onnx.py --model_path /path/to/indoor_ds_new.ckpt 412 | ``` 413 | 414 | - Download test images from [this dataset](https://github.com/StaRainJ/Multi-modality-image-matching-database-metrics-methods): Or prepare some pairs of your own images 415 | 416 | - Test inference apps 417 | 418 | ```bash 419 | ./build/examples/loftr /path/to/loftr.onnx /path/to/loftr.onnx /path/to/1st/image /path/to/2nd/image 420 | ``` 421 | 422 |
423 | 424 |

(back to top)

425 | -------------------------------------------------------------------------------- /cmake/cmake_utility.cmake: -------------------------------------------------------------------------------- 1 | function(__build_example example_name public_lib private_lib) 2 | add_executable( 3 | ${example_name} 4 | ${CMAKE_CURRENT_LIST_DIR}/${example_name}.cpp 5 | ) 6 | 7 | target_link_libraries( 8 | ${example_name} 9 | PUBLIC 10 | ${public_lib} 11 | PRIVATE 12 | ${private_lib} 13 | ) 14 | endfunction() 15 | -------------------------------------------------------------------------------- /cmake/tensorrt.cmake: -------------------------------------------------------------------------------- 1 | # Ref: //github.com/PRBonn/rangenet_lib/blob/master/cmake/tensorrt-config.cmake 2 | 3 | find_package(CUDA) 4 | find_library(NVINFER NAMES nvinfer) 5 | find_library(NVINFERPLUGIN NAMES nvinfer_plugin) 6 | find_library(NVPARSERS NAMES nvparsers) 7 | find_library(NVONNXPARSER NAMES nvonnxparser) 8 | 9 | # newer tensorrt does not have nvonnxparser_runtime 10 | # find_library(NVONNXPARSERRUNTIME NAMES nvonnxparser_runtime) 11 | 12 | # If it is ALL there, export libraries as a single package 13 | if(CUDA_FOUND AND NVINFER AND NVINFERPLUGIN AND NVPARSERS AND NVONNXPARSER) 14 | message("TensorRT available!") 15 | message("CUDA Libs: ${CUDA_LIBRARIES}") 16 | message("CUDA Headers: ${CUDA_INCLUDE_DIRS}") 17 | message("NVINFER: ${NVINFER}") 18 | message("NVINFERPLUGIN: ${NVINFERPLUGIN}") 19 | message("NVPARSERS: ${NVPARSERS}") 20 | message("NVONNXPARSER: ${NVONNXPARSER}") 21 | list(APPEND TENSORRT_LIBRARIES ${CUDA_LIBRARIES} nvinfer nvinfer_plugin nvparsers nvonnxparser) 22 | message("All togheter now (libs): ${TENSORRT_LIBRARIES}") 23 | list(APPEND TENSORRT_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) 24 | message("All togheter now (inc): ${TENSORRT_INCLUDE_DIRS}") 25 | set(TENSORRT_FOUND ON) 26 | else() 27 | message("TensorRT NOT Available") 28 | set(TENSORRT_FOUND OFF) 29 | endif() 30 | 
-------------------------------------------------------------------------------- /data/images/bird_detection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/bird_detection.jpg -------------------------------------------------------------------------------- /data/images/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/dog.jpg -------------------------------------------------------------------------------- /data/images/dogs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/dogs.jpg -------------------------------------------------------------------------------- /data/images/endgame.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/endgame.jpg -------------------------------------------------------------------------------- /data/images/indoor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/indoor.jpg -------------------------------------------------------------------------------- /data/images/matrix.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/matrix.jpg -------------------------------------------------------------------------------- /data/images/no_way_home.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/no_way_home.jpg -------------------------------------------------------------------------------- /data/images/odaiba.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/odaiba.jpg -------------------------------------------------------------------------------- /data/images/sample_city_scapes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/images/sample_city_scapes.png -------------------------------------------------------------------------------- /data/squeezenet1.1.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/squeezenet1.1.onnx -------------------------------------------------------------------------------- /data/version-RFB-640.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/data/version-RFB-640.onnx -------------------------------------------------------------------------------- /dockerfiles/ubuntu2004.dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | WORKDIR /build 6 | COPY ./scripts . 
7 | 8 | RUN apt-get update && \ 9 | apt-get install -y --no-install-recommends \ 10 | sudo \ 11 | gnupg2 \ 12 | lsb-release \ 13 | build-essential \ 14 | software-properties-common \ 15 | cmake \ 16 | git \ 17 | tmux && \ 18 | bash install_latest_cmake.bash && \ 19 | bash install_onnx_runtime.bash && \ 20 | bash install_apps_dependencies.bash && \ 21 | rm -rf /build && \ 22 | apt-get clean && \ 23 | rm -rf /var/lib/apt/lists/* 24 | 25 | WORKDIR /workspace 26 | 27 | ENTRYPOINT ["/bin/bash"] 28 | -------------------------------------------------------------------------------- /dockerfiles/ubuntu2004_gpu.dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.4.0-cudnn8-devel-ubuntu20.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | WORKDIR /build 6 | 7 | RUN apt-get update && \ 8 | apt-get install -y --no-install-recommends \ 9 | sudo \ 10 | gnupg2 \ 11 | lsb-release \ 12 | build-essential \ 13 | software-properties-common \ 14 | cmake \ 15 | git \ 16 | tmux && \ 17 | bash install_latest_cmake.bash && \ 18 | bash install_onnx_runtime.bash && \ 19 | bash install_apps_dependencies.bash && \ 20 | rm -rf /build && \ 21 | apt-get clean && \ 22 | rm -rf /var/lib/apt/lists/* 23 | 24 | WORKDIR /workspace 25 | 26 | ENTRYPOINT ["/bin/bash"] 27 | -------------------------------------------------------------------------------- /dockerfiles/ubuntu2004_tensorrt.dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/tensorrt:21.07-py3 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | WORKDIR /build 6 | 7 | RUN apt-get update && \ 8 | apt-get install -y --no-install-recommends \ 9 | sudo \ 10 | gnupg2 \ 11 | lsb-release \ 12 | build-essential \ 13 | software-properties-common \ 14 | cmake \ 15 | git \ 16 | tmux && \ 17 | bash install_latest_cmake.bash && \ 18 | bash install_onnx_runtime.bash && \ 19 | bash install_apps_dependencies.bash && \ 20 | rm -rf 
/build && \ 21 | apt-get clean && \ 22 | rm -rf /var/lib/apt/lists/* 23 | 24 | WORKDIR /workspace 25 | 26 | ENTRYPOINT ["/bin/bash"] 27 | -------------------------------------------------------------------------------- /docs/images/ComputerVision_VN_30.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/ComputerVision_VN_30.jpg -------------------------------------------------------------------------------- /docs/images/ComputerVision_VisionCS_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/ComputerVision_VisionCS_0.jpg -------------------------------------------------------------------------------- /docs/images/ComputerVision_VisionDN_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/ComputerVision_VisionDN_1.jpg -------------------------------------------------------------------------------- /docs/images/RemoteSensing_CS1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/RemoteSensing_CS1.jpg -------------------------------------------------------------------------------- /docs/images/RemoteSensing_CS5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/RemoteSensing_CS5.jpg -------------------------------------------------------------------------------- /docs/images/RetinaFix.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/RetinaFix.jpg -------------------------------------------------------------------------------- /docs/images/bird_detection_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/bird_detection_result.jpg -------------------------------------------------------------------------------- /docs/images/cityscapes_legend.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/cityscapes_legend.jpg -------------------------------------------------------------------------------- /docs/images/dogs_maskrcnn_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/dogs_maskrcnn_result.jpg -------------------------------------------------------------------------------- /docs/images/endgame_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/endgame_result.jpg -------------------------------------------------------------------------------- /docs/images/indoor_maskrcnn_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/indoor_maskrcnn_result.jpg -------------------------------------------------------------------------------- /docs/images/loftr.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/loftr.jpg -------------------------------------------------------------------------------- /docs/images/matrix_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/matrix_result.jpg -------------------------------------------------------------------------------- /docs/images/no_way_home_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/no_way_home_result.jpg -------------------------------------------------------------------------------- /docs/images/odaiba_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/odaiba_result.jpg -------------------------------------------------------------------------------- /docs/images/sample_city_scapes_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/sample_city_scapes_result.jpg -------------------------------------------------------------------------------- /docs/images/street_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/street_result.jpg -------------------------------------------------------------------------------- /docs/images/super_point_good_matches.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/super_point_good_matches.jpg -------------------------------------------------------------------------------- /docs/images/tiny_yolov2_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmba15/onnx_runtime_cpp/f3f40a04fdb1074bf8de596edf92fbdbaa802660/docs/images/tiny_yolov2_result.jpg -------------------------------------------------------------------------------- /docs/onnxruntime_tensorrt.md: -------------------------------------------------------------------------------- 1 | # 📝 memo on how to use tensorrt onnxruntime 2 | 3 | --- 4 | 5 | ## :running: How to Run 6 | 7 | --- 8 | 9 | Sample docker with tensorrt environment is provided [HERE](../dockerfiles/ubuntu2004_tensorrt.dockerfile) 10 | 11 | ## :gem: References 12 | 13 | --- 14 | 15 | - [tensorrt tags](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorrt/tags) 16 | - [overview of tensorrt docker](https://docs.nvidia.com/deeplearning/tensorrt/container-release-notes/overview.html) 17 | - [tensorrt introduction](https://developer.nvidia.com/tensorrt) 18 | - [build instruction](https://onnxruntime.ai/docs/build/eps.html) 19 | - [sample dockerfile provided by onnxruntime repo](https://github.com/microsoft/onnxruntime/blob/v1.10.0/dockerfiles/Dockerfile.tensorrt): _need to choose suitable tensorrt version that matches onnxruntime version_ 20 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | list(APPEND EXAMPLES 4 | TestImageClassification 5 | PrimitiveTest 6 | ) 7 | 8 | include(cmake_utility) 9 | 10 | find_package(OpenCV REQUIRED) 11 | 12 | list(APPEND 
PUBLIC_LIBS 13 | ${PROJECT_NAME} 14 | ${OpenCV_LIBS} 15 | ) 16 | 17 | list(APPEND PRIVATE_LIBS 18 | ) 19 | 20 | foreach(EXAMPLE ${EXAMPLES}) 21 | __build_example( 22 | ${EXAMPLE} 23 | "${PUBLIC_LIBS}" 24 | "${PRIVATE_LIBS}" 25 | ) 26 | target_include_directories(${EXAMPLE} 27 | PUBLIC 28 | ${OpenCV_INCLUDE_DIRS} 29 | ) 30 | endforeach(EXAMPLE) 31 | 32 | # --------------------------------------------------------- 33 | 34 | add_executable(tiny_yolo_v2 35 | ${CMAKE_CURRENT_LIST_DIR}/TinyYolov2.cpp 36 | ${CMAKE_CURRENT_LIST_DIR}/TinyYolov2.hpp 37 | ${CMAKE_CURRENT_LIST_DIR}/TinyYolov2App.cpp 38 | ) 39 | 40 | target_link_libraries(tiny_yolo_v2 41 | PUBLIC 42 | ${PROJECT_NAME} 43 | ${OpenCV_LIBS} 44 | ) 45 | 46 | target_include_directories(tiny_yolo_v2 47 | PUBLIC 48 | ${OpenCV_INCLUDE_DIRS} 49 | ) 50 | 51 | # --------------------------------------------------------- 52 | 53 | add_executable(mask_rcnn 54 | ${CMAKE_CURRENT_LIST_DIR}/MaskRCNN.cpp 55 | ${CMAKE_CURRENT_LIST_DIR}/MaskRCNN.hpp 56 | ${CMAKE_CURRENT_LIST_DIR}/MaskRCNNApp.cpp 57 | ) 58 | 59 | target_link_libraries(mask_rcnn 60 | PUBLIC 61 | ${PROJECT_NAME} 62 | ${OpenCV_LIBS} 63 | ) 64 | 65 | target_include_directories(mask_rcnn 66 | PUBLIC 67 | ${OpenCV_INCLUDE_DIRS} 68 | ) 69 | 70 | # --------------------------------------------------------- 71 | 72 | add_executable(yolov3 73 | ${CMAKE_CURRENT_LIST_DIR}/Yolov3.cpp 74 | ${CMAKE_CURRENT_LIST_DIR}/Yolov3.hpp 75 | ${CMAKE_CURRENT_LIST_DIR}/Yolov3App.cpp 76 | ) 77 | 78 | target_link_libraries(yolov3 79 | PUBLIC 80 | ${PROJECT_NAME} 81 | ${OpenCV_LIBS} 82 | ) 83 | 84 | target_include_directories(yolov3 85 | PUBLIC 86 | ${OpenCV_INCLUDE_DIRS} 87 | ) 88 | 89 | # --------------------------------------------------------- 90 | 91 | add_executable(ultra_light_face_detector 92 | ${CMAKE_CURRENT_LIST_DIR}/UltraLightFastGenericFaceDetector.cpp 93 | ${CMAKE_CURRENT_LIST_DIR}/UltraLightFastGenericFaceDetector.hpp 94 | 
${CMAKE_CURRENT_LIST_DIR}/UltraLightFastGenericFaceDetectorApp.cpp 95 | ) 96 | 97 | target_link_libraries(ultra_light_face_detector 98 | PUBLIC 99 | ${PROJECT_NAME} 100 | ${OpenCV_LIBS} 101 | ) 102 | 103 | target_include_directories(ultra_light_face_detector 104 | PUBLIC 105 | ${OpenCV_INCLUDE_DIRS} 106 | ) 107 | 108 | # --------------------------------------------------------- 109 | 110 | add_executable(yolox 111 | ${CMAKE_CURRENT_LIST_DIR}/YoloX.cpp 112 | ${CMAKE_CURRENT_LIST_DIR}/YoloX.hpp 113 | ${CMAKE_CURRENT_LIST_DIR}/YoloXApp.cpp 114 | ) 115 | 116 | target_link_libraries(yolox 117 | PUBLIC 118 | ${PROJECT_NAME} 119 | ${OpenCV_LIBS} 120 | ) 121 | 122 | target_include_directories(yolox 123 | PUBLIC 124 | ${OpenCV_INCLUDE_DIRS} 125 | ) 126 | 127 | # --------------------------------------------------------- 128 | 129 | add_executable(semantic_segmentation_paddleseg_bisenetv2 130 | ${CMAKE_CURRENT_LIST_DIR}/SemanticSegmentationPaddleSegBisenetv2.cpp 131 | ${CMAKE_CURRENT_LIST_DIR}/SemanticSegmentationPaddleSegBisenetv2App.cpp 132 | ) 133 | 134 | target_link_libraries(semantic_segmentation_paddleseg_bisenetv2 135 | PUBLIC 136 | ${PROJECT_NAME} 137 | ${OpenCV_LIBS} 138 | ) 139 | 140 | target_include_directories(semantic_segmentation_paddleseg_bisenetv2 141 | PUBLIC 142 | ${OpenCV_INCLUDE_DIRS} 143 | ) 144 | 145 | # --------------------------------------------------------- 146 | 147 | add_executable(super_point 148 | ${CMAKE_CURRENT_LIST_DIR}/SuperPoint.cpp 149 | ${CMAKE_CURRENT_LIST_DIR}/SuperPointApp.cpp 150 | ) 151 | 152 | target_link_libraries(super_point 153 | PUBLIC 154 | ${PROJECT_NAME} 155 | ${OpenCV_LIBS} 156 | ) 157 | 158 | target_include_directories(super_point 159 | PUBLIC 160 | ${OpenCV_INCLUDE_DIRS} 161 | ) 162 | 163 | # --------------------------------------------------------- 164 | 165 | add_executable(super_glue 166 | ${CMAKE_CURRENT_LIST_DIR}/SuperPoint.cpp 167 | ${CMAKE_CURRENT_LIST_DIR}/SuperGlueApp.cpp 168 | ) 169 | 170 | 
target_link_libraries(super_glue 171 | PUBLIC 172 | ${PROJECT_NAME} 173 | ${OpenCV_LIBS} 174 | ) 175 | 176 | target_include_directories(super_glue 177 | PUBLIC 178 | ${OpenCV_INCLUDE_DIRS} 179 | ) 180 | 181 | # --------------------------------------------------------- 182 | 183 | add_executable(loftr 184 | ${CMAKE_CURRENT_LIST_DIR}/LoFTR.cpp 185 | ${CMAKE_CURRENT_LIST_DIR}/LoFTRApp.cpp 186 | ) 187 | 188 | target_link_libraries(loftr 189 | PUBLIC 190 | ${PROJECT_NAME} 191 | ${OpenCV_LIBS} 192 | ) 193 | 194 | target_include_directories(loftr 195 | PUBLIC 196 | ${OpenCV_INCLUDE_DIRS} 197 | ) 198 | -------------------------------------------------------------------------------- /examples/LoFTR.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file LoFTR.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "LoFTR.hpp" 9 | 10 | namespace Ort 11 | { 12 | void LoFTR::preprocess(float* dst, const unsigned char* src, const int64_t targetImgWidth, 13 | const int64_t targetImgHeight, const int numChannels) const 14 | { 15 | for (int i = 0; i < targetImgHeight; ++i) { 16 | for (int j = 0; j < targetImgWidth; ++j) { 17 | for (int c = 0; c < numChannels; ++c) { 18 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 19 | (src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0); 20 | } 21 | } 22 | } 23 | } 24 | } // namespace Ort 25 | -------------------------------------------------------------------------------- /examples/LoFTR.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file LoFTR.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | namespace Ort 13 | { 14 | class LoFTR : public OrtSessionHandler 15 | { 16 | public: 17 | static constexpr int64_t IMG_H = 480; 18 | static constexpr int64_t IMG_W = 640; 19 | static constexpr int64_t IMG_CHANNEL = 1; 20 | 21 | using 
OrtSessionHandler::OrtSessionHandler; 22 | 23 | void preprocess(float* dst, // 24 | const unsigned char* src, // 25 | const int64_t targetImgWidth, // 26 | const int64_t targetImgHeight, // 27 | const int numChannels) const; 28 | }; 29 | } // namespace Ort 30 | -------------------------------------------------------------------------------- /examples/LoFTRApp.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file LoFTRApp.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "LoFTR.hpp" 9 | #include "Utility.hpp" 10 | #include 11 | 12 | static constexpr float CONFIDENCE_THRESHOLD = 0.1; 13 | 14 | namespace 15 | { 16 | std::pair, std::vector> 17 | processOneImagePair(const Ort::LoFTR& loftrOsh, const cv::Mat& queryImg, const cv::Mat& refImg, float* queryData, 18 | float* refData, float confidenceThresh = CONFIDENCE_THRESHOLD); 19 | } // namespace 20 | 21 | int main(int argc, char* argv[]) 22 | { 23 | if (argc != 4) { 24 | std::cerr << "Usage: [apps] [path/to/onnx/loftr] [path/to/image1] [path/to/image2]" << std::endl; 25 | return EXIT_FAILURE; 26 | } 27 | 28 | const std::string ONNX_MODEL_PATH = argv[1]; 29 | const std::vector IMAGE_PATHS = {argv[2], argv[3]}; 30 | 31 | std::vector images; 32 | std::vector grays; 33 | std::transform(IMAGE_PATHS.begin(), IMAGE_PATHS.end(), std::back_inserter(images), 34 | [](const auto& imagePath) { return cv::imread(imagePath); }); 35 | for (int i = 0; i < 2; ++i) { 36 | if (images[i].empty()) { 37 | throw std::runtime_error("failed to open " + IMAGE_PATHS[i]); 38 | } 39 | } 40 | 41 | std::transform(IMAGE_PATHS.begin(), IMAGE_PATHS.end(), std::back_inserter(grays), 42 | [](const auto& imagePath) { return cv::imread(imagePath, 0); }); 43 | 44 | std::vector queryData(Ort::LoFTR::IMG_CHANNEL * Ort::LoFTR::IMG_H * Ort::LoFTR::IMG_W); 45 | std::vector refData(Ort::LoFTR::IMG_CHANNEL * Ort::LoFTR::IMG_H * Ort::LoFTR::IMG_W); 46 | 47 | Ort::LoFTR osh( 48 | ONNX_MODEL_PATH, 0, 49 | 
std::vector>{{1, Ort::LoFTR::IMG_CHANNEL, Ort::LoFTR::IMG_H, Ort::LoFTR::IMG_W}, 50 | {1, Ort::LoFTR::IMG_CHANNEL, Ort::LoFTR::IMG_H, Ort::LoFTR::IMG_W}}); 51 | 52 | auto matchedKpts = processOneImagePair(osh, grays[0], grays[1], queryData.data(), refData.data()); 53 | const std::vector& queryKpts = matchedKpts.first; 54 | const std::vector& refKpts = matchedKpts.second; 55 | std::vector matches; 56 | for (int i = 0; i < queryKpts.size(); ++i) { 57 | cv::DMatch match; 58 | match.imgIdx = 0; 59 | match.queryIdx = i; 60 | match.trainIdx = i; 61 | matches.emplace_back(std::move(match)); 62 | } 63 | cv::Mat matchesImage; 64 | cv::drawMatches(images[0], queryKpts, images[1], refKpts, matches, matchesImage, cv::Scalar::all(-1), 65 | cv::Scalar::all(-1), std::vector(), cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); 66 | cv::imwrite("loftr.jpg", matchesImage); 67 | cv::imshow("loftr", matchesImage); 68 | cv::waitKey(); 69 | 70 | return EXIT_SUCCESS; 71 | } 72 | 73 | namespace 74 | { 75 | std::pair, std::vector> 76 | processOneImagePair(const Ort::LoFTR& loftrOsh, const cv::Mat& queryImg, const cv::Mat& refImg, float* queryData, 77 | float* refData, float confidenceThresh) 78 | { 79 | int origQueryW = queryImg.cols, origQueryH = queryImg.rows; 80 | int origRefW = refImg.cols, origRefH = refImg.rows; 81 | 82 | cv::Mat scaledQueryImg, scaledRefImg; 83 | cv::resize(queryImg, scaledQueryImg, cv::Size(Ort::LoFTR::IMG_W, Ort::LoFTR::IMG_H), 0, 0, cv::INTER_CUBIC); 84 | cv::resize(refImg, scaledRefImg, cv::Size(Ort::LoFTR::IMG_W, Ort::LoFTR::IMG_H), 0, 0, cv::INTER_CUBIC); 85 | 86 | loftrOsh.preprocess(queryData, scaledQueryImg.data, Ort::LoFTR::IMG_W, Ort::LoFTR::IMG_H, Ort::LoFTR::IMG_CHANNEL); 87 | loftrOsh.preprocess(refData, scaledRefImg.data, Ort::LoFTR::IMG_W, Ort::LoFTR::IMG_H, Ort::LoFTR::IMG_CHANNEL); 88 | auto inferenceOutput = loftrOsh({queryData, refData}); 89 | 90 | // inferenceOutput[0].second: keypoints0 of shape [num kpt x 2] 91 | // inferenceOutput[1].second: 
keypoints1 of shape [num kpt x 2] 92 | // inferenceOutput[2].second: confidences of shape [num kpt] 93 | 94 | int numKeyPoints = inferenceOutput[2].second[0]; 95 | std::vector queryKpts, refKpts; 96 | queryKpts.reserve(numKeyPoints); 97 | refKpts.reserve(numKeyPoints); 98 | 99 | for (int i = 0; i < numKeyPoints; ++i) { 100 | float confidence = inferenceOutput[2].first[i]; 101 | if (confidence < confidenceThresh) { 102 | continue; 103 | } 104 | float queryX = inferenceOutput[0].first[i * 2 + 0]; 105 | float queryY = inferenceOutput[0].first[i * 2 + 1]; 106 | float refX = inferenceOutput[1].first[i * 2 + 0]; 107 | float refY = inferenceOutput[1].first[i * 2 + 1]; 108 | cv::KeyPoint queryKpt, refKpt; 109 | queryKpt.pt.x = queryX * origQueryW / Ort::LoFTR::IMG_W; 110 | queryKpt.pt.y = queryY * origQueryH / Ort::LoFTR::IMG_H; 111 | 112 | refKpt.pt.x = refX * origRefW / Ort::LoFTR::IMG_W; 113 | refKpt.pt.y = refY * origRefH / Ort::LoFTR::IMG_H; 114 | 115 | queryKpts.emplace_back(std::move(queryKpt)); 116 | refKpts.emplace_back(std::move(refKpt)); 117 | } 118 | 119 | return std::make_pair(queryKpts, refKpts); 120 | } 121 | } // namespace 122 | -------------------------------------------------------------------------------- /examples/MaskRCNN.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file MaskRCNN.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | 10 | #include "MaskRCNN.hpp" 11 | 12 | namespace Ort 13 | { 14 | MaskRCNN::MaskRCNN(const uint16_t numClasses, // 15 | const std::string& modelPath, // 16 | const std::optional& gpuIdx, 17 | const std::optional>>& inputShapes) 18 | : ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes) 19 | { 20 | } 21 | 22 | MaskRCNN::~MaskRCNN() 23 | { 24 | } 25 | 26 | void MaskRCNN::preprocess(float* dst, // 27 | const float* src, // 28 | const int64_t targetImgWidth, // 29 | const int64_t targetImgHeight, // 30 | const int numChannels) const 
31 | { 32 | for (int c = 0; c < numChannels; ++c) { 33 | for (int i = 0; i < targetImgHeight; ++i) { 34 | for (int j = 0; j < targetImgWidth; ++j) { 35 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 36 | src[i * targetImgWidth * numChannels + j * numChannels + c]; 37 | } 38 | } 39 | } 40 | } 41 | 42 | void MaskRCNN::preprocess(float* dst, // 43 | const cv::Mat& imgSrc, // 44 | const int64_t targetImgWidth, // 45 | const int64_t targetImgHeight, // 46 | const int numChannels) const 47 | { 48 | for (int i = 0; i < targetImgHeight; ++i) { 49 | for (int j = 0; j < targetImgWidth; ++j) { 50 | for (int c = 0; c < numChannels; ++c) { 51 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = imgSrc.ptr(i, j)[c]; 52 | } 53 | } 54 | } 55 | } 56 | 57 | } // namespace Ort 58 | -------------------------------------------------------------------------------- /examples/MaskRCNN.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file MaskRCNN.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include 17 | 18 | namespace Ort 19 | { 20 | class MaskRCNN : public ImageRecognitionOrtSessionHandlerBase 21 | { 22 | public: 23 | static constexpr int64_t MIN_IMAGE_SIZE = 800; 24 | 25 | static constexpr int64_t IMG_CHANNEL = 3; 26 | 27 | MaskRCNN(const uint16_t numClasses, // 28 | const std::string& modelPath, // 29 | const std::optional& gpuIdx = std::nullopt, // 30 | const std::optional>>& inputShapes = std::nullopt); 31 | 32 | ~MaskRCNN(); 33 | 34 | void preprocess(float* dst, // 35 | const float* src, // 36 | const int64_t targetImgWidth, // 37 | const int64_t targetImgHeight, // 38 | const int numChannels) const; 39 | 40 | void preprocess(float* dst, // 41 | const cv::Mat& imgSrc, // 42 | const int64_t targetImgWidth, // 43 | const int64_t targetImgHeight, // 44 | const int numChannels) const; 45 | }; 46 | } 
// namespace Ort 47 | -------------------------------------------------------------------------------- /examples/MaskRCNNApp.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file MaskRCNNApp.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include 15 | 16 | #include "MaskRCNN.hpp" 17 | #include "Utility.hpp" 18 | 19 | static constexpr float CONFIDENCE_THRESHOLD = 0.5; 20 | static const std::vector COLORS = toCvScalarColors(Ort::MSCOCO_COLOR_CHART); 21 | 22 | namespace 23 | { 24 | cv::Mat processOneFrame(const Ort::MaskRCNN& osh, const cv::Mat& inputImg, int newW, int newH, int paddedW, int paddedH, 25 | float ratio, float* dst, const float confThresh = 0.5, 26 | const cv::Scalar& meanVal = cv::Scalar(102.9801, 115.9465, 122.7717), 27 | bool visualizeMask = true); 28 | } // namespace 29 | 30 | int main(int argc, char* argv[]) 31 | { 32 | if (argc != 3) { 33 | std::cerr << "Usage: [apps] [path/to/onnx/maskrcnn.onnx] [path/to/image]" << std::endl; 34 | return EXIT_FAILURE; 35 | } 36 | 37 | const std::string ONNX_MODEL_PATH = argv[1]; 38 | const std::string IMAGE_PATH = argv[2]; 39 | 40 | cv::Mat img = cv::imread(IMAGE_PATH); 41 | 42 | if (img.empty()) { 43 | std::cerr << "Failed to read input image" << std::endl; 44 | return EXIT_FAILURE; 45 | } 46 | 47 | float ratio = 800.0 / std::min(img.cols, img.rows); 48 | int newW = ratio * img.cols; 49 | int newH = ratio * img.rows; 50 | int paddedH = static_cast(((newH + 31) / 32) * 32); 51 | int paddedW = static_cast(((newW + 31) / 32) * 32); 52 | 53 | Ort::MaskRCNN osh(Ort::MSCOCO_NUM_CLASSES, ONNX_MODEL_PATH, 0, 54 | std::vector>{{Ort::MaskRCNN::IMG_CHANNEL, paddedH, paddedW}}); 55 | 56 | osh.initClassNames(Ort::MSCOCO_CLASSES); 57 | 58 | std::vector dst(Ort::MaskRCNN::IMG_CHANNEL * paddedH * paddedW); 59 | 60 | auto resultImg = ::processOneFrame(osh, img, newW, newH, paddedW, paddedH, ratio, dst.data(), 
CONFIDENCE_THRESHOLD); 61 | cv::imwrite("result.jpg", resultImg); 62 | 63 | return EXIT_SUCCESS; 64 | } 65 | 66 | namespace 67 | { 68 | cv::Mat processOneFrame(const Ort::MaskRCNN& osh, const cv::Mat& inputImg, int newW, int newH, int paddedW, int paddedH, 69 | float ratio, float* dst, float confThresh, const cv::Scalar& meanVal, bool visualizeMask) 70 | { 71 | cv::Mat tmpImg; 72 | cv::resize(inputImg, tmpImg, cv::Size(newW, newH)); 73 | 74 | tmpImg.convertTo(tmpImg, CV_32FC3); 75 | tmpImg -= meanVal; 76 | 77 | cv::Mat paddedImg(paddedH, paddedW, CV_32FC3, cv::Scalar(0, 0, 0)); 78 | tmpImg.copyTo(paddedImg(cv::Rect(0, 0, newW, newH))); 79 | 80 | osh.preprocess(dst, paddedImg.ptr(), paddedW, paddedH, 3); 81 | // or 82 | // osh.preprocess(dst, paddedImg, paddedW, paddedH, 3); 83 | 84 | // boxes, labels, scores, masks 85 | auto inferenceOutput = osh({dst}); 86 | 87 | assert(inferenceOutput[1].second.size() == 1); 88 | size_t nBoxes = inferenceOutput[1].second[0]; 89 | 90 | std::vector> bboxes; 91 | std::vector classIndices; 92 | std::vector masks; 93 | 94 | bboxes.reserve(nBoxes); 95 | classIndices.reserve(nBoxes); 96 | masks.reserve(nBoxes); 97 | 98 | for (size_t i = 0; i < nBoxes; ++i) { 99 | if (inferenceOutput[2].first[i] > confThresh) { 100 | DEBUG_LOG("%f", inferenceOutput[2].first[i]); 101 | 102 | float xmin = inferenceOutput[0].first[i * 4 + 0] / ratio; 103 | float ymin = inferenceOutput[0].first[i * 4 + 1] / ratio; 104 | float xmax = inferenceOutput[0].first[i * 4 + 2] / ratio; 105 | float ymax = inferenceOutput[0].first[i * 4 + 3] / ratio; 106 | 107 | xmin = std::max(xmin, 0); 108 | ymin = std::max(ymin, 0); 109 | xmax = std::min(xmax, inputImg.cols - 1); 110 | ymax = std::min(ymax, inputImg.rows - 1); 111 | 112 | bboxes.emplace_back(std::array{xmin, ymin, xmax, ymax}); 113 | classIndices.emplace_back(reinterpret_cast(inferenceOutput[1].first)[i]); 114 | 115 | cv::Mat curMask(28, 28, CV_32FC1); 116 | memcpy(curMask.data, inferenceOutput[3].first + i * 28 * 
28, 28 * 28 * sizeof(float)); 117 | masks.emplace_back(curMask); 118 | } 119 | } 120 | 121 | if (bboxes.size() == 0) { 122 | return inputImg; 123 | } 124 | 125 | return visualizeMask ? ::visualizeOneImageWithMask(inputImg, bboxes, classIndices, masks, COLORS, osh.classNames()) 126 | : ::visualizeOneImage(inputImg, bboxes, classIndices, COLORS, osh.classNames()); 127 | } 128 | } // namespace 129 | -------------------------------------------------------------------------------- /examples/PrimitiveTest.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file PrimitiveTest.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | /** 9 | * @brief primitive app to test size and name of input/output tensors 10 | * DO BUILD with DEBUG mode to see the debug messages 11 | */ 12 | 13 | #include 14 | 15 | #include 16 | 17 | namespace 18 | { 19 | static constexpr int64_t DUMMY_NUM_CLASSES = 2021; 20 | } // namespace 21 | 22 | int main(int argc, char* argv[]) 23 | { 24 | if (argc != 2) { 25 | std::cerr << "Usage: [apps] [path/to/onnx/model]" << std::endl; 26 | return EXIT_FAILURE; 27 | } 28 | 29 | const std::string ONNX_MODEL_PATH = argv[1]; 30 | Ort::ObjectDetectionOrtSessionHandler osh(DUMMY_NUM_CLASSES, ONNX_MODEL_PATH, 0); 31 | 32 | return EXIT_SUCCESS; 33 | } 34 | -------------------------------------------------------------------------------- /examples/SemanticSegmentationPaddleSegBisenetv2.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SemanticSegmentationPaddleSegBisenetv2.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "SemanticSegmentationPaddleSegBisenetv2.hpp" 9 | 10 | namespace Ort 11 | { 12 | SemanticSegmentationPaddleSegBisenetv2::SemanticSegmentationPaddleSegBisenetv2( 13 | const uint16_t numClasses, // 14 | const std::string& modelPath, // 15 | const std::optional& gpuIdx, const std::optional>>& inputShapes) 16 | : 
ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes) 17 | { 18 | } 19 | 20 | void SemanticSegmentationPaddleSegBisenetv2::preprocess(float* dst, // 21 | const unsigned char* src, // 22 | int64_t targetImgWidth, // 23 | int64_t targetImgHeight, // 24 | int numChannels, // 25 | const std::vector& meanVal, // 26 | const std::vector& stdVal) const 27 | { 28 | for (int i = 0; i < targetImgHeight; ++i) { 29 | for (int j = 0; j < targetImgWidth; ++j) { 30 | for (int c = 0; c < numChannels; ++c) { 31 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 32 | (src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0 - meanVal[c]) / stdVal[c]; 33 | } 34 | } 35 | } 36 | } 37 | } // namespace Ort 38 | -------------------------------------------------------------------------------- /examples/SemanticSegmentationPaddleSegBisenetv2.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SemanticSegmentationPaddleSegBisenetv2.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | namespace Ort 17 | { 18 | class SemanticSegmentationPaddleSegBisenetv2 : public ImageRecognitionOrtSessionHandlerBase 19 | { 20 | public: 21 | static constexpr int64_t IMG_H = 1024; 22 | static constexpr int64_t IMG_W = 1024; 23 | static constexpr int64_t IMG_CHANNEL = 3; 24 | 25 | SemanticSegmentationPaddleSegBisenetv2( 26 | const uint16_t numClasses, // 27 | const std::string& modelPath, // 28 | const std::optional& gpuIdx = std::nullopt, // 29 | const std::optional>>& inputShapes = std::nullopt); 30 | 31 | void preprocess(float* dst, // 32 | const unsigned char* src, // 33 | int64_t targetImgWidth, // 34 | int64_t targetImgHeight, // 35 | int numChannels, // 36 | const std::vector& meanVal = {0.5, 0.5, 0.5}, // 37 | const std::vector& stdVal = {0.5, 0.5, 0.5}) const; 38 | }; 39 | }; // namespace Ort 40 | 
-------------------------------------------------------------------------------- /examples/SemanticSegmentationPaddleSegBisenetv2App.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SemanticSegmentationPaddleSegBisenetv2App.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | 10 | #include "SemanticSegmentationPaddleSegBisenetv2.hpp" 11 | #include "Utility.hpp" 12 | 13 | namespace 14 | { 15 | cv::Mat processOneFrame(const Ort::SemanticSegmentationPaddleSegBisenetv2& osh, const cv::Mat& inputImg, float* dst, 16 | float alpha = 0.4); 17 | static const std::vector COLORS = toCvScalarColors(Ort::CITY_SCAPES_COLOR_CHART); 18 | } // namespace 19 | 20 | int main(int argc, char* argv[]) 21 | { 22 | if (argc != 3) { 23 | std::cerr << "Usage: [apps] [path/to/onnx/semantic/segmentation] [path/to/image]" << std::endl; 24 | return EXIT_FAILURE; 25 | } 26 | 27 | const std::string ONNX_MODEL_PATH = argv[1]; 28 | const std::string IMAGE_PATH = argv[2]; 29 | 30 | cv::Mat img = cv::imread(IMAGE_PATH); 31 | 32 | if (img.empty()) { 33 | std::cerr << "Failed to read input image" << std::endl; 34 | return EXIT_FAILURE; 35 | } 36 | 37 | Ort::SemanticSegmentationPaddleSegBisenetv2 osh( 38 | Ort::CITY_SCAPES_NUM_CLASSES, ONNX_MODEL_PATH, 0, 39 | std::vector>{{1, Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_CHANNEL, 40 | Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H, 41 | Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W}}); 42 | 43 | osh.initClassNames(Ort::CITY_SCAPES_CLASSES); 44 | std::vector dst(Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_CHANNEL * 45 | Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H * 46 | Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W); 47 | 48 | auto result = processOneFrame(osh, img, dst.data()); 49 | cv::Mat legend = drawColorChart(Ort::CITY_SCAPES_CLASSES, COLORS); 50 | cv::imshow("legend", legend); 51 | cv::imshow("overlaid result", result); 52 | cv::waitKey(0); 53 | 54 | 
return EXIT_SUCCESS; 55 | } 56 | 57 | namespace 58 | { 59 | cv::Mat processOneFrame(const Ort::SemanticSegmentationPaddleSegBisenetv2& osh, const cv::Mat& inputImg, float* dst, 60 | float alpha) 61 | { 62 | int origW = inputImg.cols, origH = inputImg.rows; 63 | std::vector originImageSize{static_cast(origH), static_cast(origW)}; 64 | cv::Mat scaledImg = inputImg.clone(); 65 | cv::resize(inputImg, scaledImg, 66 | cv::Size(Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W, 67 | Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H), 68 | 0, 0, cv::INTER_CUBIC); 69 | cv::cvtColor(scaledImg, scaledImg, cv::COLOR_BGR2RGB); 70 | osh.preprocess(dst, scaledImg.data, Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W, 71 | Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H, 3); 72 | auto inferenceOutput = osh({dst}); 73 | 74 | // tips: when you have done all the tricks but still get the wrong output result, 75 | // try checking the type of inferenceOutput 76 | int64_t* data = reinterpret_cast(inferenceOutput[0].first); 77 | cv::Mat segm(Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H, Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W, 78 | CV_8UC(3)); 79 | for (int i = 0; i < Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_H; ++i) { 80 | cv::Vec3b* ptrSegm = segm.ptr(i); 81 | for (int j = 0; j < Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W; ++j) { 82 | const auto& color = COLORS[data[i * Ort::SemanticSegmentationPaddleSegBisenetv2::IMG_W + j]]; 83 | ptrSegm[j] = cv::Vec3b(color[0], color[1], color[2]); 84 | } 85 | } 86 | 87 | cv::resize(segm, segm, inputImg.size(), 0, 0, cv::INTER_NEAREST); 88 | segm = (1 - alpha) * inputImg + alpha * segm; 89 | return segm; 90 | } 91 | } // namespace 92 | -------------------------------------------------------------------------------- /examples/SuperGlueApp.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SuperGlueApp.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | 
#include "SuperPoint.hpp"
#include "Utility.hpp"

namespace
{
using KeyPointAndDesc = std::pair<std::vector<cv::KeyPoint>, cv::Mat>;

/**
 * @brief detects SuperPoint keypoints/descriptors on one grayscale frame
 *
 * @param dst preallocated CHW float buffer of IMG_CHANNEL * IMG_H * IMG_W elements
 * @return keypoints (rescaled to the original image resolution) and their descriptors
 */
KeyPointAndDesc processOneFrameSuperPoint(const Ort::SuperPoint& superPointOsh, const cv::Mat& inputImg, float* dst,
                                          int borderRemove = 4, float confidenceThresh = 0.015,
                                          bool alignCorners = true, int distThresh = 2);

// l2-normalizes each descriptor row in place
void normalizeDescriptors(cv::Mat* descriptors);
}  // namespace

int main(int argc, char* argv[])
{
    if (argc != 5) {
        std::cerr
            << "Usage: [apps] [path/to/onnx/super/point] [path/to/onnx/super/glue] [path/to/image1] [path/to/image2]"
            << std::endl;
        return EXIT_FAILURE;
    }

    const std::string ONNX_MODEL_PATH = argv[1];
    const std::string SUPERGLUE_ONNX_MODEL_PATH = argv[2];
    const std::vector<std::string> IMAGE_PATHS = {argv[3], argv[4]};

    Ort::SuperPoint superPointOsh(ONNX_MODEL_PATH, 0,
                                  std::vector<std::vector<int64_t>>{{1, Ort::SuperPoint::IMG_CHANNEL,
                                                                     Ort::SuperPoint::IMG_H, Ort::SuperPoint::IMG_W}});

    std::vector<cv::Mat> images;
    std::vector<cv::Mat> grays;
    std::transform(IMAGE_PATHS.begin(), IMAGE_PATHS.end(), std::back_inserter(images),
                   [](const auto& imagePath) { return cv::imread(imagePath); });
    for (int i = 0; i < 2; ++i) {
        if (images[i].empty()) {
            throw std::runtime_error("failed to open " + IMAGE_PATHS[i]);
        }
    }
    std::transform(IMAGE_PATHS.begin(), IMAGE_PATHS.end(), std::back_inserter(grays),
                   [](const auto& imagePath) { return cv::imread(imagePath, 0); });

    std::vector<float> dst(Ort::SuperPoint::IMG_CHANNEL * Ort::SuperPoint::IMG_H * Ort::SuperPoint::IMG_W);

    std::vector<KeyPointAndDesc> superPointResults;
    std::transform(grays.begin(), grays.end(), std::back_inserter(superPointResults),
                   [&superPointOsh, &dst](const auto& gray) {
                       return processOneFrameSuperPoint(superPointOsh, gray, dst.data());
                   });

    for (auto& curKeyPointAndDesc : superPointResults) {
        normalizeDescriptors(&curKeyPointAndDesc.second);
    }

    // superglue: the session is first created with dummy keypoint counts, then
    // the input shapes are updated once the real counts are known
    static const int DUMMY_NUM_KEYPOINTS = 256;
    Ort::OrtSessionHandler superGlueOsh(SUPERGLUE_ONNX_MODEL_PATH, 0,
                                        std::vector<std::vector<int64_t>>{
                                            {4},
                                            {1, DUMMY_NUM_KEYPOINTS},
                                            {1, DUMMY_NUM_KEYPOINTS, 2},
                                            {1, 256, DUMMY_NUM_KEYPOINTS},
                                            {4},
                                            {1, DUMMY_NUM_KEYPOINTS},
                                            {1, DUMMY_NUM_KEYPOINTS, 2},
                                            {1, 256, DUMMY_NUM_KEYPOINTS},
                                        });

    int numKeypoints0 = superPointResults[0].first.size();
    int numKeypoints1 = superPointResults[1].first.size();
    std::vector<std::vector<int64_t>> inputShapes = {
        {4}, {1, numKeypoints0}, {1, numKeypoints0, 2}, {1, 256, numKeypoints0},
        {4}, {1, numKeypoints1}, {1, numKeypoints1, 2}, {1, 256, numKeypoints1},
    };
    superGlueOsh.updateInputShapes(inputShapes);

    std::vector<std::vector<float>> imageShapes(2);
    std::vector<std::vector<float>> scores(2);
    std::vector<std::vector<float>> keypoints(2);
    std::vector<std::vector<float>> descriptors(2);

    cv::Mat buffer;
    for (int i = 0; i < 2; ++i) {
        // FIX(review): was images[0] for both entries; each image must report its own shape
        imageShapes[i] = {1, 1, static_cast<float>(images[i].rows), static_cast<float>(images[i].cols)};
        std::transform(superPointResults[i].first.begin(), superPointResults[i].first.end(),
                       std::back_inserter(scores[i]), [](const cv::KeyPoint& keypoint) { return keypoint.response; });
        // superglue expects (row, col) keypoint coordinates
        for (const auto& k : superPointResults[i].first) {
            keypoints[i].emplace_back(k.pt.y);
            keypoints[i].emplace_back(k.pt.x);
        }

        // descriptors go in as 256 x numKeypoints
        transposeNDWrapper(superPointResults[i].second, {1, 0}, buffer);
        std::copy(buffer.begin<float>(), buffer.end<float>(), std::back_inserter(descriptors[i]));
        buffer.release();
    }
    std::vector<Ort::OrtSessionHandler::DataOutputType> superGlueOrtOutput =
        superGlueOsh({imageShapes[0].data(), scores[0].data(), keypoints[0].data(), descriptors[0].data(),
                      imageShapes[1].data(), scores[1].data(), keypoints[1].data(), descriptors[1].data()});

    // match keypoints 0 to keypoints 1; a negative index means "unmatched"
    std::vector<int64_t> matchIndices(reinterpret_cast<int64_t*>(superGlueOrtOutput[0].first),
                                      reinterpret_cast<int64_t*>(superGlueOrtOutput[0].first) + numKeypoints0);

    std::vector<cv::DMatch> goodMatches;
    for (std::size_t i = 0; i < matchIndices.size(); ++i) {
        if (matchIndices[i] < 0) {
            continue;
        }
        cv::DMatch match;
        match.imgIdx = 0;
        match.queryIdx = i;
        match.trainIdx = matchIndices[i];
        goodMatches.emplace_back(match);
    }

    cv::Mat matchesImage;
    cv::drawMatches(images[0], superPointResults[0].first, images[1], superPointResults[1].first, goodMatches,
                    matchesImage, cv::Scalar::all(-1), cv::Scalar::all(-1), std::vector<char>(),
                    cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
    cv::imwrite("super_point_super_glue_good_matches.jpg", matchesImage);
    cv::imshow("super_point_super_glue_good_matches", matchesImage);
    cv::waitKey();

    return EXIT_SUCCESS;
}

namespace
{
void normalizeDescriptors(cv::Mat* descriptors)
{
    cv::Mat rsquaredSumMat;
    cv::reduce(descriptors->mul(*descriptors), rsquaredSumMat, 1, cv::REDUCE_SUM);
    cv::sqrt(rsquaredSumMat, rsquaredSumMat);
    for (int i = 0; i < descriptors->rows; ++i) {
        // clamp to avoid division by zero for (near-)zero descriptors
        float rsquaredSum = std::max<float>(rsquaredSumMat.ptr<float>()[i], 1e-12f);
        descriptors->row(i) /= rsquaredSum;
    }
}

KeyPointAndDesc processOneFrameSuperPoint(const Ort::SuperPoint& superPointOsh, const cv::Mat& inputImg, float* dst,
                                          int borderRemove, float confidenceThresh, bool alignCorners, int distThresh)
{
    int origW = inputImg.cols, origH = inputImg.rows;
    cv::Mat scaledImg;
    cv::resize(inputImg, scaledImg, cv::Size(Ort::SuperPoint::IMG_W, Ort::SuperPoint::IMG_H), 0, 0, cv::INTER_CUBIC);
    superPointOsh.preprocess(dst, scaledImg.data, Ort::SuperPoint::IMG_W, Ort::SuperPoint::IMG_H,
                             Ort::SuperPoint::IMG_CHANNEL);
    auto inferenceOutput = superPointOsh({dst});

    std::vector<cv::KeyPoint> keyPoints = superPointOsh.getKeyPoints(inferenceOutput, borderRemove, confidenceThresh);

    std::vector<int> descriptorShape(inferenceOutput[1].second.begin(), inferenceOutput[1].second.end());
    cv::Mat coarseDescriptorMat(descriptorShape.size(), descriptorShape.data(), CV_32F,
                                inferenceOutput[1].first);  // 1 x 256 x H/8 x W/8

    std::vector<int> keepIndices =
        superPointOsh.nmsFast(keyPoints, Ort::SuperPoint::IMG_H, Ort::SuperPoint::IMG_W, distThresh);

    std::vector<cv::KeyPoint> keepKeyPoints;
    keepKeyPoints.reserve(keepIndices.size());
    std::transform(keepIndices.begin(), keepIndices.end(), std::back_inserter(keepKeyPoints),
                   [&keyPoints](int idx) { return keyPoints[idx]; });
    keyPoints = std::move(keepKeyPoints);

    cv::Mat descriptors = superPointOsh.getDescriptors(coarseDescriptorMat, keyPoints, Ort::SuperPoint::IMG_H,
                                                       Ort::SuperPoint::IMG_W, alignCorners);

    // map keypoints back to the original resolution
    for (auto& keyPoint : keyPoints) {
        keyPoint.pt.x *= static_cast<float>(origW) / Ort::SuperPoint::IMG_W;
        keyPoint.pt.y *= static_cast<float>(origH) / Ort::SuperPoint::IMG_H;
    }

    return {keyPoints, descriptors};
}
}  // namespace

--------------------------------------------------------------------------------
/examples/SuperPoint.cpp:
--------------------------------------------------------------------------------
/**
 * @file SuperPoint.cpp
 *
 * @author btran
 *
 */

#include "SuperPoint.hpp"
#include "Utility.hpp"

namespace Ort
{
// scales uint8 HWC pixels to [0, 1] floats in CHW layout
void SuperPoint::preprocess(float* dst, const unsigned char* src, const int64_t targetImgWidth,
                            const int64_t targetImgHeight, const int numChannels) const
{
    for (int i = 0; i < targetImgHeight; ++i) {
        for (int j = 0; j < targetImgWidth; ++j) {
            for (int c = 0; c < numChannels; ++c) {
                dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] =
                    (src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0);
            }
        }
    }
}

26 | std::vector SuperPoint::nmsFast(const std::vector& keyPoints, int height, int width, 27 | int distThresh) const 28 | { 29 | static const int TO_PROCESS = 1; 30 | static const int EMPTY_OR_SUPPRESSED = 0; 31 | 32 | std::vector sortedIndices(keyPoints.size()); 33 | std::iota(sortedIndices.begin(), sortedIndices.end(), 0); 34 | 35 | // sort in descending order base on confidence 36 | std::stable_sort(sortedIndices.begin(), sortedIndices.end(), 37 | [&keyPoints](int lidx, int ridx) { return keyPoints[lidx].response > keyPoints[ridx].response; }); 38 | 39 | cv::Mat grid = cv::Mat(height, width, CV_8U, TO_PROCESS); 40 | std::vector keepIndices; 41 | 42 | for (int idx : sortedIndices) { 43 | int x = keyPoints[idx].pt.x; 44 | int y = keyPoints[idx].pt.y; 45 | 46 | if (grid.at(y, x) == TO_PROCESS) { 47 | for (int i = y - distThresh; i < y + distThresh; ++i) { 48 | if (i < 0 || i >= height) { 49 | continue; 50 | } 51 | 52 | for (int j = x - distThresh; j < x + distThresh; ++j) { 53 | if (j < 0 || j >= width) { 54 | continue; 55 | } 56 | grid.at(i, j) = EMPTY_OR_SUPPRESSED; 57 | } 58 | } 59 | keepIndices.emplace_back(idx); 60 | } 61 | } 62 | 63 | return keepIndices; 64 | } 65 | 66 | std::vector 67 | SuperPoint::getKeyPoints(const std::vector& inferenceOutput, int borderRemove, 68 | float confidenceThresh) const 69 | { 70 | std::vector detectorShape(inferenceOutput[0].second.begin() + 1, inferenceOutput[0].second.end()); 71 | 72 | cv::Mat detectorMat(detectorShape.size(), detectorShape.data(), CV_32F, 73 | inferenceOutput[0].first); // 65 x H/8 x W/8 74 | cv::Mat buffer; 75 | 76 | transposeNDWrapper(detectorMat, {1, 2, 0}, buffer); 77 | buffer.copyTo(detectorMat); // H/8 x W/8 x 65 78 | 79 | for (int i = 0; i < detectorShape[1]; ++i) { 80 | for (int j = 0; j < detectorShape[2]; ++j) { 81 | Ort::softmax(detectorMat.ptr(i, j), detectorShape[0]); 82 | } 83 | } 84 | detectorMat = detectorMat({cv::Range::all(), cv::Range::all(), cv::Range(0, detectorShape[0] - 1)}) 85 | 
.clone(); // H/8 x W/8 x 64 86 | detectorMat = detectorMat.reshape(1, {detectorShape[1], detectorShape[2], 8, 8}); // H/8 x W/8 x 8 x 8 87 | transposeNDWrapper(detectorMat, {0, 2, 1, 3}, buffer); 88 | buffer.copyTo(detectorMat); // H/8 x 8 x W/8 x 8 89 | 90 | detectorMat = detectorMat.reshape(1, {detectorShape[1] * 8, detectorShape[2] * 8}); // H x W 91 | 92 | std::vector keyPoints; 93 | for (int i = borderRemove; i < detectorMat.rows - borderRemove; ++i) { 94 | auto rowPtr = detectorMat.ptr(i); 95 | for (int j = borderRemove; j < detectorMat.cols - borderRemove; ++j) { 96 | if (rowPtr[j] > confidenceThresh) { 97 | cv::KeyPoint keyPoint; 98 | keyPoint.pt.x = j; 99 | keyPoint.pt.y = i; 100 | keyPoint.response = rowPtr[j]; 101 | keyPoints.emplace_back(keyPoint); 102 | } 103 | } 104 | } 105 | 106 | return keyPoints; 107 | } 108 | 109 | cv::Mat SuperPoint::getDescriptors(const cv::Mat& coarseDescriptors, const std::vector& keyPoints, 110 | int height, int width, bool alignCorners) const 111 | { 112 | cv::Mat keyPointMat(keyPoints.size(), 2, CV_32F); 113 | 114 | for (int i = 0; i < keyPoints.size(); ++i) { 115 | auto rowPtr = keyPointMat.ptr(i); 116 | rowPtr[0] = 2 * keyPoints[i].pt.y / (height - 1) - 1; 117 | rowPtr[1] = 2 * keyPoints[i].pt.x / (width - 1) - 1; 118 | } 119 | keyPointMat = keyPointMat.reshape(1, {1, 1, static_cast(keyPoints.size()), 2}); 120 | cv::Mat descriptors = bilinearGridSample(coarseDescriptors, keyPointMat, alignCorners); 121 | descriptors = descriptors.reshape(1, {coarseDescriptors.size[1], static_cast(keyPoints.size())}); 122 | 123 | cv::Mat buffer; 124 | transposeNDWrapper(descriptors, {1, 0}, buffer); 125 | 126 | return buffer; 127 | } 128 | } // namespace Ort 129 | -------------------------------------------------------------------------------- /examples/SuperPoint.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SuperPoint.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma 
once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | namespace Ort 15 | { 16 | class SuperPoint : public OrtSessionHandler 17 | { 18 | public: 19 | static constexpr int64_t IMG_H = 480; 20 | static constexpr int64_t IMG_W = 640; 21 | static constexpr int64_t IMG_CHANNEL = 1; 22 | 23 | using OrtSessionHandler::OrtSessionHandler; 24 | 25 | void preprocess(float* dst, // 26 | const unsigned char* src, // 27 | const int64_t targetImgWidth, // 28 | const int64_t targetImgHeight, // 29 | const int numChannels) const; 30 | 31 | std::vector nmsFast(const std::vector& keyPoints, int height, int width, 32 | int distThresh = 2) const; 33 | 34 | /** 35 | * @brief detect super point 36 | * 37 | * @return vector of detected key points 38 | */ 39 | std::vector getKeyPoints(const std::vector& inferenceOutput, 40 | int borderRemove, float confidenceThresh) const; 41 | 42 | /** 43 | * @brief estimate super point's keypoint descriptor 44 | * 45 | * @return keypoint Mat of shape [num key point x 256] 46 | */ 47 | cv::Mat getDescriptors(const cv::Mat& coarseDescriptors, const std::vector& keyPoints, int height, 48 | int width, bool alignCorners) const; 49 | }; 50 | } // namespace Ort 51 | -------------------------------------------------------------------------------- /examples/SuperPointApp.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SuperPointApp.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "SuperPoint.hpp" 9 | #include "Utility.hpp" 10 | #include 11 | 12 | namespace 13 | { 14 | using KeyPointAndDesc = std::pair, cv::Mat>; 15 | 16 | KeyPointAndDesc processOneFrame(const Ort::SuperPoint& osh, const cv::Mat& inputImg, float* dst, int borderRemove = 4, 17 | float confidenceThresh = 0.015, bool alignCorners = true, int distThresh = 2); 18 | 19 | } // namespace 20 | 21 | int main(int argc, char* argv[]) 22 | { 23 | if (argc != 4) { 24 | std::cerr << "Usage: [apps] [path/to/onnx/super/point] 
[path/to/image1] [path/to/image2]" << std::endl; 25 | return EXIT_FAILURE; 26 | } 27 | 28 | const std::string ONNX_MODEL_PATH = argv[1]; 29 | const std::vector IMAGE_PATHS = {argv[2], argv[3]}; 30 | 31 | Ort::SuperPoint osh(ONNX_MODEL_PATH, 0, 32 | std::vector>{ 33 | {1, Ort::SuperPoint::IMG_CHANNEL, Ort::SuperPoint::IMG_H, Ort::SuperPoint::IMG_W}}); 34 | 35 | std::vector images; 36 | std::vector grays; 37 | std::transform(IMAGE_PATHS.begin(), IMAGE_PATHS.end(), std::back_inserter(images), 38 | [](const auto& imagePath) { return cv::imread(imagePath); }); 39 | for (int i = 0; i < 2; ++i) { 40 | if (images[i].empty()) { 41 | throw std::runtime_error("failed to open " + IMAGE_PATHS[i]); 42 | } 43 | } 44 | std::transform(IMAGE_PATHS.begin(), IMAGE_PATHS.end(), std::back_inserter(grays), 45 | [](const auto& imagePath) { return cv::imread(imagePath, 0); }); 46 | 47 | std::vector dst(Ort::SuperPoint::IMG_CHANNEL * Ort::SuperPoint::IMG_H * Ort::SuperPoint::IMG_W); 48 | 49 | std::vector results; 50 | std::transform(grays.begin(), grays.end(), std::back_inserter(results), 51 | [&osh, &dst](const auto& gray) { return processOneFrame(osh, gray, dst.data()); }); 52 | 53 | cv::BFMatcher matcher(cv::NORM_L2, true /* crossCheck */); 54 | std::vector knnMatches; 55 | matcher.match(results[0].second, results[1].second, knnMatches); 56 | 57 | cv::Mat matchesImage; 58 | cv::drawMatches(images[0], results[0].first, images[1], results[1].first, knnMatches, matchesImage, 59 | cv::Scalar::all(-1), cv::Scalar::all(-1), std::vector(), 60 | cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); 61 | cv::imwrite("super_point_good_matches.jpg", matchesImage); 62 | cv::imshow("super_point_good_matches", matchesImage); 63 | cv::waitKey(); 64 | 65 | return EXIT_SUCCESS; 66 | } 67 | 68 | namespace 69 | { 70 | KeyPointAndDesc processOneFrame(const Ort::SuperPoint& osh, const cv::Mat& inputImg, float* dst, int borderRemove, 71 | float confidenceThresh, bool alignCorners, int distThresh) 72 | { 73 | int 
origW = inputImg.cols, origH = inputImg.rows; 74 | cv::Mat scaledImg; 75 | cv::resize(inputImg, scaledImg, cv::Size(Ort::SuperPoint::IMG_W, Ort::SuperPoint::IMG_H), 0, 0, cv::INTER_CUBIC); 76 | osh.preprocess(dst, scaledImg.data, Ort::SuperPoint::IMG_W, Ort::SuperPoint::IMG_H, Ort::SuperPoint::IMG_CHANNEL); 77 | auto inferenceOutput = osh({dst}); 78 | 79 | std::vector keyPoints = osh.getKeyPoints(inferenceOutput, borderRemove, confidenceThresh); 80 | 81 | std::vector descriptorShape(inferenceOutput[1].second.begin(), inferenceOutput[1].second.end()); 82 | cv::Mat coarseDescriptorMat(descriptorShape.size(), descriptorShape.data(), CV_32F, 83 | inferenceOutput[1].first); // 1 x 256 x H/8 x W/8 84 | 85 | std::vector keepIndices = osh.nmsFast(keyPoints, Ort::SuperPoint::IMG_H, Ort::SuperPoint::IMG_W, distThresh); 86 | 87 | std::vector keepKeyPoints; 88 | keepKeyPoints.reserve(keepIndices.size()); 89 | std::transform(keepIndices.begin(), keepIndices.end(), std::back_inserter(keepKeyPoints), 90 | [&keyPoints](int idx) { return keyPoints[idx]; }); 91 | keyPoints = std::move(keepKeyPoints); 92 | 93 | cv::Mat descriptors = osh.getDescriptors(coarseDescriptorMat, keyPoints, Ort::SuperPoint::IMG_H, 94 | Ort::SuperPoint::IMG_W, alignCorners); 95 | 96 | for (auto& keyPoint : keyPoints) { 97 | keyPoint.pt.x *= static_cast(origW) / Ort::SuperPoint::IMG_W; 98 | keyPoint.pt.y *= static_cast(origH) / Ort::SuperPoint::IMG_H; 99 | } 100 | 101 | return {keyPoints, descriptors}; 102 | } 103 | } // namespace 104 | -------------------------------------------------------------------------------- /examples/TestImageClassification.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file TestImageClassification.cpp 3 | * 4 | * @author btran 5 | * 6 | * Copyright (c) organization 7 | * 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | #include 21 | 22 | 
static constexpr int64_t IMG_WIDTH = 224; 23 | static constexpr int64_t IMG_HEIGHT = 224; 24 | static constexpr int64_t IMG_CHANNEL = 3; 25 | static constexpr int64_t TEST_TIMES = 1000; 26 | 27 | int main(int argc, char* argv[]) 28 | { 29 | if (argc != 3) { 30 | std::cerr << "Usage: [apps] [path/to/onnx/model] [path/to/image]" << std::endl; 31 | return EXIT_FAILURE; 32 | } 33 | 34 | const std::string ONNX_MODEL_PATH = argv[1]; 35 | const std::string IMAGE_PATH = argv[2]; 36 | 37 | Ort::ImageClassificationOrtSessionHandler osh(Ort::IMAGENET_NUM_CLASSES, ONNX_MODEL_PATH, 0); 38 | osh.initClassNames(Ort::IMAGENET_CLASSES); 39 | 40 | cv::Mat img = cv::imread(IMAGE_PATH); 41 | 42 | if (img.empty()) { 43 | std::cerr << "Failed to read input image" << std::endl; 44 | return EXIT_FAILURE; 45 | } 46 | 47 | cv::resize(img, img, cv::Size(IMG_WIDTH, IMG_HEIGHT)); 48 | float* dst = new float[IMG_WIDTH * IMG_HEIGHT * IMG_CHANNEL]; 49 | osh.preprocess(dst, img.data, IMG_WIDTH, IMG_HEIGHT, IMG_CHANNEL, Ort::IMAGENET_MEAN, Ort::IMAGENET_STD); 50 | 51 | std::chrono::high_resolution_clock::time_point begin = std::chrono::high_resolution_clock::now(); 52 | for (int i = 0; i < TEST_TIMES; ++i) { 53 | auto inferenceOutput = osh({reinterpret_cast(dst)}); 54 | 55 | const int TOP_K = 5; 56 | // osh.topK({inferenceOutput[0].first}, TOP_K); 57 | std::cout << osh.topKToString({inferenceOutput[0].first}, TOP_K) << std::endl; 58 | } 59 | std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); 60 | auto elapsedTime = std::chrono::duration_cast(end - begin); 61 | std::cout << elapsedTime.count() / 1000. 
<< "[sec]" << std::endl; 62 | 63 | delete[] dst; 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /examples/TinyYolov2.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file TinyYolov2.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "TinyYolov2.hpp" 9 | 10 | namespace Ort 11 | { 12 | TinyYolov2::TinyYolov2(const uint16_t numClasses, // 13 | const std::string& modelPath, // 14 | const std::optional& gpuIdx, 15 | const std::optional>>& inputShapes) 16 | : ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes) 17 | { 18 | } 19 | 20 | TinyYolov2::~TinyYolov2() 21 | { 22 | } 23 | 24 | std::tuple>, std::vector, std::vector> 25 | TinyYolov2::postProcess(const std::vector& inferenceOutput, const float confidenceThresh) const 26 | { 27 | std::vector outputData(inferenceOutput.front().first, inferenceOutput.front().first + NUM_BOXES); 28 | 29 | // or 30 | // std::vector outputData(inferenceOutput.front().first, 31 | // inferenceOutput.front().first + 32 | // std::accumulate(inferenceOutput.front().second.begin(), 33 | // inferenceOutput.front().second.end(), 1, std::multiplies<>())); 34 | 35 | std::vector> bboxes; 36 | std::vector scores; 37 | std::vector classIndices; 38 | 39 | std::vector tmpScores(m_numClasses); 40 | 41 | for (uint64_t i = 0; i < FEATURE_MAP_SIZE; ++i) { 42 | for (uint64_t j = 0; j < NUM_ANCHORS; ++j) { 43 | for (uint64_t k = 0; k < m_numClasses; ++k) { 44 | tmpScores[k] = outputData[i + FEATURE_MAP_SIZE * ((m_numClasses + 5) * j + k + 5)]; 45 | } 46 | Ort::softmax(tmpScores.data(), m_numClasses); 47 | uint64_t maxIdx = 48 | std::distance(tmpScores.begin(), std::max_element(tmpScores.begin(), tmpScores.begin() + m_numClasses)); 49 | float probability = tmpScores[maxIdx]; 50 | 51 | if (Ort::sigmoid(outputData[i + FEATURE_MAP_SIZE * ((m_numClasses + 5) * j + 4)]) * probability >= 52 | confidenceThresh) { 
53 | float xcenter = 54 | (Ort::sigmoid(outputData[i + FEATURE_MAP_SIZE * (m_numClasses + 5 * j)]) + i % 13) * 32.0; 55 | float ycenter = 56 | (Ort::sigmoid(outputData[i + FEATURE_MAP_SIZE * ((m_numClasses + 5) * j + 1)]) + i % 13) * 32.0; 57 | 58 | float width = 59 | expf(outputData[i + FEATURE_MAP_SIZE * ((m_numClasses + 5) * j + 2)]) * ANCHORS[2 * j] * 32.0; 60 | float height = 61 | expf(outputData[i + FEATURE_MAP_SIZE * ((m_numClasses + 5) * j + 3)]) * ANCHORS[2 * j + 1] * 32.0; 62 | 63 | float xmin = std::max(xcenter - width / 2, 0.0); 64 | float ymin = std::max(ycenter - height / 2, 0.0); 65 | float xmax = std::min(xcenter + width / 2, IMG_WIDTH); 66 | float ymax = std::min(ycenter + height / 2, IMG_HEIGHT); 67 | 68 | bboxes.emplace_back(std::array{xmin, ymin, xmax, ymax}); 69 | scores.emplace_back(probability); 70 | classIndices.emplace_back(maxIdx); 71 | } 72 | } 73 | } 74 | 75 | return std::make_tuple(bboxes, scores, classIndices); 76 | } 77 | 78 | void TinyYolov2::preprocess(float* dst, // 79 | const unsigned char* src, // 80 | const int64_t targetImgWidth, // 81 | const int64_t targetImgHeight, // 82 | const int numChannels) const 83 | { 84 | for (int i = 0; i < targetImgHeight; ++i) { 85 | for (int j = 0; j < targetImgWidth; ++j) { 86 | for (int c = 0; c < numChannels; ++c) { 87 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 88 | src[i * targetImgWidth * numChannels + j * numChannels + c]; 89 | } 90 | } 91 | } 92 | } 93 | } // namespace Ort 94 | -------------------------------------------------------------------------------- /examples/TinyYolov2.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file TinyYolov2.hpp 3 | * 4 | * @author btran 5 | * 6 | * @date 2020-05-05 7 | * 8 | * Copyright (c) organization 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | namespace Ort 23 | { 24 | class 
TinyYolov2 : public ImageRecognitionOrtSessionHandlerBase 25 | { 26 | public: 27 | static constexpr int64_t IMG_WIDTH = 416; 28 | 29 | static constexpr int64_t IMG_HEIGHT = 416; 30 | 31 | static constexpr int64_t IMG_CHANNEL = 3; 32 | 33 | static constexpr int64_t FEATURE_MAP_SIZE = 13 * 13; 34 | 35 | static constexpr int64_t NUM_BOXES = 1 * 13 * 13 * 125; 36 | 37 | static constexpr int64_t NUM_ANCHORS = 5; 38 | 39 | static constexpr float ANCHORS[10] = {1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52}; 40 | 41 | TinyYolov2(const uint16_t numClasses, // 42 | const std::string& modelPath, // 43 | const std::optional& gpuIdx = std::nullopt, // 44 | const std::optional>>& inputShapes = std::nullopt); 45 | 46 | ~TinyYolov2(); 47 | 48 | void preprocess(float* dst, // 49 | const unsigned char* src, // 50 | const int64_t targetImgWidth, // 51 | const int64_t targetImgHeight, // 52 | const int numChannels) const; 53 | 54 | std::tuple>, std::vector, std::vector> 55 | postProcess(const std::vector& inferenceOutput, const float confidenceThresh = 0.5) const; 56 | }; 57 | } // namespace Ort 58 | -------------------------------------------------------------------------------- /examples/TinyYolov2App.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file TinyYolov2App.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | 15 | #include "TinyYolov2.hpp" 16 | #include "Utility.hpp" 17 | 18 | static constexpr float CONFIDENCE_THRESHOLD = 0.5; 19 | static constexpr float NMS_THRESHOLD = 0.6; 20 | static const std::vector COLORS = toCvScalarColors(Ort::VOC_COLOR_CHART); 21 | 22 | namespace 23 | { 24 | cv::Mat processOneFrame(const Ort::TinyYolov2& osh, const cv::Mat& inputImg, float* dst); 25 | } // namespace 26 | 27 | int main(int argc, char* argv[]) 28 | { 29 | if (argc != 3) { 30 | std::cerr << "Usage: [apps] [path/to/onnx/yolov3-tiny.onnx] [path/to/image]" 
<< std::endl; 31 | return EXIT_FAILURE; 32 | } 33 | 34 | const std::string ONNX_MODEL_PATH = argv[1]; 35 | const std::string IMAGE_PATH = argv[2]; 36 | 37 | Ort::TinyYolov2 osh(Ort::VOC_NUM_CLASSES, ONNX_MODEL_PATH, 0, 38 | std::vector>{{1, Ort::TinyYolov2::IMG_CHANNEL, Ort::TinyYolov2::IMG_WIDTH, 39 | Ort::TinyYolov2::IMG_HEIGHT}}); 40 | 41 | osh.initClassNames(Ort::VOC_CLASSES); 42 | std::array dst; 43 | 44 | cv::Mat img = cv::imread(IMAGE_PATH); 45 | 46 | if (img.empty()) { 47 | std::cerr << "Failed to read input image" << std::endl; 48 | return EXIT_FAILURE; 49 | } 50 | 51 | auto resultImg = ::processOneFrame(osh, img, dst.data()); 52 | 53 | cv::imwrite("result.jpg", resultImg); 54 | 55 | return EXIT_SUCCESS; 56 | } 57 | 58 | namespace 59 | { 60 | cv::Mat processOneFrame(const Ort::TinyYolov2& osh, const cv::Mat& inputImg, float* dst) 61 | { 62 | cv::Mat result; 63 | cv::resize(inputImg, result, cv::Size(Ort::TinyYolov2::IMG_WIDTH, Ort::TinyYolov2::IMG_HEIGHT)); 64 | 65 | osh.preprocess(dst, result.data, Ort::TinyYolov2::IMG_WIDTH, Ort::TinyYolov2::IMG_HEIGHT, 66 | Ort::TinyYolov2::IMG_CHANNEL); 67 | auto inferenceOutput = osh({dst}); 68 | assert(inferenceOutput.size() == 1); 69 | 70 | auto processedResult = osh.postProcess(inferenceOutput, CONFIDENCE_THRESHOLD); 71 | std::vector> bboxes; 72 | std::vector scores; 73 | std::vector classIndices; 74 | std::tie(bboxes, scores, classIndices) = processedResult; 75 | 76 | if (bboxes.size() == 0) { 77 | return result; 78 | } 79 | 80 | auto afterNmsIndices = Ort::nms(bboxes, scores, NMS_THRESHOLD); 81 | 82 | std::vector> afterNmsBboxes; 83 | std::vector afterNmsClassIndices; 84 | 85 | afterNmsBboxes.reserve(afterNmsIndices.size()); 86 | afterNmsClassIndices.reserve(afterNmsIndices.size()); 87 | 88 | for (const auto idx : afterNmsIndices) { 89 | afterNmsBboxes.emplace_back(bboxes[idx]); 90 | afterNmsClassIndices.emplace_back(classIndices[idx]); 91 | } 92 | 93 | result = ::visualizeOneImage(result, afterNmsBboxes, 
afterNmsClassIndices, COLORS, osh.classNames()); 94 | 95 | return result; 96 | } 97 | } // namespace 98 | -------------------------------------------------------------------------------- /examples/UltraLightFastGenericFaceDetector.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file UltraLightFastGenericFaceDetector.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "UltraLightFastGenericFaceDetector.hpp" 9 | 10 | namespace Ort 11 | { 12 | UltraLightFastGenericFaceDetector::UltraLightFastGenericFaceDetector( 13 | const std::string& modelPath, const std::optional& gpuIdx, 14 | const std::optional>>& inputShapes) 15 | : ImageRecognitionOrtSessionHandlerBase(1 /* num classes */, modelPath, gpuIdx, inputShapes) 16 | { 17 | } 18 | 19 | UltraLightFastGenericFaceDetector::~UltraLightFastGenericFaceDetector() 20 | { 21 | } 22 | 23 | // Ref: https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/blob/master/detect_imgs_onnx.py#L70 24 | void UltraLightFastGenericFaceDetector::preprocess(float* dst, // 25 | const unsigned char* src, // 26 | const int64_t targetImgWidth, // 27 | const int64_t targetImgHeight, // 28 | const int numChannels) const 29 | { 30 | for (int c = 0; c < numChannels; ++c) { 31 | for (int i = 0; i < targetImgHeight; ++i) { 32 | for (int j = 0; j < targetImgWidth; ++j) { 33 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 34 | (src[i * targetImgWidth * numChannels + j * numChannels + c] - 127.) 
/ 128.; 35 | } 36 | } 37 | } 38 | } 39 | } // namespace Ort 40 | -------------------------------------------------------------------------------- /examples/UltraLightFastGenericFaceDetector.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file UltraLightFastGenericFaceDetector.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | namespace Ort 17 | { 18 | class UltraLightFastGenericFaceDetector : public ImageRecognitionOrtSessionHandlerBase 19 | { 20 | public: 21 | static constexpr int64_t IMG_H = 480; 22 | 23 | static constexpr int64_t IMG_W = 640; 24 | 25 | static constexpr int64_t IMG_CHANNEL = 3; 26 | 27 | explicit UltraLightFastGenericFaceDetector( 28 | const std::string& modelPath, const std::optional& gpuIdx = std::nullopt, 29 | const std::optional>>& inputShapes = std::nullopt); 30 | 31 | ~UltraLightFastGenericFaceDetector(); 32 | 33 | void preprocess(float* dst, // 34 | const unsigned char* src, // 35 | const int64_t targetImgWidth, // 36 | const int64_t targetImgHeight, // 37 | const int numChannels) const; 38 | }; 39 | } // namespace Ort 40 | -------------------------------------------------------------------------------- /examples/UltraLightFastGenericFaceDetectorApp.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file UltraLightFastGenericFaceDetectorApp.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "Utility.hpp" 9 | 10 | #include "UltraLightFastGenericFaceDetector.hpp" 11 | 12 | static const std::vector FACE_CLASSES = {"face"}; 13 | static constexpr int64_t FACE_NUM_CLASSES = 1; 14 | static const std::vector> FACE_COLOR_CHART = 15 | Ort::generateColorCharts(FACE_NUM_CLASSES, 2020 /* choosing awesome 2020 as seed point */); 16 | 17 | static constexpr float CONFIDENCE_THRESHOLD = 0.7; 18 | static constexpr float NMS_THRESHOLD = 0.3; 19 | static const 
std::vector COLORS = toCvScalarColors(FACE_COLOR_CHART); 20 | 21 | namespace 22 | { 23 | cv::Mat processOneFrame(const Ort::UltraLightFastGenericFaceDetector& osh, const cv::Mat& inputImg, float* dst, 24 | const float confThresh = CONFIDENCE_THRESHOLD, const float nmsThresh = NMS_THRESHOLD); 25 | } // namespace 26 | 27 | int main(int argc, char* argv[]) 28 | { 29 | if (argc != 3) { 30 | std::cerr << "Usage: [apps] [path/to/onnx/version-RFB-640.onnx] [path/to/image]" << std::endl; 31 | return EXIT_FAILURE; 32 | } 33 | 34 | const std::string ONNX_MODEL_PATH = argv[1]; 35 | const std::string IMAGE_PATH = argv[2]; 36 | 37 | cv::Mat img = cv::imread(IMAGE_PATH); 38 | 39 | if (img.empty()) { 40 | std::cerr << "Failed to read input image" << std::endl; 41 | return EXIT_FAILURE; 42 | } 43 | 44 | Ort::UltraLightFastGenericFaceDetector osh( 45 | ONNX_MODEL_PATH, 0, 46 | std::vector>{{1, Ort::UltraLightFastGenericFaceDetector::IMG_CHANNEL, 47 | Ort::UltraLightFastGenericFaceDetector::IMG_H, 48 | Ort::UltraLightFastGenericFaceDetector::IMG_W}}); 49 | 50 | osh.initClassNames(FACE_CLASSES); 51 | 52 | std::vector dst(Ort::UltraLightFastGenericFaceDetector::IMG_CHANNEL * 53 | Ort::UltraLightFastGenericFaceDetector::IMG_H * 54 | Ort::UltraLightFastGenericFaceDetector::IMG_W); 55 | auto result = processOneFrame(osh, img, dst.data()); 56 | cv::imwrite("result.jpg", result); 57 | 58 | return 0; 59 | } 60 | 61 | namespace 62 | { 63 | cv::Mat processOneFrame(const Ort::UltraLightFastGenericFaceDetector& osh, const cv::Mat& inputImg, float* dst, 64 | const float confThresh, const float nmsThresh) 65 | { 66 | int origH = inputImg.rows; 67 | int origW = inputImg.cols; 68 | 69 | cv::Mat processedImg; 70 | cv::resize(inputImg, processedImg, cv::Size(osh.IMG_W, osh.IMG_H)); 71 | 72 | osh.preprocess(dst, processedImg.data, osh.IMG_W, osh.IMG_H, 3); 73 | auto inferenceOutput = osh({dst}); 74 | 75 | // output includes two tensors: 76 | // confidences: 1 x 17640 x 2 (2 represents 2 classes of 
background and face) 77 | // bboxes: 1 x 17640 x 4 (4 represents bbox coordinates) 78 | 79 | int numAnchors = inferenceOutput[0].second[1]; 80 | 81 | std::vector> bboxes; 82 | std::vector scores; 83 | std::vector classIndices; 84 | 85 | for (auto indices = std::make_pair(0, 0); indices.first < numAnchors * 2 && indices.second < numAnchors * 4; 86 | indices.first += 2, indices.second += 4) { 87 | float conf = inferenceOutput[0].first[indices.first + 1]; 88 | if (conf < confThresh) { 89 | continue; 90 | } 91 | float xmin = inferenceOutput[1].first[indices.second + 0] * origW; 92 | float ymin = inferenceOutput[1].first[indices.second + 1] * origH; 93 | float xmax = inferenceOutput[1].first[indices.second + 2] * origW; 94 | float ymax = inferenceOutput[1].first[indices.second + 3] * origH; 95 | 96 | bboxes.emplace_back(std::array{xmin, ymin, xmax, ymax}); 97 | scores.emplace_back(conf); 98 | 99 | // only consider face 100 | classIndices.emplace_back(0); 101 | } 102 | 103 | if (bboxes.size() == 0) { 104 | return inputImg; 105 | } 106 | 107 | auto afterNmsIndices = Ort::nms(bboxes, scores, nmsThresh); 108 | 109 | std::vector> afterNmsBboxes; 110 | std::vector afterNmsClassIndices; 111 | 112 | afterNmsBboxes.reserve(afterNmsIndices.size()); 113 | afterNmsClassIndices.reserve(afterNmsIndices.size()); 114 | 115 | for (const auto idx : afterNmsIndices) { 116 | afterNmsBboxes.emplace_back(bboxes[idx]); 117 | afterNmsClassIndices.emplace_back(classIndices[idx]); 118 | } 119 | 120 | return visualizeOneImage(inputImg, afterNmsBboxes, afterNmsClassIndices, COLORS, osh.classNames()); 121 | } 122 | } // namespace 123 | -------------------------------------------------------------------------------- /examples/Utility.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Utility.hpp 3 | * 4 | * @author btran 5 | * 6 | * @date 2020-05-04 7 | * 8 | * Copyright (c) organization 9 | * 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 
| #include 16 | #include 17 | #include 18 | 19 | #include 20 | 21 | namespace 22 | { 23 | inline std::vector toCvScalarColors(const std::vector>& colors) 24 | { 25 | std::vector result; 26 | result.reserve(colors.size()); 27 | std::transform(std::begin(colors), std::end(colors), std::back_inserter(result), 28 | [](const auto& elem) { return cv::Scalar(elem[0], elem[1], elem[2]); }); 29 | 30 | return result; 31 | } 32 | 33 | inline cv::Mat visualizeOneImage(const cv::Mat& img, const std::vector>& bboxes, 34 | const std::vector& classIndices, const std::vector& allColors, 35 | const std::vector& allClassNames = {}) 36 | { 37 | assert(bboxes.size() == classIndices.size()); 38 | if (!allClassNames.empty()) { 39 | assert(allClassNames.size() > *std::max_element(classIndices.begin(), classIndices.end())); 40 | assert(allColors.size() == allClassNames.size()); 41 | } 42 | 43 | cv::Mat result = img.clone(); 44 | 45 | for (size_t i = 0; i < bboxes.size(); ++i) { 46 | const auto& curBbox = bboxes[i]; 47 | const uint64_t classIdx = classIndices[i]; 48 | const cv::Scalar& curColor = allColors[classIdx]; 49 | const std::string curLabel = allClassNames.empty() ? 
std::to_string(classIdx) : allClassNames[classIdx]; 50 | 51 | cv::rectangle(result, cv::Point(curBbox[0], curBbox[1]), cv::Point(curBbox[2], curBbox[3]), curColor, 2); 52 | 53 | int baseLine = 0; 54 | cv::Size labelSize = cv::getTextSize(curLabel, cv::FONT_HERSHEY_COMPLEX, 0.35, 1, &baseLine); 55 | cv::rectangle(result, cv::Point(curBbox[0], curBbox[1]), 56 | cv::Point(curBbox[0] + labelSize.width, curBbox[1] + static_cast(1.3 * labelSize.height)), 57 | curColor, -1); 58 | cv::putText(result, curLabel, cv::Point(curBbox[0], curBbox[1] + labelSize.height), cv::FONT_HERSHEY_COMPLEX, 59 | 0.35, cv::Scalar(255, 255, 255)); 60 | } 61 | 62 | return result; 63 | } 64 | 65 | inline cv::Mat visualizeOneImageWithMask(const cv::Mat& img, const std::vector>& bboxes, 66 | const std::vector& classIndices, const std::vector& masks, 67 | const std::vector& allColors, 68 | const std::vector& allClassNames = {}, 69 | const float maskThreshold = 0.5) 70 | { 71 | assert(bboxes.size() == classIndices.size()); 72 | if (!allClassNames.empty()) { 73 | assert(allClassNames.size() > *std::max_element(classIndices.begin(), classIndices.end())); 74 | assert(allColors.size() == allClassNames.size()); 75 | } 76 | 77 | cv::Mat result = img.clone(); 78 | 79 | for (size_t i = 0; i < bboxes.size(); ++i) { 80 | const auto& curBbox = bboxes[i]; 81 | const uint64_t classIdx = classIndices[i]; 82 | cv::Mat curMask = masks[i].clone(); 83 | const cv::Scalar& curColor = allColors[classIdx]; 84 | const std::string curLabel = allClassNames.empty() ? 
std::to_string(classIdx) : allClassNames[classIdx]; 85 | 86 | cv::rectangle(result, cv::Point(curBbox[0], curBbox[1]), cv::Point(curBbox[2], curBbox[3]), curColor, 2); 87 | 88 | int baseLine = 0; 89 | cv::Size labelSize = cv::getTextSize(curLabel, cv::FONT_HERSHEY_COMPLEX, 0.35, 1, &baseLine); 90 | cv::rectangle(result, cv::Point(curBbox[0], curBbox[1]), 91 | cv::Point(curBbox[0] + labelSize.width, curBbox[1] + static_cast(1.3 * labelSize.height)), 92 | curColor, -1); 93 | cv::putText(result, curLabel, cv::Point(curBbox[0], curBbox[1] + labelSize.height), cv::FONT_HERSHEY_COMPLEX, 94 | 0.35, cv::Scalar(255, 255, 255)); 95 | 96 | // ---------------------------------------------------------------------// 97 | // Visualize masks 98 | 99 | const cv::Rect curBoxRect(cv::Point(curBbox[0], curBbox[1]), cv::Point(curBbox[2], curBbox[3])); 100 | 101 | cv::resize(curMask, curMask, curBoxRect.size()); 102 | 103 | cv::Mat finalMask = (curMask > maskThreshold); 104 | 105 | cv::Mat coloredRoi = (0.3 * curColor + 0.7 * result(curBoxRect)); 106 | 107 | coloredRoi.convertTo(coloredRoi, CV_8UC3); 108 | 109 | std::vector contours; 110 | cv::Mat hierarchy; 111 | finalMask.convertTo(finalMask, CV_8U); 112 | 113 | cv::findContours(finalMask, contours, hierarchy, cv::RETR_TREE, cv::CHAIN_APPROX_SIMPLE); 114 | cv::drawContours(coloredRoi, contours, -1, curColor, 5, cv::LINE_8, hierarchy, 100); 115 | coloredRoi.copyTo(result(curBoxRect), finalMask); 116 | } 117 | 118 | return result; 119 | } 120 | 121 | inline cv::Mat drawColorChart(const std::vector& classes, const std::vector& colors) 122 | { 123 | cv::Mat legend = cv::Mat::zeros((classes.size() * 25) + 25, 300, CV_8UC(3)); 124 | for (std::size_t i = 0; i < classes.size(); ++i) { 125 | cv::putText(legend, classes[i], cv::Point(5, (i * 25) + 17), cv::FONT_HERSHEY_SIMPLEX, 0.5, 126 | cv::Scalar(0, 0, 255), 2); 127 | cv::rectangle(legend, cv::Point(100, (i * 25)), cv::Point(300, (i * 25) + 25), colors[i], -1); 128 | } 129 | 130 | return 
legend; 131 | } 132 | 133 | inline void transposeNDWrapper(cv::InputArray src_, const std::vector& order, cv::OutputArray dst_) 134 | { 135 | #if (CV_MAJOR_VERSION > 4 || (CV_MAJOR_VERSION == 4 && CV_MINOR_VERSION >= 6)) 136 | cv::transposeND(src_, order, dst_); 137 | #else 138 | cv::Mat inp = src_.getMat(); 139 | CV_Assert(inp.isContinuous()); 140 | CV_CheckEQ(inp.channels(), 1, "Input array should be single-channel"); 141 | CV_CheckEQ(order.size(), static_cast(inp.dims), "Number of dimensions shouldn't change"); 142 | 143 | auto order_ = order; 144 | std::sort(order_.begin(), order_.end()); 145 | for (size_t i = 0; i < order_.size(); ++i) { 146 | CV_CheckEQ(static_cast(order_[i]), i, "New order should be a valid permutation of the old one"); 147 | } 148 | 149 | std::vector newShape(order.size()); 150 | for (size_t i = 0; i < order.size(); ++i) { 151 | newShape[i] = inp.size[order[i]]; 152 | } 153 | 154 | dst_.create(static_cast(newShape.size()), newShape.data(), inp.type()); 155 | cv::Mat out = dst_.getMat(); 156 | CV_Assert(out.isContinuous()); 157 | CV_Assert(inp.data != out.data); 158 | 159 | int continuous_idx = 0; 160 | for (int i = static_cast(order.size()) - 1; i >= 0; --i) { 161 | if (order[i] != i) { 162 | continuous_idx = i + 1; 163 | break; 164 | } 165 | } 166 | 167 | size_t continuous_size = continuous_idx == 0 ? 
out.total() : out.step1(continuous_idx - 1); 168 | size_t outer_size = out.total() / continuous_size; 169 | 170 | std::vector steps(order.size()); 171 | for (int i = 0; i < static_cast(steps.size()); ++i) { 172 | steps[i] = inp.step1(order[i]); 173 | } 174 | 175 | auto* src = inp.ptr(); 176 | auto* dst = out.ptr(); 177 | 178 | size_t src_offset = 0; 179 | size_t es = out.elemSize(); 180 | for (size_t i = 0; i < outer_size; ++i) { 181 | std::memcpy(dst, src + es * src_offset, es * continuous_size); 182 | dst += es * continuous_size; 183 | for (int j = continuous_idx - 1; j >= 0; --j) { 184 | src_offset += steps[j]; 185 | if ((src_offset / steps[j]) % out.size[j] != 0) { 186 | break; 187 | } 188 | src_offset -= steps[j] * out.size[j]; 189 | } 190 | } 191 | #endif 192 | } 193 | 194 | /** 195 | * @brief https://mmcv.readthedocs.io/en/latest/_modules/mmcv/ops/point_sample.html 196 | */ 197 | inline cv::Mat bilinearGridSample(const cv::Mat& input, const cv::Mat& grid, bool alignCorners) 198 | { 199 | // input: B x C x Hi x Wi 200 | // grid: B x Hg x Wg x 2 201 | 202 | if (input.size[0] != grid.size[0]) { 203 | throw std::runtime_error("input and grid need to have the same batch size"); 204 | } 205 | int batch = input.size[0]; 206 | int channel = input.size[1]; 207 | int height = input.size[2]; 208 | int width = input.size[3]; 209 | 210 | int numKeyPoints = grid.size[2]; 211 | cv::Mat yMat = grid({cv::Range::all(), cv::Range::all(), cv::Range::all(), cv::Range(0, 1)}) 212 | .clone() 213 | .reshape(1, {batch, grid.size[1] * grid.size[2]}); 214 | cv::Mat xMat = grid({cv::Range::all(), cv::Range::all(), cv::Range::all(), cv::Range(1, 2)}) 215 | .clone() 216 | .reshape(1, {batch, grid.size[1] * grid.size[2]}); 217 | 218 | if (alignCorners) { 219 | xMat = ((xMat + 1) / 2) * (width - 1); 220 | yMat = ((yMat + 1) / 2) * (height - 1); 221 | } else { 222 | xMat = ((xMat + 1) * width - 1) / 2; 223 | yMat = ((yMat + 1) * height - 1) / 2; 224 | } 225 | 226 | // floor 227 | cv::Mat 
x0Mat = xMat - 0.5; 228 | cv::Mat y0Mat = yMat - 0.5; 229 | x0Mat.convertTo(x0Mat, CV_32S); 230 | y0Mat.convertTo(y0Mat, CV_32S); 231 | 232 | x0Mat.convertTo(x0Mat, CV_32F); 233 | y0Mat.convertTo(y0Mat, CV_32F); 234 | 235 | cv::Mat x1Mat = x0Mat + 1; 236 | /* FIX: y1Mat was derived from x0Mat (copy-paste bug); the upper y-neighbor of the 2x2 bilinear cell must come from y0Mat, otherwise both the interpolation weights and the {x, y1} sample pairs below are wrong */ cv::Mat y1Mat = y0Mat + 1; 237 | 238 | std::vector weights = {(x1Mat - xMat).mul(y1Mat - yMat), (x1Mat - xMat).mul(yMat - y0Mat), 239 | (xMat - x0Mat).mul(y1Mat - yMat), (xMat - x0Mat).mul(yMat - y0Mat)}; 240 | 241 | cv::Mat result = cv::Mat::zeros(3, std::vector{batch, channel, grid.size[1] * grid.size[2]}.data(), CV_32F); 242 | 243 | /* FIX: coordinate 0 addresses a valid pixel; 'size > 0' silently zeroed every sample touching the first row/column */ auto isCoordSafe = [](int size, int maxSize) -> bool { return size >= 0 && size < maxSize; }; 244 | 245 | for (int b = 0; b < batch; ++b) { 246 | for (int i = 0; i < grid.size[1] * grid.size[2]; ++i) { 247 | int x0 = x0Mat.at(b, i); 248 | int y0 = y0Mat.at(b, i); 249 | int x1 = x1Mat.at(b, i); 250 | int y1 = y1Mat.at(b, i); 251 | 252 | std::vector> pairs = {{x0, y0}, {x0, y1}, {x1, y0}, {x1, y1}}; 253 | std::vector Is(4, cv::Mat::zeros(channel, 1, CV_32F)); 254 | 255 | for (int k = 0; k < 4; ++k) { 256 | if (isCoordSafe(pairs[k].first, width) && isCoordSafe(pairs[k].second, height)) { 257 | Is[k] = 258 | input({cv::Range(b, b + 1), cv::Range::all(), cv::Range(pairs[k].second, pairs[k].second + 1), 259 | cv::Range(pairs[k].first, pairs[k].first + 1)}) 260 | .clone() 261 | .reshape(1, channel); 262 | } 263 | } 264 | 265 | cv::Mat curDescriptor = Is[0] * weights[0].at(i) + Is[1] * weights[1].at(i) + 266 | Is[2] * weights[2].at(i) + Is[3] * weights[3].at(i); 267 | 268 | for (int c = 0; c < channel; ++c) { 269 | result.at(b, c, i) = curDescriptor.at(c); 270 | } 271 | } 272 | } 273 | 274 | return result.reshape(1, {batch, channel, grid.size[1], grid.size[2]}); 275 | } 276 | } // namespace 277 | -------------------------------------------------------------------------------- /examples/YoloX.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * 
@file YoloX.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "YoloX.hpp" 9 | 10 | namespace Ort 11 | { 12 | struct YoloX::GridAndStride { 13 | int grid0; 14 | int grid1; 15 | int stride; 16 | }; 17 | 18 | YoloX::YoloX(const uint16_t numClasses, // 19 | const std::string& modelPath, // 20 | const std::optional& gpuIdx, const std::optional>>& inputShapes) 21 | : ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes) 22 | { 23 | } 24 | 25 | YoloX::~YoloX() 26 | { 27 | } 28 | 29 | void YoloX::preprocess(float* dst, // 30 | const unsigned char* src, // 31 | const int64_t targetImgWidth, // 32 | const int64_t targetImgHeight, // 33 | const int numChannels) const 34 | { 35 | for (int c = 0; c < numChannels; ++c) { 36 | for (int i = 0; i < targetImgHeight; ++i) { 37 | for (int j = 0; j < targetImgWidth; ++j) { 38 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 39 | src[i * targetImgWidth * numChannels + j * numChannels + c]; 40 | } 41 | } 42 | } 43 | } 44 | 45 | std::vector YoloX::decodeOutputs(const float* prob, float confThresh) const 46 | { 47 | std::vector gridStrides = this->genereateGridsAndStrides(IMG_W, m_strides); 48 | return this->generateYoloXProposals(prob, gridStrides, confThresh); 49 | } 50 | 51 | /** 52 | * @brief https://github.com/Megvii-BaseDetection/YOLOX/blob/main/demo/MegEngine/cpp/yolox.cpp#L64 53 | */ 54 | std::vector YoloX::genereateGridsAndStrides(int targetSize, const std::vector& strides) const 55 | { 56 | std::vector gridStrides; 57 | for (auto stride : strides) { 58 | int numGrid = targetSize / stride; 59 | for (int g1 = 0; g1 < numGrid; g1++) { 60 | for (int g0 = 0; g0 < numGrid; g0++) { 61 | gridStrides.push_back((GridAndStride){g0, g1, stride}); 62 | } 63 | } 64 | } 65 | return gridStrides; 66 | } 67 | 68 | /** 69 | * @brief https://github.com/Megvii-BaseDetection/YOLOX/blob/main/demo/MegEngine/cpp/yolox.cpp#L76 70 | */ 71 | std::vector 72 | YoloX::generateYoloXProposals(const 
float* prob, const std::vector& gridStrides, float confThresh) const 73 | { 74 | std::vector objects; 75 | 76 | const int numAnchors = gridStrides.size(); 77 | const int numClasses = m_numClasses; 78 | for (int anchorIdx = 0; anchorIdx < numAnchors; ++anchorIdx) { 79 | const int grid0 = gridStrides[anchorIdx].grid0; 80 | const int grid1 = gridStrides[anchorIdx].grid1; 81 | const int stride = gridStrides[anchorIdx].stride; 82 | 83 | // 4 parameters defining the bounding boxes and 1 parameter defining the confidence 84 | const int basicPos = anchorIdx * (numClasses + 5); 85 | float xCenter = (prob[basicPos + 0] + grid0) * stride; 86 | float yCenter = (prob[basicPos + 1] + grid1) * stride; 87 | float w = exp(prob[basicPos + 2]) * stride; 88 | float h = exp(prob[basicPos + 3]) * stride; 89 | float x0 = xCenter - w * 0.5f; 90 | float y0 = yCenter - h * 0.5f; 91 | 92 | float boxObjectness = prob[basicPos + 4]; 93 | for (int classIdx = 0; classIdx < numClasses; ++classIdx) { 94 | float boxClsScore = prob[basicPos + 5 + classIdx]; 95 | float boxProb = boxObjectness * boxClsScore; 96 | if (boxProb > confThresh) { 97 | Object obj; 98 | obj.pos.x = x0; 99 | obj.pos.y = y0; 100 | obj.pos.width = w; 101 | obj.pos.height = h; 102 | obj.label = classIdx; 103 | obj.prob = boxProb; 104 | objects.push_back(obj); 105 | } 106 | } 107 | } 108 | 109 | return objects; 110 | } 111 | } // namespace Ort 112 | -------------------------------------------------------------------------------- /examples/YoloX.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file YoloX.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include 17 | 18 | namespace Ort 19 | { 20 | class YoloX : public ImageRecognitionOrtSessionHandlerBase 21 | { 22 | public: 23 | static constexpr int64_t IMG_H = 640; 24 | static constexpr int64_t IMG_W = 640; 25 | static constexpr int64_t 
IMG_CHANNEL = 3; 26 | 27 | struct Object { 28 | cv::Rect_ pos; 29 | int label; 30 | float prob; 31 | }; 32 | 33 | YoloX(const uint16_t numClasses, // 34 | const std::string& modelPath, // 35 | const std::optional& gpuIdx = std::nullopt, // 36 | const std::optional>>& inputShapes = std::nullopt); 37 | 38 | ~YoloX(); 39 | 40 | void preprocess(float* dst, // 41 | const unsigned char* src, // 42 | const int64_t targetImgWidth, // 43 | const int64_t targetImgHeight, // 44 | const int numChannels) const; 45 | 46 | std::vector decodeOutputs(const float* prob, float confThresh) const; 47 | 48 | /** 49 | * @brief update strides 50 | * this method gives users the flexibility to use other yolox models 51 | */ 52 | void updateStrides(const std::vector& strides) 53 | { 54 | if (strides.empty()) { 55 | throw std::runtime_error("cannot update empty strides"); 56 | } 57 | m_strides = strides; 58 | } 59 | 60 | private: 61 | std::vector m_strides = {8, 16, 32}; 62 | 63 | struct GridAndStride; 64 | 65 | std::vector genereateGridsAndStrides(int targetSize, const std::vector& strides) const; 66 | 67 | std::vector generateYoloXProposals(const float* prob, const std::vector& gridStrides, 68 | float confThresh) const; 69 | }; 70 | } // namespace Ort 71 | -------------------------------------------------------------------------------- /examples/YoloXApp.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file YoloXApp.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | 10 | #include "Utility.hpp" 11 | #include "YoloX.hpp" 12 | 13 | static const std::vector MSCOCO_WITHOUT_BG_CLASSES(Ort::MSCOCO_CLASSES.begin() + 1, 14 | Ort::MSCOCO_CLASSES.end()); 15 | static constexpr int64_t NUM_CLASSES = 80; 16 | static const std::vector> COLOR_CHART = Ort::generateColorCharts(NUM_CLASSES); 17 | 18 | static constexpr float CONFIDENCE_THRESHOLD = 0.1; 19 | static const std::vector COLORS = toCvScalarColors(COLOR_CHART); 20 | 21 | namespace 22 | 
{ 23 | cv::Mat processOneFrame(const Ort::YoloX& osh, const cv::Mat& inputImg, float* dst, const float confThresh); 24 | } // namespace 25 | 26 | int main(int argc, char* argv[]) 27 | { 28 | if (argc != 3) { 29 | std::cerr << "Usage: [apps] [path/to/onnx/yolox] [path/to/image]" << std::endl; 30 | return EXIT_FAILURE; 31 | } 32 | 33 | const std::string ONNX_MODEL_PATH = argv[1]; 34 | const std::string IMAGE_PATH = argv[2]; 35 | 36 | cv::Mat img = cv::imread(IMAGE_PATH); 37 | 38 | if (img.empty()) { 39 | std::cerr << "Failed to read input image" << std::endl; 40 | return EXIT_FAILURE; 41 | } 42 | 43 | Ort::YoloX osh( 44 | NUM_CLASSES, ONNX_MODEL_PATH, 0, 45 | std::vector>{{1, Ort::YoloX::IMG_CHANNEL, Ort::YoloX::IMG_H, Ort::YoloX::IMG_W}}); 46 | 47 | osh.initClassNames(MSCOCO_WITHOUT_BG_CLASSES); 48 | 49 | std::vector dst(Ort::YoloX::IMG_CHANNEL * Ort::YoloX::IMG_H * Ort::YoloX::IMG_W); 50 | auto result = processOneFrame(osh, img, dst.data(), CONFIDENCE_THRESHOLD); 51 | cv::imwrite("result.jpg", result); 52 | 53 | return EXIT_SUCCESS; 54 | } 55 | 56 | namespace 57 | { 58 | cv::Mat processOneFrame(const Ort::YoloX& osh, const cv::Mat& inputImg, float* dst, const float confThresh) 59 | { 60 | int origW = inputImg.cols, origH = inputImg.rows; 61 | std::vector originImageSize{static_cast(origH), static_cast(origW)}; 62 | cv::Mat scaledImg; 63 | cv::resize(inputImg, scaledImg, cv::Size(Ort::YoloX::IMG_W, Ort::YoloX::IMG_H), 0, 0, cv::INTER_CUBIC); 64 | osh.preprocess(dst, scaledImg.data, Ort::YoloX::IMG_W, Ort::YoloX::IMG_H, 3); 65 | auto inferenceOutput = osh({dst}); 66 | 67 | std::vector objects = osh.decodeOutputs(inferenceOutput[0].first, confThresh); 68 | 69 | std::vector> bboxes; 70 | std::vector scores; 71 | std::vector classIndices; 72 | 73 | float scaleW = 1. * origW / Ort::YoloX::IMG_W; 74 | float scaleH = 1. 
* origH / Ort::YoloX::IMG_H; 75 | 76 | for (const auto& object : objects) { 77 | float xmin = object.pos.x * scaleW; 78 | float ymin = object.pos.y * scaleH; 79 | float xmax = (object.pos.x + object.pos.width) * scaleW; 80 | float ymax = (object.pos.y + object.pos.height) * scaleH; 81 | 82 | /* FIX: std::max/std::min require both operands to be the same type; mixing float with int fails template deduction, so clamp with float operands */ xmin = std::max(xmin, 0.f); 83 | ymin = std::max(ymin, 0.f); 84 | xmax = std::min(xmax, inputImg.cols - 1.f); 85 | ymax = std::min(ymax, inputImg.rows - 1.f); 86 | 87 | bboxes.emplace_back(std::array{xmin, ymin, xmax, ymax}); 88 | scores.emplace_back(object.prob); 89 | classIndices.emplace_back(object.label); 90 | } 91 | 92 | auto afterNmsIndices = Ort::nms(bboxes, scores, confThresh); /* NOTE(review): NMS reuses the confidence threshold as its IoU threshold — presumably a dedicated nmsThresh was intended; confirm against the sibling apps which pass a separate NMS_THRESHOLD */ 93 | 94 | std::vector> afterNmsBboxes; 95 | std::vector afterNmsClassIndices; 96 | 97 | afterNmsBboxes.reserve(afterNmsIndices.size()); 98 | afterNmsClassIndices.reserve(afterNmsIndices.size()); 99 | 100 | for (const auto idx : afterNmsIndices) { 101 | afterNmsBboxes.emplace_back(bboxes[idx]); 102 | afterNmsClassIndices.emplace_back(classIndices[idx]); 103 | } 104 | 105 | return afterNmsBboxes.empty() 106 | ? 
inputImg 107 | : visualizeOneImage(inputImg, afterNmsBboxes, afterNmsClassIndices, COLORS, osh.classNames()); 108 | } 109 | } // namespace 110 | -------------------------------------------------------------------------------- /examples/Yolov3.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Yolov3.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include "Yolov3.hpp" 9 | #include 10 | 11 | namespace Ort 12 | { 13 | Yolov3::Yolov3(const uint16_t numClasses, // 14 | const std::string& modelPath, // 15 | const std::optional& gpuIdx, const std::optional>>& inputShapes) 16 | : ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes) 17 | { 18 | } 19 | 20 | Yolov3::~Yolov3() 21 | { 22 | } 23 | 24 | void Yolov3::preprocess(float* dst, // 25 | const unsigned char* src, // 26 | const int64_t targetImgWidth, // 27 | const int64_t targetImgHeight, // 28 | const int numChannels) const 29 | { 30 | for (int c = 0; c < numChannels; ++c) { 31 | for (int i = 0; i < targetImgHeight; ++i) { 32 | for (int j = 0; j < targetImgWidth; ++j) { 33 | dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] = 34 | src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0; 35 | } 36 | } 37 | } 38 | } 39 | } // namespace Ort 40 | -------------------------------------------------------------------------------- /examples/Yolov3.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Yolov3.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include 17 | 18 | namespace Ort 19 | { 20 | class Yolov3 : public ImageRecognitionOrtSessionHandlerBase 21 | { 22 | public: 23 | static constexpr int64_t IMG_H = 416; 24 | 25 | static constexpr int64_t IMG_W = 416; 26 | 27 | static constexpr int64_t IMG_CHANNEL = 3; 28 | 29 | Yolov3(const uint16_t numClasses, // 30 | const 
std::string& modelPath, // 31 | const std::optional& gpuIdx = std::nullopt, // 32 | const std::optional>>& inputShapes = std::nullopt); 33 | 34 | ~Yolov3(); 35 | 36 | void preprocess(float* dst, // 37 | const unsigned char* src, // 38 | const int64_t targetImgWidth, // 39 | const int64_t targetImgHeight, // 40 | const int numChannels) const; 41 | }; 42 | } // namespace Ort 43 | -------------------------------------------------------------------------------- /examples/Yolov3App.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Yolov3App.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | 10 | #include "Utility.hpp" 11 | #include "Yolov3.hpp" 12 | 13 | static const std::vector MSCOCO_WITHOUT_BG_CLASSES(Ort::MSCOCO_CLASSES.begin() + 1, 14 | Ort::MSCOCO_CLASSES.end()); 15 | static constexpr int64_t NUM_CLASSES = 80; 16 | static const std::vector> COLOR_CHART = Ort::generateColorCharts(NUM_CLASSES); 17 | 18 | static constexpr float CONFIDENCE_THRESHOLD = 0.2; 19 | static const std::vector COLORS = toCvScalarColors(COLOR_CHART); 20 | 21 | namespace 22 | { 23 | cv::Mat processOneFrame(const Ort::Yolov3& osh, const cv::Mat& inputImg, float* dst, const float confThresh = 0.15); 24 | } // namespace 25 | 26 | int main(int argc, char* argv[]) 27 | { 28 | if (argc != 3) { 29 | std::cerr << "Usage: [apps] [path/to/onnx/yolov3.onnx] [path/to/image]" << std::endl; 30 | return EXIT_FAILURE; 31 | } 32 | 33 | const std::string ONNX_MODEL_PATH = argv[1]; 34 | const std::string IMAGE_PATH = argv[2]; 35 | 36 | cv::Mat img = cv::imread(IMAGE_PATH); 37 | 38 | if (img.empty()) { 39 | std::cerr << "Failed to read input image" << std::endl; 40 | return EXIT_FAILURE; 41 | } 42 | 43 | Ort::Yolov3 osh(NUM_CLASSES, ONNX_MODEL_PATH, 0, 44 | std::vector>{ 45 | {1, Ort::Yolov3::IMG_CHANNEL, Ort::Yolov3::IMG_H, Ort::Yolov3::IMG_W}, {1, 2}}); 46 | 47 | osh.initClassNames(MSCOCO_WITHOUT_BG_CLASSES); 48 | 49 | std::vector 
dst(Ort::Yolov3::IMG_CHANNEL * Ort::Yolov3::IMG_H * Ort::Yolov3::IMG_W); 50 | auto result = processOneFrame(osh, img, dst.data()); 51 | cv::imwrite("result.jpg", result); 52 | 53 | return 0; 54 | } 55 | 56 | namespace 57 | { 58 | cv::Mat processOneFrame(const Ort::Yolov3& osh, const cv::Mat& inputImg, float* dst, const float confThresh) 59 | { 60 | int origW = inputImg.cols, origH = inputImg.rows; 61 | std::vector originImageSize{static_cast(origH), static_cast(origW)}; 62 | 63 | float scale = std::min(1.0 * Ort::Yolov3::IMG_W / origW, 1.0 * Ort::Yolov3::IMG_H / origH); 64 | 65 | cv::Mat scaledImg; 66 | cv::resize(inputImg, scaledImg, cv::Size(), scale, scale, cv::INTER_CUBIC); 67 | cv::Mat processedImg(Ort::Yolov3::IMG_H, Ort::Yolov3::IMG_W, CV_8UC3, cv::Scalar(128, 128, 128)); 68 | 69 | scaledImg.copyTo(processedImg(cv::Rect((Ort::Yolov3::IMG_W - scaledImg.cols) / 2, 70 | (Ort::Yolov3::IMG_H - scaledImg.rows) / 2, scaledImg.cols, scaledImg.rows))); 71 | 72 | osh.preprocess(dst, processedImg.data, Ort::Yolov3::IMG_W, Ort::Yolov3::IMG_H, 3); 73 | auto inferenceOutput = osh({dst, originImageSize.data()}); 74 | int numAnchors = inferenceOutput[0].second[1]; 75 | int numOutputBboxes = inferenceOutput[2].second[0]; 76 | DEBUG_LOG("number anchor candidates: %d", numAnchors); 77 | DEBUG_LOG("number output bboxes: %d", numOutputBboxes); 78 | 79 | const float* candidateBboxes = inferenceOutput[0].first; 80 | const float* candidateScores = inferenceOutput[1].first; 81 | const int32_t* outputIndices = reinterpret_cast(inferenceOutput[2].first); 82 | 83 | std::vector> bboxes; 84 | std::vector scores; 85 | std::vector classIndices; 86 | 87 | for (int i = 0; i < numOutputBboxes; ++i) { 88 | int curClassIdx = outputIndices[i * 3 + 1]; 89 | int curCandidateIdx = outputIndices[i * 3 + 2]; 90 | 91 | float curScore = candidateScores[curClassIdx * numAnchors + curCandidateIdx]; 92 | 93 | if (curScore < confThresh) { 94 | continue; 95 | } 96 | 97 | float ymin = 
candidateBboxes[curCandidateIdx * 4 + 0]; 98 | float xmin = candidateBboxes[curCandidateIdx * 4 + 1]; 99 | float ymax = candidateBboxes[curCandidateIdx * 4 + 2]; 100 | float xmax = candidateBboxes[curCandidateIdx * 4 + 3]; 101 | 102 | /* FIX: std::max/std::min require both operands to be the same type; mixing float with int fails template deduction, so clamp with float operands */ xmin = std::max(xmin, 0.f); 103 | ymin = std::max(ymin, 0.f); 104 | xmax = std::min(xmax, inputImg.cols - 1.f); 105 | ymax = std::min(ymax, inputImg.rows - 1.f); 106 | 107 | bboxes.emplace_back(std::array{xmin, ymin, xmax, ymax}); 108 | scores.emplace_back(curScore); 109 | classIndices.emplace_back(curClassIdx); 110 | } 111 | 112 | return bboxes.empty() ? inputImg : visualizeOneImage(inputImg, bboxes, classIndices, COLORS, osh.classNames()); 113 | } 114 | } // namespace 115 | -------------------------------------------------------------------------------- /include/ort_utility/Constants.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Constants.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | 13 | #include "Utility.hpp" 14 | 15 | namespace Ort 16 | { 17 | static const std::vector IMAGENET_CLASSES = { 18 | "tench, Tinca tinca", 19 | "goldfish, Carassius auratus", 20 | "great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias", 21 | "tiger shark, Galeocerdo cuvieri", 22 | "hammerhead, hammerhead shark", 23 | "electric ray, crampfish, numbfish, torpedo", 24 | "stingray", 25 | "cock", 26 | "hen", 27 | "ostrich, Struthio camelus", 28 | "brambling, Fringilla montifringilla", 29 | "goldfinch, Carduelis carduelis", 30 | "house finch, linnet, Carpodacus mexicanus", 31 | "junco, snowbird", 32 | "indigo bunting, indigo finch, indigo bird, Passerina cyanea", 33 | "robin, American robin, Turdus migratorius", 34 | "bulbul", 35 | "jay", 36 | "magpie", 37 | "chickadee", 38 | "water ouzel, dipper", 39 | "kite", 40 | "bald eagle, American eagle, Haliaeetus leucocephalus", 41 | "vulture", 42 | "great grey owl, great gray owl, 
Strix nebulosa", 43 | "European fire salamander, Salamandra salamandra", 44 | "common newt, Triturus vulgaris", 45 | "eft", 46 | "spotted salamander, Ambystoma maculatum", 47 | "axolotl, mud puppy, Ambystoma mexicanum", 48 | "bullfrog, Rana catesbeiana", 49 | "tree frog, tree-frog", 50 | "tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui", 51 | "loggerhead, loggerhead turtle, Caretta caretta", 52 | "leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea", 53 | "mud turtle", 54 | "terrapin", 55 | "box turtle, box tortoise", 56 | "banded gecko", 57 | "common iguana, iguana, Iguana iguana", 58 | "American chameleon, anole, Anolis carolinensis", 59 | "whiptail, whiptail lizard", 60 | "agama", 61 | "frilled lizard, Chlamydosaurus kingi", 62 | "alligator lizard", 63 | "Gila monster, Heloderma suspectum", 64 | "green lizard, Lacerta viridis", 65 | "African chameleon, Chamaeleo chamaeleon", 66 | "Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis", 67 | "African crocodile, Nile crocodile, Crocodylus niloticus", 68 | "American alligator, Alligator mississipiensis", 69 | "triceratops", 70 | "thunder snake, worm snake, Carphophis amoenus", 71 | "ringneck snake, ring-necked snake, ring snake", 72 | "hognose snake, puff adder, sand viper", 73 | "green snake, grass snake", 74 | "king snake, kingsnake", 75 | "garter snake, grass snake", 76 | "water snake", 77 | "vine snake", 78 | "night snake, Hypsiglena torquata", 79 | "boa constrictor, Constrictor constrictor", 80 | "rock python, rock snake, Python sebae", 81 | "Indian cobra, Naja naja", 82 | "green mamba", 83 | "sea snake", 84 | "horned viper, cerastes, sand viper, horned asp, Cerastes cornutus", 85 | "diamondback, diamondback rattlesnake, Crotalus adamanteus", 86 | "sidewinder, horned rattlesnake, Crotalus cerastes", 87 | "trilobite", 88 | "harvestman, daddy longlegs, Phalangium opilio", 89 | "scorpion", 90 | "black and gold garden spider, Argiope aurantia", 91 | "barn 
spider, Araneus cavaticus", 92 | "garden spider, Aranea diademata", 93 | "black widow, Latrodectus mactans", 94 | "tarantula", 95 | "wolf spider, hunting spider", 96 | "tick", 97 | "centipede", 98 | "black grouse", 99 | "ptarmigan", 100 | "ruffed grouse, partridge, Bonasa umbellus", 101 | "prairie chicken, prairie grouse, prairie fowl", 102 | "peacock", 103 | "quail", 104 | "partridge", 105 | "African grey, African gray, Psittacus erithacus", 106 | "macaw", 107 | "sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita", 108 | "lorikeet", 109 | "coucal", 110 | "bee eater", 111 | "hornbill", 112 | "hummingbird", 113 | "jacamar", 114 | "toucan", 115 | "drake", 116 | "red-breasted merganser, Mergus serrator", 117 | "goose", 118 | "black swan, Cygnus atratus", 119 | "tusker", 120 | "echidna, spiny anteater, anteater", 121 | "platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus", 122 | "wallaby, brush kangaroo", 123 | "koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus", 124 | "wombat", 125 | "jellyfish", 126 | "sea anemone, anemone", 127 | "brain coral", 128 | "flatworm, platyhelminth", 129 | "nematode, nematode worm, roundworm", 130 | "conch", 131 | "snail", 132 | "slug", 133 | "sea slug, nudibranch", 134 | "chiton, coat-of-mail shell, sea cradle, polyplacophore", 135 | "chambered nautilus, pearly nautilus, nautilus", 136 | "Dungeness crab, Cancer magister", 137 | "rock crab, Cancer irroratus", 138 | "fiddler crab", 139 | "king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica", 140 | "American lobster, Northern lobster, Maine lobster, Homarus americanus", 141 | "spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish", 142 | "crayfish, crawfish, crawdad, crawdaddy", 143 | "hermit crab", 144 | "isopod", 145 | "white stork, Ciconia ciconia", 146 | "black stork, Ciconia nigra", 147 | "spoonbill", 148 | "flamingo", 149 | "little blue heron, Egretta caerulea", 150 | 
"American egret, great white heron, Egretta albus", 151 | "bittern", 152 | "crane", 153 | "limpkin, Aramus pictus", 154 | "European gallinule, Porphyrio porphyrio", 155 | "American coot, marsh hen, mud hen, water hen, Fulica americana", 156 | "bustard", 157 | "ruddy turnstone, Arenaria interpres", 158 | "red-backed sandpiper, dunlin, Erolia alpina", 159 | "redshank, Tringa totanus", 160 | "dowitcher", 161 | "oystercatcher, oyster catcher", 162 | "pelican", 163 | "king penguin, Aptenodytes patagonica", 164 | "albatross, mollymawk", 165 | "grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus", 166 | "killer whale, killer, orca, grampus, sea wolf, Orcinus orca", 167 | "dugong, Dugong dugon", 168 | "sea lion", 169 | "Chihuahua", 170 | "Japanese spaniel", 171 | "Maltese dog, Maltese terrier, Maltese", 172 | "Pekinese, Pekingese, Peke", 173 | "Shih-Tzu", 174 | "Blenheim spaniel", 175 | "papillon", 176 | "toy terrier", 177 | "Rhodesian ridgeback", 178 | "Afghan hound, Afghan", 179 | "basset, basset hound", 180 | "beagle", 181 | "bloodhound, sleuthhound", 182 | "bluetick", 183 | "black-and-tan coonhound", 184 | "Walker hound, Walker foxhound", 185 | "English foxhound", 186 | "redbone", 187 | "borzoi, Russian wolfhound", 188 | "Irish wolfhound", 189 | "Italian greyhound", 190 | "whippet", 191 | "Ibizan hound, Ibizan Podenco", 192 | "Norwegian elkhound, elkhound", 193 | "otterhound, otter hound", 194 | "Saluki, gazelle hound", 195 | "Scottish deerhound, deerhound", 196 | "Weimaraner", 197 | "Staffordshire bullterrier, Staffordshire bull terrier", 198 | "American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier", 199 | "Bedlington terrier", 200 | "Border terrier", 201 | "Kerry blue terrier", 202 | "Irish terrier", 203 | "Norfolk terrier", 204 | "Norwich terrier", 205 | "Yorkshire terrier", 206 | "wire-haired fox terrier", 207 | "Lakeland terrier", 208 | "Sealyham terrier, Sealyham", 209 | "Airedale, Airedale 
terrier", 210 | "cairn, cairn terrier", 211 | "Australian terrier", 212 | "Dandie Dinmont, Dandie Dinmont terrier", 213 | "Boston bull, Boston terrier", 214 | "miniature schnauzer", 215 | "giant schnauzer", 216 | "standard schnauzer", 217 | "Scotch terrier, Scottish terrier, Scottie", 218 | "Tibetan terrier, chrysanthemum dog", 219 | "silky terrier, Sydney silky", 220 | "soft-coated wheaten terrier", 221 | "West Highland white terrier", 222 | "Lhasa, Lhasa apso", 223 | "flat-coated retriever", 224 | "curly-coated retriever", 225 | "golden retriever", 226 | "Labrador retriever", 227 | "Chesapeake Bay retriever", 228 | "German short-haired pointer", 229 | "vizsla, Hungarian pointer", 230 | "English setter", 231 | "Irish setter, red setter", 232 | "Gordon setter", 233 | "Brittany spaniel", 234 | "clumber, clumber spaniel", 235 | "English springer, English springer spaniel", 236 | "Welsh springer spaniel", 237 | "cocker spaniel, English cocker spaniel, cocker", 238 | "Sussex spaniel", 239 | "Irish water spaniel", 240 | "kuvasz", 241 | "schipperke", 242 | "groenendael", 243 | "malinois", 244 | "briard", 245 | "kelpie", 246 | "komondor", 247 | "Old English sheepdog, bobtail", 248 | "Shetland sheepdog, Shetland sheep dog, Shetland", 249 | "collie", 250 | "Border collie", 251 | "Bouvier des Flandres, Bouviers des Flandres", 252 | "Rottweiler", 253 | "German shepherd, German shepherd dog, German police dog, alsatian", 254 | "Doberman, Doberman pinscher", 255 | "miniature pinscher", 256 | "Greater Swiss Mountain dog", 257 | "Bernese mountain dog", 258 | "Appenzeller", 259 | "EntleBucher", 260 | "boxer", 261 | "bull mastiff", 262 | "Tibetan mastiff", 263 | "French bulldog", 264 | "Great Dane", 265 | "Saint Bernard, St Bernard", 266 | "Eskimo dog, husky", 267 | "malamute, malemute, Alaskan malamute", 268 | "Siberian husky", 269 | "dalmatian, coach dog, carriage dog", 270 | "affenpinscher, monkey pinscher, monkey dog", 271 | "basenji", 272 | "pug, pug-dog", 273 | "Leonberg", 
274 | "Newfoundland, Newfoundland dog", 275 | "Great Pyrenees", 276 | "Samoyed, Samoyede", 277 | "Pomeranian", 278 | "chow, chow chow", 279 | "keeshond", 280 | "Brabancon griffon", 281 | "Pembroke, Pembroke Welsh corgi", 282 | "Cardigan, Cardigan Welsh corgi", 283 | "toy poodle", 284 | "miniature poodle", 285 | "standard poodle", 286 | "Mexican hairless", 287 | "timber wolf, grey wolf, gray wolf, Canis lupus", 288 | "white wolf, Arctic wolf, Canis lupus tundrarum", 289 | "red wolf, maned wolf, Canis rufus, Canis niger", 290 | "coyote, prairie wolf, brush wolf, Canis latrans", 291 | "dingo, warrigal, warragal, Canis dingo", 292 | "dhole, Cuon alpinus", 293 | "African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus", 294 | "hyena, hyaena", 295 | "red fox, Vulpes vulpes", 296 | "kit fox, Vulpes macrotis", 297 | "Arctic fox, white fox, Alopex lagopus", 298 | "grey fox, gray fox, Urocyon cinereoargenteus", 299 | "tabby, tabby cat", 300 | "tiger cat", 301 | "Persian cat", 302 | "Siamese cat, Siamese", 303 | "Egyptian cat", 304 | "cougar, puma, catamount, mountain lion, painter, panther, Felis concolor", 305 | "lynx, catamount", 306 | "leopard, Panthera pardus", 307 | "snow leopard, ounce, Panthera uncia", 308 | "jaguar, panther, Panthera onca, Felis onca", 309 | "lion, king of beasts, Panthera leo", 310 | "tiger, Panthera tigris", 311 | "cheetah, chetah, Acinonyx jubatus", 312 | "brown bear, bruin, Ursus arctos", 313 | "American black bear, black bear, Ursus americanus, Euarctos americanus", 314 | "ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus", 315 | "sloth bear, Melursus ursinus, Ursus ursinus", 316 | "mongoose", 317 | "meerkat, mierkat", 318 | "tiger beetle", 319 | "ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle", 320 | "ground beetle, carabid beetle", 321 | "long-horned beetle, longicorn, longicorn beetle", 322 | "leaf beetle, chrysomelid", 323 | "dung beetle", 324 | "rhinoceros beetle", 325 | "weevil", 326 | "fly", 327 | "bee", 328 
| "ant, emmet, pismire", 329 | "grasshopper, hopper", 330 | "cricket", 331 | "walking stick, walkingstick, stick insect", 332 | "cockroach, roach", 333 | "mantis, mantid", 334 | "cicada, cicala", 335 | "leafhopper", 336 | "lacewing, lacewing fly", 337 | "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, " 338 | "skeeter hawk", 339 | "damselfly", 340 | "admiral", 341 | "ringlet, ringlet butterfly", 342 | "monarch, monarch butterfly, milkweed butterfly, Danaus plexippus", 343 | "cabbage butterfly", 344 | "sulphur butterfly, sulfur butterfly", 345 | "lycaenid, lycaenid butterfly", 346 | "starfish, sea star", 347 | "sea urchin", 348 | "sea cucumber, holothurian", 349 | "wood rabbit, cottontail, cottontail rabbit", 350 | "hare", 351 | "Angora, Angora rabbit", 352 | "hamster", 353 | "porcupine, hedgehog", 354 | "fox squirrel, eastern fox squirrel, Sciurus niger", 355 | "marmot", 356 | "beaver", 357 | "guinea pig, Cavia cobaya", 358 | "sorrel", 359 | "zebra", 360 | "hog, pig, grunter, squealer, Sus scrofa", 361 | "wild boar, boar, Sus scrofa", 362 | "warthog", 363 | "hippopotamus, hippo, river horse, Hippopotamus amphibius", 364 | "ox", 365 | "water buffalo, water ox, Asiatic buffalo, Bubalus bubalis", 366 | "bison", 367 | "ram, tup", 368 | "bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis", 369 | "ibex, Capra ibex", 370 | "hartebeest", 371 | "impala, Aepyceros melampus", 372 | "gazelle", 373 | "Arabian camel, dromedary, Camelus dromedarius", 374 | "llama", 375 | "weasel", 376 | "mink", 377 | "polecat, fitch, foulmart, foumart, Mustela putorius", 378 | "black-footed ferret, ferret, Mustela nigripes", 379 | "otter", 380 | "skunk, polecat, wood pussy", 381 | "badger", 382 | "armadillo", 383 | "three-toed sloth, ai, Bradypus tridactylus", 384 | "orangutan, orang, orangutang, Pongo pygmaeus", 385 | "gorilla, Gorilla gorilla", 386 | "chimpanzee, chimp, Pan troglodytes", 
387 | "gibbon, Hylobates lar", 388 | "siamang, Hylobates syndactylus, Symphalangus syndactylus", 389 | "guenon, guenon monkey", 390 | "patas, hussar monkey, Erythrocebus patas", 391 | "baboon", 392 | "macaque", 393 | "langur", 394 | "colobus, colobus monkey", 395 | "proboscis monkey, Nasalis larvatus", 396 | "marmoset", 397 | "capuchin, ringtail, Cebus capucinus", 398 | "howler monkey, howler", 399 | "titi, titi monkey", 400 | "spider monkey, Ateles geoffroyi", 401 | "squirrel monkey, Saimiri sciureus", 402 | "Madagascar cat, ring-tailed lemur, Lemur catta", 403 | "indri, indris, Indri indri, Indri brevicaudatus", 404 | "Indian elephant, Elephas maximus", 405 | "African elephant, Loxodonta africana", 406 | "lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens", 407 | "giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca", 408 | "barracouta, snoek", 409 | "eel", 410 | "coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch", 411 | "rock beauty, Holocanthus tricolor", 412 | "anemone fish", 413 | "sturgeon", 414 | "gar, garfish, garpike, billfish, Lepisosteus osseus", 415 | "lionfish", 416 | "puffer, pufferfish, blowfish, globefish", 417 | "abacus", 418 | "abaya", 419 | "academic gown, academic robe, judge's robe", 420 | "accordion, piano accordion, squeeze box", 421 | "acoustic guitar", 422 | "aircraft carrier, carrier, flattop, attack aircraft carrier", 423 | "airliner", 424 | "airship, dirigible", 425 | "altar", 426 | "ambulance", 427 | "amphibian, amphibious vehicle", 428 | "analog clock", 429 | "apiary, bee house", 430 | "apron", 431 | "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin", 432 | "assault rifle, assault gun", 433 | "backpack, back pack, knapsack, packsack, rucksack, haversack", 434 | "bakery, bakeshop, bakehouse", 435 | "balance beam, beam", 436 | "balloon", 437 | "ballpoint, ballpoint pen, ballpen, Biro", 438 | "Band Aid", 439 | "banjo", 440 | 
"bannister, banister, balustrade, balusters, handrail", 441 | "barbell", 442 | "barber chair", 443 | "barbershop", 444 | "barn", 445 | "barometer", 446 | "barrel, cask", 447 | "barrow, garden cart, lawn cart, wheelbarrow", 448 | "baseball", 449 | "basketball", 450 | "bassinet", 451 | "bassoon", 452 | "bathing cap, swimming cap", 453 | "bath towel", 454 | "bathtub, bathing tub, bath, tub", 455 | "beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon", 456 | "beacon, lighthouse, beacon light, pharos", 457 | "beaker", 458 | "bearskin, busby, shako", 459 | "beer bottle", 460 | "beer glass", 461 | "bell cote, bell cot", 462 | "bib", 463 | "bicycle-built-for-two, tandem bicycle, tandem", 464 | "bikini, two-piece", 465 | "binder, ring-binder", 466 | "binoculars, field glasses, opera glasses", 467 | "birdhouse", 468 | "boathouse", 469 | "bobsled, bobsleigh, bob", 470 | "bolo tie, bolo, bola tie, bola", 471 | "bonnet, poke bonnet", 472 | "bookcase", 473 | "bookshop, bookstore, bookstall", 474 | "bottlecap", 475 | "bow", 476 | "bow tie, bow-tie, bowtie", 477 | "brass, memorial tablet, plaque", 478 | "brassiere, bra, bandeau", 479 | "breakwater, groin, groyne, mole, bulwark, seawall, jetty", 480 | "breastplate, aegis, egis", 481 | "broom", 482 | "bucket, pail", 483 | "buckle", 484 | "bulletproof vest", 485 | "bullet train, bullet", 486 | "butcher shop, meat market", 487 | "cab, hack, taxi, taxicab", 488 | "caldron, cauldron", 489 | "candle, taper, wax light", 490 | "cannon", 491 | "canoe", 492 | "can opener, tin opener", 493 | "cardigan", 494 | "car mirror", 495 | "carousel, carrousel, merry-go-round, roundabout, whirligig", 496 | "carpenter's kit, tool kit", 497 | "carton", 498 | "car wheel", 499 | "cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic " 500 | "teller, ATM", 501 | "cassette", 502 | "cassette player", 503 | "castle", 504 | "catamaran", 505 | "CD player", 506 | "cello, 
violoncello", 507 | "cellular telephone, cellular phone, cellphone, cell, mobile phone", 508 | "chain", 509 | "chainlink fence", 510 | "chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour", 511 | "chain saw, chainsaw", 512 | "chest", 513 | "chiffonier, commode", 514 | "chime, bell, gong", 515 | "china cabinet, china closet", 516 | "Christmas stocking", 517 | "church, church building", 518 | "cinema, movie theater, movie theatre, movie house, picture palace", 519 | "cleaver, meat cleaver, chopper", 520 | "cliff dwelling", 521 | "cloak", 522 | "clog, geta, patten, sabot", 523 | "cocktail shaker", 524 | "coffee mug", 525 | "coffeepot", 526 | "coil, spiral, volute, whorl, helix", 527 | "combination lock", 528 | "computer keyboard, keypad", 529 | "confectionery, confectionary, candy store", 530 | "container ship, containership, container vessel", 531 | "convertible", 532 | "corkscrew, bottle screw", 533 | "cornet, horn, trumpet, trump", 534 | "cowboy boot", 535 | "cowboy hat, ten-gallon hat", 536 | "cradle", 537 | "crane", 538 | "crash helmet", 539 | "crate", 540 | "crib, cot", 541 | "Crock Pot", 542 | "croquet ball", 543 | "crutch", 544 | "cuirass", 545 | "dam, dike, dyke", 546 | "desk", 547 | "desktop computer", 548 | "dial telephone, dial phone", 549 | "diaper, nappy, napkin", 550 | "digital clock", 551 | "digital watch", 552 | "dining table, board", 553 | "dishrag, dishcloth", 554 | "dishwasher, dish washer, dishwashing machine", 555 | "disk brake, disc brake", 556 | "dock, dockage, docking facility", 557 | "dogsled, dog sled, dog sleigh", 558 | "dome", 559 | "doormat, welcome mat", 560 | "drilling platform, offshore rig", 561 | "drum, membranophone, tympan", 562 | "drumstick", 563 | "dumbbell", 564 | "Dutch oven", 565 | "electric fan, blower", 566 | "electric guitar", 567 | "electric locomotive", 568 | "entertainment center", 569 | "envelope", 570 | "espresso maker", 571 | "face powder", 572 | "feather boa, boa", 573 | "file, file 
cabinet, filing cabinet", 574 | "fireboat", 575 | "fire engine, fire truck", 576 | "fire screen, fireguard", 577 | "flagpole, flagstaff", 578 | "flute, transverse flute", 579 | "folding chair", 580 | "football helmet", 581 | "forklift", 582 | "fountain", 583 | "fountain pen", 584 | "four-poster", 585 | "freight car", 586 | "French horn, horn", 587 | "frying pan, frypan, skillet", 588 | "fur coat", 589 | "garbage truck, dustcart", 590 | "gasmask, respirator, gas helmet", 591 | "gas pump, gasoline pump, petrol pump, island dispenser", 592 | "goblet", 593 | "go-kart", 594 | "golf ball", 595 | "golfcart, golf cart", 596 | "gondola", 597 | "gong, tam-tam", 598 | "gown", 599 | "grand piano, grand", 600 | "greenhouse, nursery, glasshouse", 601 | "grille, radiator grille", 602 | "grocery store, grocery, food market, market", 603 | "guillotine", 604 | "hair slide", 605 | "hair spray", 606 | "half track", 607 | "hammer", 608 | "hamper", 609 | "hand blower, blow dryer, blow drier, hair dryer, hair drier", 610 | "hand-held computer, hand-held microcomputer", 611 | "handkerchief, hankie, hanky, hankey", 612 | "hard disc, hard disk, fixed disk", 613 | "harmonica, mouth organ, harp, mouth harp", 614 | "harp", 615 | "harvester, reaper", 616 | "hatchet", 617 | "holster", 618 | "home theater, home theatre", 619 | "honeycomb", 620 | "hook, claw", 621 | "hoopskirt, crinoline", 622 | "horizontal bar, high bar", 623 | "horse cart, horse-cart", 624 | "hourglass", 625 | "iPod", 626 | "iron, smoothing iron", 627 | "jack-o'-lantern", 628 | "jean, blue jean, denim", 629 | "jeep, landrover", 630 | "jersey, T-shirt, tee shirt", 631 | "jigsaw puzzle", 632 | "jinrikisha, ricksha, rickshaw", 633 | "joystick", 634 | "kimono", 635 | "knee pad", 636 | "knot", 637 | "lab coat, laboratory coat", 638 | "ladle", 639 | "lampshade, lamp shade", 640 | "laptop, laptop computer", 641 | "lawn mower, mower", 642 | "lens cap, lens cover", 643 | "letter opener, paper knife, paperknife", 644 | "library", 645 | 
"lifeboat", 646 | "lighter, light, igniter, ignitor", 647 | "limousine, limo", 648 | "liner, ocean liner", 649 | "lipstick, lip rouge", 650 | "Loafer", 651 | "lotion", 652 | "loudspeaker, speaker, speaker unit, loudspeaker system, speaker system", 653 | "loupe, jeweler's loupe", 654 | "lumbermill, sawmill", 655 | "magnetic compass", 656 | "mailbag, postbag", 657 | "mailbox, letter box", 658 | "maillot", 659 | "maillot, tank suit", 660 | "manhole cover", 661 | "maraca", 662 | "marimba, xylophone", 663 | "mask", 664 | "matchstick", 665 | "maypole", 666 | "maze, labyrinth", 667 | "measuring cup", 668 | "medicine chest, medicine cabinet", 669 | "megalith, megalithic structure", 670 | "microphone, mike", 671 | "microwave, microwave oven", 672 | "military uniform", 673 | "milk can", 674 | "minibus", 675 | "miniskirt, mini", 676 | "minivan", 677 | "missile", 678 | "mitten", 679 | "mixing bowl", 680 | "mobile home, manufactured home", 681 | "Model T", 682 | "modem", 683 | "monastery", 684 | "monitor", 685 | "moped", 686 | "mortar", 687 | "mortarboard", 688 | "mosque", 689 | "mosquito net", 690 | "motor scooter, scooter", 691 | "mountain bike, all-terrain bike, off-roader", 692 | "mountain tent", 693 | "mouse, computer mouse", 694 | "mousetrap", 695 | "moving van", 696 | "muzzle", 697 | "nail", 698 | "neck brace", 699 | "necklace", 700 | "nipple", 701 | "notebook, notebook computer", 702 | "obelisk", 703 | "oboe, hautboy, hautbois", 704 | "ocarina, sweet potato", 705 | "odometer, hodometer, mileometer, milometer", 706 | "oil filter", 707 | "organ, pipe organ", 708 | "oscilloscope, scope, cathode-ray oscilloscope, CRO", 709 | "overskirt", 710 | "oxcart", 711 | "oxygen mask", 712 | "packet", 713 | "paddle, boat paddle", 714 | "paddlewheel, paddle wheel", 715 | "padlock", 716 | "paintbrush", 717 | "pajama, pyjama, pj's, jammies", 718 | "palace", 719 | "panpipe, pandean pipe, syrinx", 720 | "paper towel", 721 | "parachute, chute", 722 | "parallel bars, bars", 723 | "park 
bench", 724 | "parking meter", 725 | "passenger car, coach, carriage", 726 | "patio, terrace", 727 | "pay-phone, pay-station", 728 | "pedestal, plinth, footstall", 729 | "pencil box, pencil case", 730 | "pencil sharpener", 731 | "perfume, essence", 732 | "Petri dish", 733 | "photocopier", 734 | "pick, plectrum, plectron", 735 | "pickelhaube", 736 | "picket fence, paling", 737 | "pickup, pickup truck", 738 | "pier", 739 | "piggy bank, penny bank", 740 | "pill bottle", 741 | "pillow", 742 | "ping-pong ball", 743 | "pinwheel", 744 | "pirate, pirate ship", 745 | "pitcher, ewer", 746 | "plane, carpenter's plane, woodworking plane", 747 | "planetarium", 748 | "plastic bag", 749 | "plate rack", 750 | "plow, plough", 751 | "plunger, plumber's helper", 752 | "Polaroid camera, Polaroid Land camera", 753 | "pole", 754 | "police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria", 755 | "poncho", 756 | "pool table, billiard table, snooker table", 757 | "pop bottle, soda bottle", 758 | "pot, flowerpot", 759 | "potter's wheel", 760 | "power drill", 761 | "prayer rug, prayer mat", 762 | "printer", 763 | "prison, prison house", 764 | "projectile, missile", 765 | "projector", 766 | "puck, hockey puck", 767 | "punching bag, punch bag, punching ball, punchball", 768 | "purse", 769 | "quill, quill pen", 770 | "quilt, comforter, comfort, puff", 771 | "racer, race car, racing car", 772 | "racket, racquet", 773 | "radiator", 774 | "radio, wireless", 775 | "radio telescope, radio reflector", 776 | "rain barrel", 777 | "recreational vehicle, RV, R.V.", 778 | "reel", 779 | "reflex camera", 780 | "refrigerator, icebox", 781 | "remote control, remote", 782 | "restaurant, eating house, eating place, eatery", 783 | "revolver, six-gun, six-shooter", 784 | "rifle", 785 | "rocking chair, rocker", 786 | "rotisserie", 787 | "rubber eraser, rubber, pencil eraser", 788 | "rugby ball", 789 | "rule, ruler", 790 | "running shoe", 791 | "safe", 792 | "safety pin", 793 | "saltshaker, salt 
shaker", 794 | "sandal", 795 | "sarong", 796 | "sax, saxophone", 797 | "scabbard", 798 | "scale, weighing machine", 799 | "school bus", 800 | "schooner", 801 | "scoreboard", 802 | "screen, CRT screen", 803 | "screw", 804 | "screwdriver", 805 | "seat belt, seatbelt", 806 | "sewing machine", 807 | "shield, buckler", 808 | "shoe shop, shoe-shop, shoe store", 809 | "shoji", 810 | "shopping basket", 811 | "shopping cart", 812 | "shovel", 813 | "shower cap", 814 | "shower curtain", 815 | "ski", 816 | "ski mask", 817 | "sleeping bag", 818 | "slide rule, slipstick", 819 | "sliding door", 820 | "slot, one-armed bandit", 821 | "snorkel", 822 | "snowmobile", 823 | "snowplow, snowplough", 824 | "soap dispenser", 825 | "soccer ball", 826 | "sock", 827 | "solar dish, solar collector, solar furnace", 828 | "sombrero", 829 | "soup bowl", 830 | "space bar", 831 | "space heater", 832 | "space shuttle", 833 | "spatula", 834 | "speedboat", 835 | "spider web, spider's web", 836 | "spindle", 837 | "sports car, sport car", 838 | "spotlight, spot", 839 | "stage", 840 | "steam locomotive", 841 | "steel arch bridge", 842 | "steel drum", 843 | "stethoscope", 844 | "stole", 845 | "stone wall", 846 | "stopwatch, stop watch", 847 | "stove", 848 | "strainer", 849 | "streetcar, tram, tramcar, trolley, trolley car", 850 | "stretcher", 851 | "studio couch, day bed", 852 | "stupa, tope", 853 | "submarine, pigboat, sub, U-boat", 854 | "suit, suit of clothes", 855 | "sundial", 856 | "sunglass", 857 | "sunglasses, dark glasses, shades", 858 | "sunscreen, sunblock, sun blocker", 859 | "suspension bridge", 860 | "swab, swob, mop", 861 | "sweatshirt", 862 | "swimming trunks, bathing trunks", 863 | "swing", 864 | "switch, electric switch, electrical switch", 865 | "syringe", 866 | "table lamp", 867 | "tank, army tank, armored combat vehicle, armoured combat vehicle", 868 | "tape player", 869 | "teapot", 870 | "teddy, teddy bear", 871 | "television, television system", 872 | "tennis ball", 873 | "thatch, 
thatched roof", 874 | "theater curtain, theatre curtain", 875 | "thimble", 876 | "thresher, thrasher, threshing machine", 877 | "throne", 878 | "tile roof", 879 | "toaster", 880 | "tobacco shop, tobacconist shop, tobacconist", 881 | "toilet seat", 882 | "torch", 883 | "totem pole", 884 | "tow truck, tow car, wrecker", 885 | "toyshop", 886 | "tractor", 887 | "trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi", 888 | "tray", 889 | "trench coat", 890 | "tricycle, trike, velocipede", 891 | "trimaran", 892 | "tripod", 893 | "triumphal arch", 894 | "trolleybus, trolley coach, trackless trolley", 895 | "trombone", 896 | "tub, vat", 897 | "turnstile", 898 | "typewriter keyboard", 899 | "umbrella", 900 | "unicycle, monocycle", 901 | "upright, upright piano", 902 | "vacuum, vacuum cleaner", 903 | "vase", 904 | "vault", 905 | "velvet", 906 | "vending machine", 907 | "vestment", 908 | "viaduct", 909 | "violin, fiddle", 910 | "volleyball", 911 | "waffle iron", 912 | "wall clock", 913 | "wallet, billfold, notecase, pocketbook", 914 | "wardrobe, closet, press", 915 | "warplane, military plane", 916 | "washbasin, handbasin, washbowl, lavabo, wash-hand basin", 917 | "washer, automatic washer, washing machine", 918 | "water bottle", 919 | "water jug", 920 | "water tower", 921 | "whiskey jug", 922 | "whistle", 923 | "wig", 924 | "window screen", 925 | "window shade", 926 | "Windsor tie", 927 | "wine bottle", 928 | "wing", 929 | "wok", 930 | "wooden spoon", 931 | "wool, woolen, woollen", 932 | "worm fence, snake fence, snake-rail fence, Virginia fence", 933 | "wreck", 934 | "yawl", 935 | "yurt", 936 | "web site, website, internet site, site", 937 | "comic book", 938 | "crossword puzzle, crossword", 939 | "street sign", 940 | "traffic light, traffic signal, stoplight", 941 | "book jacket, dust cover, dust jacket, dust wrapper", 942 | "menu", 943 | "plate", 944 | "guacamole", 945 | "consomme", 946 | "hot pot, hotpot", 947 | "trifle", 948 | "ice cream, icecream", 
949 | "ice lolly, lolly, lollipop, popsicle", 950 | "French loaf", 951 | "bagel, beigel", 952 | "pretzel", 953 | "cheeseburger", 954 | "hotdog, hot dog, red hot", 955 | "mashed potato", 956 | "head cabbage", 957 | "broccoli", 958 | "cauliflower", 959 | "zucchini, courgette", 960 | "spaghetti squash", 961 | "acorn squash", 962 | "butternut squash", 963 | "cucumber, cuke", 964 | "artichoke, globe artichoke", 965 | "bell pepper", 966 | "cardoon", 967 | "mushroom", 968 | "Granny Smith", 969 | "strawberry", 970 | "orange", 971 | "lemon", 972 | "fig", 973 | "pineapple, ananas", 974 | "banana", 975 | "jackfruit, jak, jack", 976 | "custard apple", 977 | "pomegranate", 978 | "hay", 979 | "carbonara", 980 | "chocolate sauce, chocolate syrup", 981 | "dough", 982 | "meat loaf, meatloaf", 983 | "pizza, pizza pie", 984 | "potpie", 985 | "burrito", 986 | "red wine", 987 | "espresso", 988 | "cup", 989 | "eggnog", 990 | "alp", 991 | "bubble", 992 | "cliff, drop, drop-off", 993 | "coral reef", 994 | "geyser", 995 | "lakeside, lakeshore", 996 | "promontory, headland, head, foreland", 997 | "sandbar, sand bar", 998 | "seashore, coast, seacoast, sea-coast", 999 | "valley, vale", 1000 | "volcano", 1001 | "ballplayer, baseball player", 1002 | "groom, bridegroom", 1003 | "scuba diver", 1004 | "rapeseed", 1005 | "daisy", 1006 | "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", 1007 | "corn", 1008 | "acorn", 1009 | "hip, rose hip, rosehip", 1010 | "buckeye, horse chestnut, conker", 1011 | "coral fungus", 1012 | "agaric", 1013 | "gyromitra", 1014 | "stinkhorn, carrion fungus", 1015 | "earthstar", 1016 | "hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa", 1017 | "bolete", 1018 | "ear, spike, capitulum", 1019 | "toilet tissue, toilet paper, bathroom tissue", 1020 | }; 1021 | 1022 | static constexpr int64_t IMAGENET_NUM_CLASSES = 1000; 1023 | 1024 | // BGR 1025 | static const std::vector IMAGENET_MEAN = {0.406, 0.456, 0.485}; 
1026 | 1027 | static const std::vector IMAGENET_STD = {0.225, 0.224, 0.229}; 1028 | 1029 | static const std::vector MSCOCO_CLASSES = { 1030 | "background", "person", "bicycle", "car", "motorbike", "aeroplane", "bus", 1031 | "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", 1032 | "bench", "bird", "cat", "dog", "horse", "sheep", "cow", 1033 | "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", 1034 | "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", 1035 | "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", 1036 | "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", 1037 | "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", 1038 | "cake", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", 1039 | "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", 1040 | "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", 1041 | "scissors", "teddy bear", "hair drier", "toothbrush"}; 1042 | 1043 | static constexpr int64_t MSCOCO_NUM_CLASSES = 81; 1044 | 1045 | static const std::vector> MSCOCO_COLOR_CHART = generateColorCharts(MSCOCO_NUM_CLASSES); 1046 | 1047 | static const std::vector VOC_CLASSES = { 1048 | "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", 1049 | "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; 1050 | 1051 | static constexpr int64_t VOC_NUM_CLASSES = 20; 1052 | 1053 | static const std::vector> VOC_COLOR_CHART = generateColorCharts(VOC_NUM_CLASSES); 1054 | 1055 | static const std::vector CITY_SCAPES_CLASSES = { 1056 | "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light", "traffic sign", "vegetation", "terrain", 1057 | "sky", "person", "rider", "car", "truck", "bus", "train", "motorcycle", "bicycle"}; 1058 | 1059 | static constexpr 
int64_t CITY_SCAPES_NUM_CLASSES = 19; 1060 | 1061 | static const std::vector> CITY_SCAPES_COLOR_CHART = { 1062 | {128, 64, 128}, {244, 35, 232}, {70, 70, 70}, {102, 102, 156}, {190, 153, 153}, {153, 153, 153}, {250, 170, 30}, 1063 | {220, 220, 0}, {107, 142, 35}, {152, 251, 152}, {70, 130, 180}, {220, 20, 60}, {255, 0, 0}, {0, 0, 142}, 1064 | {0, 0, 70}, {0, 60, 100}, {0, 80, 100}, {0, 0, 230}, {119, 11, 32}}; 1065 | } // namespace Ort 1066 | -------------------------------------------------------------------------------- /include/ort_utility/ImageClassificationOrtSessionHandler.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ImageClassificationOrtSessionHandler.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include "ImageRecognitionOrtSessionHandlerBase.hpp" 15 | 16 | namespace Ort 17 | { 18 | class ImageClassificationOrtSessionHandler : public ImageRecognitionOrtSessionHandlerBase 19 | { 20 | public: 21 | ImageClassificationOrtSessionHandler( 22 | const uint16_t numClasses, // 23 | const std::string& modelPath, // 24 | const std::optional& gpuIdx = std::nullopt, // 25 | const std::optional>>& inputShapes = std::nullopt); 26 | 27 | ~ImageClassificationOrtSessionHandler(); 28 | 29 | std::vector> topK(const std::vector& inferenceOutput, // 30 | const uint16_t k = 1, // 31 | const bool useSoftmax = true) const; 32 | 33 | std::string topKToString(const std::vector& inferenceOutput, // 34 | const uint16_t k = 1, // 35 | const bool useSoftmax = true) const; 36 | }; 37 | } // namespace Ort 38 | -------------------------------------------------------------------------------- /include/ort_utility/ImageRecognitionOrtSessionHandlerBase.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ImageRecognitionOrtSessionHandlerBase.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 
10 | #include 11 | #include 12 | #include 13 | 14 | #include "OrtSessionHandler.hpp" 15 | 16 | namespace Ort 17 | { 18 | class ImageRecognitionOrtSessionHandlerBase : public OrtSessionHandler 19 | { 20 | public: 21 | ImageRecognitionOrtSessionHandlerBase( 22 | const uint16_t numClasses, // 23 | const std::string& modelPath, // 24 | const std::optional& gpuIdx = std::nullopt, // 25 | const std::optional>>& inputShapes = std::nullopt); 26 | 27 | ~ImageRecognitionOrtSessionHandlerBase(); 28 | 29 | void initClassNames(const std::vector& classNames); 30 | 31 | virtual void preprocess(float* dst, // 32 | const unsigned char* src, // 33 | const int64_t targetImgWidth, // 34 | const int64_t targetImgHeight, // 35 | const int numChanels, // 36 | const std::vector& meanVal = {}, // 37 | const std::vector& stdVal = {}) const; 38 | 39 | uint16_t numClasses() const 40 | { 41 | return m_numClasses; 42 | } 43 | 44 | const std::vector& classNames() const 45 | { 46 | return m_classNames; 47 | } 48 | 49 | protected: 50 | const uint16_t m_numClasses; 51 | std::vector m_classNames; 52 | }; 53 | } // namespace Ort 54 | -------------------------------------------------------------------------------- /include/ort_utility/ObjectDetectionOrtSessionHandler.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ObjectDetectionOrtSessionHandler.hpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | 13 | #include "ImageRecognitionOrtSessionHandlerBase.hpp" 14 | 15 | namespace Ort 16 | { 17 | class ObjectDetectionOrtSessionHandler : public ImageRecognitionOrtSessionHandlerBase 18 | { 19 | public: 20 | ObjectDetectionOrtSessionHandler( 21 | const uint16_t numClasses, // 22 | const std::string& modelPath, // 23 | const std::optional& gpuIdx = std::nullopt, // 24 | const std::optional>>& inputShapes = std::nullopt); 25 | 26 | ~ObjectDetectionOrtSessionHandler(); 27 | }; 28 | } // namespace Ort 29 | 
// --------------------------------------------------------------------------------
// /include/ort_utility/OrtSessionHandler.hpp
// --------------------------------------------------------------------------------
/**
 * @file OrtSessionHandler.hpp
 *
 * @author btran
 *
 */

#pragma once

// NOTE(review): the extraction stripped all <...> tokens; template arguments and
// include targets below are reconstructed from usage -- confirm against upstream.
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>

namespace Ort
{
// Pimpl wrapper around an onnxruntime inference session; the implementation class
// (and all onnxruntime headers) stay out of this public header.
class OrtSessionHandler
{
 public:
    // DataOutputType -> (pointer to output data, shape of output data)
    using DataOutputType = std::pair<float*, std::vector<int64_t>>;

    explicit OrtSessionHandler(const std::string& modelPath,                        //
                               const std::optional<size_t>& gpuIdx = std::nullopt,  //
                               const std::optional<std::vector<std::vector<int64_t>>>& inputShapes = std::nullopt);
    ~OrtSessionHandler();

    // multiple inputs, multiple outputs
    std::vector<DataOutputType> operator()(const std::vector<float*>& inputImgData) const;

    void updateInputShapes(const std::vector<std::vector<int64_t>>& inputShapes);

 private:
    class OrtSessionHandlerIml;  // defined in OrtSessionHandler.cpp
    std::unique_ptr<OrtSessionHandlerIml> m_piml;
};
}  // namespace Ort

// --------------------------------------------------------------------------------
// /include/ort_utility/Utility.hpp
// --------------------------------------------------------------------------------
/**
 * @file Utility.hpp
 *
 * @author btran
 *
 */

#pragma once

#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <deque>
#include <limits>
#include <numeric>
#include <ostream>
#include <vector>

#if ENABLE_DEBUG
#include <iostream>
#endif

// Streams any single-type-parameter standard container as space-separated elements.
template <typename T, template <typename, typename = std::allocator<T>> class Container>
std::ostream& operator<<(std::ostream& os, const Container<T>& container)
{
    using ContainerType = Container<T>;
    for (typename ContainerType::const_iterator it = container.begin(); it != container.end(); ++it) {
        os << *it << " ";
    }

    return os;
}

namespace
{
// Returns the indices of v ordered so that v[result.front()] is the smallest value;
// stable for equal elements.
template <typename T> std::deque<size_t> sortIndexes(const std::vector<T>& v)
{
    std::deque<size_t> indices(v.size());
    std::iota(indices.begin(), indices.end(), 0);

    std::stable_sort(std::begin(indices), std::end(indices), [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; });

    return indices;
}
}  // namespace

namespace Ort
{
#if ENABLE_DEBUG
#define DEBUG_LOG(...)                                                                                                 \
    {                                                                                                                  \
        char str[100];                                                                                                 \
        snprintf(str, sizeof(str), __VA_ARGS__);                                                                       \
        std::cout << "[" << __FILE__ << "][" << __FUNCTION__ << "][Line " << __LINE__ << "] >>> " << str << std::endl; \
    }
#else
#define DEBUG_LOG(...)
#endif

// In-place numerically-stable softmax over input[0, inputLen).
inline void softmax(float* input, const size_t inputLen)
{
    // FIX: guard empty input -- max_element on an empty range must not be dereferenced.
    if (inputLen == 0) {
        return;
    }

    const float maxVal = *std::max_element(input, input + inputLen);

    // FIX: accumulate in float (init was a double 0.0) and drop the pointless
    // std::move on a scalar.
    const float sum = std::accumulate(input, input + inputLen, 0.0f,
                                      [maxVal](const float acc, const float v) { return acc + expf(v - maxVal); });

    // log-sum-exp trick: exp(x - max - log(sum)) == exp(x - max) / sum
    const float offset = maxVal + logf(sum);
    for (auto it = input; it != (input + inputLen); ++it) {
        *it = expf(*it - offset);
    }
}

inline float sigmoid(const float x)
{
    // FIX: compute in float; the previous 1.0 / (1.0 + ...) promoted to double
    // and narrowed back on return.
    return 1.0f / (1.0f + expf(-x));
}

// Greedy non-maximum suppression on [xmin, ymin, xmax, ymax] boxes.
// Keeps at most topK boxes, highest score first; boxes overlapping a kept box
// with IoU > overlapThresh are discarded. Returns indices into bboxes.
inline std::vector<uint64_t> nms(const std::vector<std::array<float, 4>>& bboxes,             //
                                 const std::vector<float>& scores,                            //
                                 const float overlapThresh = 0.45,                            //
                                 const uint64_t topK = std::numeric_limits<uint64_t>::max()   //
)
{
    assert(bboxes.size() == scores.size());

    // FIX: the old assert(bboxes.size() > 0) compiled away in release builds and
    // (boxesLength - realK) then underflowed for empty input; return empty instead.
    if (bboxes.empty()) {
        return {};
    }

    const uint64_t boxesLength = bboxes.size();
    const uint64_t realK = std::max(std::min(boxesLength, topK), static_cast<uint64_t>(1));

    std::vector<uint64_t> keepIndices;
    keepIndices.reserve(realK);

    // ascending by score, so the best box is at the back
    std::deque<size_t> sortedIndices = ::sortIndexes(scores);

    // keep only the topK highest-scored boxes
    for (uint64_t i = 0; i < boxesLength - realK; ++i) {
        sortedIndices.pop_front();
    }

    std::vector<float> areas;
    areas.reserve(boxesLength);
    std::transform(std::begin(bboxes), std::end(bboxes), std::back_inserter(areas),
                   [](const auto& elem) { return (elem[2] - elem[0]) * (elem[3] - elem[1]); });

    while (!sortedIndices.empty()) {
        const uint64_t currentIdx = sortedIndices.back();
        keepIndices.emplace_back(currentIdx);

        if (sortedIndices.size() == 1) {
            break;
        }

        sortedIndices.pop_back();
        // FIX: removed the `ious` vector that was reserved but never used.

        const auto& curBbox = bboxes[currentIdx];
        const float curArea = areas[currentIdx];

        std::deque<size_t> newSortedIndices;

        for (const size_t elem : sortedIndices) {
            const auto& bbox = bboxes[elem];
            const float tmpXmin = std::max(curBbox[0], bbox[0]);
            const float tmpYmin = std::max(curBbox[1], bbox[1]);
            const float tmpXmax = std::min(curBbox[2], bbox[2]);
            const float tmpYmax = std::min(curBbox[3], bbox[3]);

            // FIX: compare against 0.0f -- std::max(float, double) does not deduce.
            const float tmpW = std::max(tmpXmax - tmpXmin, 0.0f);
            const float tmpH = std::max(tmpYmax - tmpYmin, 0.0f);

            const float intersection = tmpW * tmpH;
            const float unionArea = areas[elem] + curArea - intersection;
            const float iou = intersection / unionArea;

            // survivors: boxes that do NOT overlap the kept box too much
            if (iou <= overlapThresh) {
                newSortedIndices.emplace_back(elem);
            }
        }

        sortedIndices = newSortedIndices;
    }

    return keepIndices;
}

// Generates numClasses pseudo-random RGB triplets in [0, 255]. Deterministic for a
// fixed seed on a given platform (std::rand's sequence is implementation-defined).
inline std::vector<std::array<int, 3>> generateColorCharts(const uint16_t numClasses = 1000, const uint16_t seed = 255)
{
    std::srand(seed);
    std::vector<std::array<int, 3>> colors;
    colors.reserve(numClasses);
    for (uint16_t i = 0; i < numClasses; ++i) {
        colors.emplace_back(std::array<int, 3>{std::rand() % 256, std::rand() % 256, std::rand() % 256});
    }

    return colors;
}
}  // namespace Ort

// --------------------------------------------------------------------------------
// /include/ort_utility/ort_utility.hpp  (umbrella header -- truncated in this chunk)
// --------------------------------------------------------------------------------
/**
 * @file ort_utility.hpp
 *
 * @author btran
 *
 */
#include "Constants.hpp"

#include "ImageClassificationOrtSessionHandler.hpp"

#include "ImageRecognitionOrtSessionHandlerBase.hpp"

#include "OrtSessionHandler.hpp"

#include "ObjectDetectionOrtSessionHandler.hpp"

#include "Utility.hpp"
--------------------------------------------------------------------------------
/scripts/get_cuda_environment_variables.bash:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Export CUDA_HOME / CUDA_VERSION / CUDNN_HOME when both CUDA (nvcc) and cuDNN
# are found; otherwise print a message and export nothing.
# Meant to be sourced (exports variables into the calling shell).

function command_exists
{
    type "$1" &> /dev/null;
}

if ! command_exists nvcc; then
    echo "Cuda package not found"
else
    if [ ! "$(dpkg -S cudnn)" ];then
        echo "cudnn not found"
    else
        # second `whereis` field is the first match, e.g. /usr/local/cuda
        export CUDA_HOME=`whereis cuda | awk '{print $2}'`
        # NOTE(review): takes chars 2-4 of the release field (e.g. "V11.4" -> "11.4");
        # verify this cut range against the installed nvcc's output format.
        export CUDA_VERSION=`nvcc --version | grep release | awk '{print $6}' | cut -c 2-4`
        export CUDNN_HOME=$(dirname `whereis cudnn.h | awk '{print $2}'`)
    fi
fi
--------------------------------------------------------------------------------
/scripts/get_tensorrt_environment_variables.bash:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Directory containing libnvinfer -> TENSORRT_HOME (empty when TensorRT is absent).
# Meant to be sourced.

export TENSORRT_HOME="$(dirname $(whereis libnvinfer | awk '{print $2}'))"
--------------------------------------------------------------------------------
/scripts/install_apps_dependencies.bash:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Install the OpenCV development package required by the example apps.

sudo -l

sudo apt-get install -y --no-install-recommends libopencv-dev
--------------------------------------------------------------------------------
/scripts/install_latest_cmake.bash:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Ensure cmake >= CMAKE_VERSION_TO_INSTALL is available; install via pip if not.

readonly CMAKE_VERSION_TO_INSTALL="3.24.0"

function command_exists
{
    type "$1" &> /dev/null;
}

# Returns success when $1 >= $2 (version-sorted comparison).
function version_greater_equal
{
    printf '%s\n%s\n' "$2" "$1" | sort -V -C
}

# Skip installation if an adequate cmake is already present.
if command_exists cmake; then
    readonly CMAKE_VERSION=`cmake --version | head -n1 | cut -d" " -f3`
    if version_greater_equal "${CMAKE_VERSION}" "${CMAKE_VERSION_TO_INSTALL}"; then
        exit 0
    fi
fi

echo "Need cmake ${CMAKE_VERSION_TO_INSTALL} or above. Install now..."

sudo -l

sudo apt-get update
sudo apt-get -y purge cmake
sudo apt-get -y --no-install-recommends install python3-pip
python3 -m pip install cmake==$CMAKE_VERSION_TO_INSTALL
# restart the shell so the pip-installed cmake is picked up on PATH
exec bash
--------------------------------------------------------------------------------
/scripts/install_onnx_runtime.bash:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Build and install onnxruntime from source, enabling CUDA and/or TensorRT
# execution providers when the corresponding toolkits are detected.

# reference: https://github.com/microsoft/onnxruntime#installation

readonly CURRENT_DIR=$(dirname $(realpath $0))

sudo -l

sudo apt-get update
sudo apt install -y --no-install-recommends zlib1g-dev

readonly ONNXRUNTIME_VERSION="v1.14.1"
git clone --recursive -b ${ONNXRUNTIME_VERSION} https://github.com/Microsoft/onnxruntime
cd onnxruntime

INSTALL_PREFIX="/usr/local"
BUILDTYPE=Release
BUILDARGS="--config ${BUILDTYPE}"
BUILDARGS="${BUILDARGS} --build_shared_lib --skip_tests"
BUILDARGS="${BUILDARGS} --parallel"
BUILDARGS="${BUILDARGS} --cmake_extra_defines CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}"

# enable the CUDA execution provider only when all CUDA variables were exported
source $CURRENT_DIR/get_cuda_environment_variables.bash
if [ ! -z "$CUDA_HOME" -a ! -z "$CUDA_VERSION" -a ! -z "$CUDNN_HOME" ]; then
    BUILDARGS="${BUILDARGS} --use_cuda --cuda_version=${CUDA_VERSION} --cuda_home=${CUDA_HOME} --cudnn_home=${CUDNN_HOME}"
fi

source $CURRENT_DIR/get_tensorrt_environment_variables.bash
if [ ! -z "$TENSORRT_HOME" ]; then
    # NOTE(review): this comment previously referenced "onnxruntime v1.10.0 is
    # compatible with tensorrt 8"; ONNXRUNTIME_VERSION is now v1.14.1 — verify
    # TensorRT compatibility for the pinned version.
    BUILDARGS="${BUILDARGS} --use_tensorrt --tensorrt_home=${TENSORRT_HOME}"
fi

./build.sh ${BUILDARGS}
cd ./build/Linux/${BUILDTYPE}
sudo make install
--------------------------------------------------------------------------------
/scripts/loftr/README.md:
--------------------------------------------------------------------------------
# convert pre-trained loftr pytorch weights to onnx format

---

## dependencies

---

- python: 3x

-

```bash
git submodule update --init --recursive

python3 -m pip install -r requirements.txt
```

## :running: how to run

---

- download [LoFTR](https://github.com/zju3dv/LoFTR) weights indoor_ds_new.ckpt from [HERE](https://drive.google.com/drive/folders/1xu2Pq6mZT5hmFgiYMBT9Zt8h1yO-3SIp)

- export onnx weights

```
python3 convert_to_onnx.py --model_path /path/to/indoor_ds_new.ckpt
```

## Note

- The LoFTR's [latest commit](b4ee7eb0359d0062e794c99f73e27639d7c7ac9f) seems to be only compatible with the new weights (Ref: https://github.com/zju3dv/LoFTR/issues/48). Hence, this onnx cpp application is only compatible with _indoor_ds_new.ckpt_ weights.
34 | -------------------------------------------------------------------------------- /scripts/loftr/convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import onnxruntime 4 | import torch 5 | 6 | from loftr_wrapper import LoFTRWrapper as LoFTR 7 | 8 | 9 | def get_args(): 10 | import argparse 11 | 12 | parser = argparse.ArgumentParser("convert loftr torch weights to onnx format") 13 | parser.add_argument("--model_path", type=str, required=True) 14 | 15 | return parser.parse_args() 16 | 17 | 18 | def main(): 19 | args = get_args() 20 | model_path = args.model_path 21 | model = LoFTR() 22 | model.load_state_dict(torch.load(model_path)["state_dict"]) 23 | model.eval() 24 | 25 | batch_size = 1 26 | height = 480 27 | width = 640 28 | 29 | data = {} 30 | data["image0"] = torch.randn(batch_size, 1, height, width) 31 | data["image1"] = torch.randn(batch_size, 1, height, width) 32 | 33 | torch.onnx.export( 34 | model, 35 | data, 36 | "loftr.onnx", 37 | export_params=True, 38 | opset_version=12, 39 | do_constant_folding=True, 40 | input_names=list(data.keys()), 41 | output_names=["keypoints0", "keypoints1", "confidence"], 42 | dynamic_axes={ 43 | "image0": {2: "height", 3: "width"}, 44 | "image1": {2: "height", 3: "width"}, 45 | "keypoints0": {0: "num_keypoints"}, 46 | "keypoints1": {0: "num_keypoints"}, 47 | "confidence": {0: "num_keypoints"}, 48 | }, 49 | ) 50 | 51 | print(f"\nonnx model is saved to: {os.getcwd()}/loftr.onnx") 52 | 53 | print("\ntest inference using onnxruntime") 54 | sess = onnxruntime.InferenceSession("loftr.onnx") 55 | for input in sess.get_inputs(): 56 | print("input: ", input) 57 | 58 | print("\n") 59 | for output in sess.get_outputs(): 60 | print("output: ", output) 61 | 62 | 63 | if __name__ == "__main__": 64 | import warnings 65 | 66 | warnings.filterwarnings("ignore") 67 | 68 | main() 69 | -------------------------------------------------------------------------------- 
/scripts/loftr/loftr_wrapper.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import copy
import os
import sys
from typing import Any, Dict

import torch
from einops.einops import rearrange

# make the vendored LoFTR submodule importable
_CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(_CURRENT_DIR, "LoFTR"))

from src.loftr import LoFTR, default_cfg

# temp_bug_fix is required for the new (indoor_ds_new.ckpt) weights
DEFAULT_CFG = copy.deepcopy(default_cfg)
DEFAULT_CFG["coarse"]["temp_bug_fix"] = True


class LoFTRWrapper(LoFTR):
    # Export-friendly wrapper: upstream LoFTR.forward mutates a dict in place,
    # which torch.onnx.export cannot trace; this wrapper takes plain tensors
    # and returns a dict of tensors instead.
    def __init__(
        self,
        config: Dict[str, Any] = DEFAULT_CFG,
    ):
        LoFTR.__init__(self, config)

    def forward(
        self,
        image0: torch.Tensor,
        image1: torch.Tensor,
    ) -> Dict[str, torch.Tensor]:
        """Match a grayscale image pair.

        Mirrors upstream LoFTR.forward step by step, but returns
        keypoints0/keypoints1/confidence tensors instead of mutating `data`.
        Assumes image0/image1 are (batch, 1, H, W) — TODO confirm against the
        export script.
        """
        data = {
            "image0": image0,
            "image1": image1,
        }
        del image0, image1

        data.update(
            {
                "bs": data["image0"].size(0),
                "hw0_i": data["image0"].shape[2:],
                "hw1_i": data["image1"].shape[2:],
            }
        )

        # 1. extract coarse + fine CNN features (batched when shapes match)
        if data["hw0_i"] == data["hw1_i"]:  # faster & better BN convergence
            feats_c, feats_f = self.backbone(
                torch.cat([data["image0"], data["image1"]], dim=0)
            )
            (feat_c0, feat_c1), (feat_f0, feat_f1) = feats_c.split(
                data["bs"]
            ), feats_f.split(data["bs"])
        else:  # handle different input shapes
            (feat_c0, feat_f0), (feat_c1, feat_f1) = self.backbone(
                data["image0"]
            ), self.backbone(data["image1"])

        data.update(
            {
                "hw0_c": feat_c0.shape[2:],
                "hw1_c": feat_c1.shape[2:],
                "hw0_f": feat_f0.shape[2:],
                "hw1_f": feat_f1.shape[2:],
            }
        )

        # 2. coarse-level loftr module
        # add featmap with positional encoding, then flatten it to sequence [N, HW, C]
        feat_c0 = rearrange(self.pos_encoding(feat_c0), "n c h w -> n (h w) c")
        feat_c1 = rearrange(self.pos_encoding(feat_c1), "n c h w -> n (h w) c")

        mask_c0 = mask_c1 = None  # mask is useful in training
        if "mask0" in data:
            mask_c0, mask_c1 = data["mask0"].flatten(-2), data["mask1"].flatten(-2)
        feat_c0, feat_c1 = self.loftr_coarse(feat_c0, feat_c1, mask_c0, mask_c1)

        # 3. match coarse-level
        self.coarse_matching(feat_c0, feat_c1, data, mask_c0=mask_c0, mask_c1=mask_c1)

        # 4. fine-level refinement
        feat_f0_unfold, feat_f1_unfold = self.fine_preprocess(
            feat_f0, feat_f1, feat_c0, feat_c1, data
        )
        if feat_f0_unfold.size(0) != 0:  # at least one coarse level predicted
            feat_f0_unfold, feat_f1_unfold = self.loftr_fine(
                feat_f0_unfold, feat_f1_unfold
            )

        # 5. match fine-level
        self.fine_matching(feat_f0_unfold, feat_f1_unfold, data)

        # expose only the final matches under ONNX-friendly output names
        rename_keys: Dict[str, str] = {
            "mkpts0_f": "keypoints0",
            "mkpts1_f": "keypoints1",
            "mconf": "confidence",
        }
        out: Dict[str, torch.Tensor] = {}
        for k, v in rename_keys.items():
            _d = data[k]
            if isinstance(_d, torch.Tensor):
                out[v] = _d
            else:
                raise TypeError(
                    f"Expected torch.Tensor for item `{k}`. Gotcha {type(_d)}"
                )
        del data

        return out
--------------------------------------------------------------------------------
/scripts/loftr/requirements.txt:
--------------------------------------------------------------------------------
kornia==0.6.10
onnx==1.13.1
onnxruntime
einops==0.3.0
yacs>=0.1.8
--------------------------------------------------------------------------------
/scripts/superglue/README.md:
--------------------------------------------------------------------------------
# convert pre-trained superglue pytorch weights to onnx format

---

## dependencies

---

- python: 3x

-

```bash
git submodule update --init --recursive

python3 -m pip install -r requirements.txt
python3 -m pip install -r SuperGluePretrainedNetwork/requirements.txt
```

## :running: how to run

---

- export onnx weights

```
python3 convert_to_onnx.py
```
--------------------------------------------------------------------------------
/scripts/superglue/convert_to_onnx.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import os

import torch

import onnxruntime
from superglue_wrapper import SuperGlueWrapper as SuperGlue


def main():
    """Export the pretrained SuperGlue (indoor weights) to super_glue.onnx."""
    config = {
        "descriptor_dim": 256,
        "weights": "indoor",
        "keypoint_encoder": [32, 64, 128, 256],
        "GNN_layers": ["self", "cross"] * 9,
        "sinkhorn_iterations": 100,
        "match_threshold": 0.2,
    }

    model = SuperGlue(config=config)
    model.eval()

    # dummy inputs used only for tracing; actual keypoint counts become dynamic axes
    batch_size = 1
    height = 480
    width = 640
    num_keypoints = 382
    data = {}
    for i in range(2):
        data[f"image{i}_shape"] = torch.tensor(
            [batch_size, 1, height, width], dtype=torch.float32
        )
        data[f"scores{i}"] = torch.randn(batch_size, num_keypoints)
33 | data[f"keypoints{i}"] = torch.randn(batch_size, num_keypoints, 2) 34 | data[f"descriptors{i}"] = torch.randn(batch_size, 256, num_keypoints) 35 | 36 | torch.onnx.export( 37 | model, 38 | data, 39 | "super_glue.onnx", 40 | export_params=True, 41 | opset_version=12, 42 | do_constant_folding=True, 43 | input_names=list(data.keys()), 44 | output_names=["matches0", "matches1", "matching_scores0", "matching_scores1"], 45 | dynamic_axes={ 46 | "keypoints0": {0: "batch_size", 1: "num_keypoints0"}, 47 | "scores0": {0: "batch_size", 1: "num_keypoints0"}, 48 | "descriptors0": {0: "batch_size", 2: "num_keypoints0"}, 49 | "keypoints1": {0: "batch_size", 1: "num_keypoints1"}, 50 | "scores1": {0: "batch_size", 1: "num_keypoints1"}, 51 | "descriptors1": {0: "batch_size", 2: "num_keypoints1"}, 52 | "matches0": {0: "batch_size", 1: "num_keypoints0"}, 53 | "matches1": {0: "batch_size", 1: "num_keypoints1"}, 54 | "matching_scores0": {0: "batch_size", 1: "num_keypoints0"}, 55 | "matching_scores1": {0: "batch_size", 1: "num_keypoints1"}, 56 | }, 57 | ) 58 | print(f"\nonnx model is saved to: {os.getcwd()}/super_glue.onnx") 59 | 60 | print("\ntest inference using onnxruntime") 61 | sess = onnxruntime.InferenceSession("super_glue.onnx") 62 | for input in sess.get_inputs(): 63 | print("input: ", input) 64 | 65 | print("\n") 66 | for output in sess.get_outputs(): 67 | print("output: ", output) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /scripts/superglue/requirements.txt: -------------------------------------------------------------------------------- 1 | onnxruntime 2 | -------------------------------------------------------------------------------- /scripts/superglue/superglue_wrapper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from SuperGluePretrainedNetwork.models.superglue import ( 4 | SuperGlue, 5 | arange_like, 6 | 
log_optimal_transport, 7 | normalize_keypoints, 8 | ) 9 | 10 | 11 | class SuperGlueWrapper(SuperGlue): 12 | default_config = { 13 | "descriptor_dim": 256, 14 | "weights": "outdor", 15 | "keypoint_encoder": [32, 64, 128, 256], 16 | "GNN_layers": ["self", "cross"] * 9, 17 | "sinkhorn_iterations": 100, 18 | "match_threshold": 0.2, 19 | } 20 | 21 | def __init__(self, config): 22 | SuperGlue.__init__(self, config) 23 | 24 | def forward( 25 | self, 26 | image0_shape, 27 | scores0, 28 | keypoints0, 29 | descriptors0, 30 | image1_shape, 31 | scores1, 32 | keypoints1, 33 | descriptors1, 34 | ): 35 | data = { 36 | "image0_shape": image0_shape, 37 | "scores0": scores0, 38 | "keypoints0": keypoints0, 39 | "descriptors0": descriptors0, 40 | "image1_shape": image1_shape, 41 | "scores1": scores1, 42 | "keypoints1": keypoints1, 43 | "descriptors1": descriptors1, 44 | } 45 | 46 | """Run SuperGlue on a pair of keypoints and descriptors""" 47 | desc0, desc1 = data["descriptors0"], data["descriptors1"] 48 | kpts0, kpts1 = data["keypoints0"], data["keypoints1"] 49 | 50 | # Keypoint normalization. 51 | kpts0 = normalize_keypoints(kpts0, data["image0_shape"]) 52 | kpts1 = normalize_keypoints(kpts1, data["image1_shape"]) 53 | 54 | # Keypoint MLP encoder. 55 | desc0 = desc0 + self.kenc(kpts0, data["scores0"]) 56 | desc1 = desc1 + self.kenc(kpts1, data["scores1"]) 57 | 58 | # Multi-layer Transformer network. 59 | desc0, desc1 = self.gnn(desc0, desc1) 60 | 61 | # Final MLP projection. 62 | mdesc0, mdesc1 = self.final_proj(desc0), self.final_proj(desc1) 63 | 64 | # Compute matching descriptor distance. 65 | scores = torch.einsum("bdn,bdm->bnm", mdesc0, mdesc1) 66 | scores = scores / self.config["descriptor_dim"] ** 0.5 67 | 68 | # Run the optimal transport. 69 | scores = log_optimal_transport( 70 | scores, self.bin_score, iters=self.config["sinkhorn_iterations"] 71 | ) 72 | 73 | # Get the matches with score above "match_threshold". 
74 | max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1) 75 | indices0, indices1 = max0.indices, max1.indices 76 | mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0) 77 | mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1) 78 | zero = scores.new_tensor(0) 79 | mscores0 = torch.where(mutual0, max0.values.exp(), zero) 80 | mscores1 = torch.where(mutual1, mscores0.gather(1, indices1), zero) 81 | valid0 = mutual0 & (mscores0 > self.config["match_threshold"]) 82 | valid1 = mutual1 & valid0.gather(1, indices1) 83 | indices0 = torch.where(valid0, indices0, indices0.new_tensor(-1)) 84 | indices1 = torch.where(valid1, indices1, indices1.new_tensor(-1)) 85 | 86 | return { 87 | "matches0": indices0, # use -1 for invalid match 88 | "matches1": indices1, # use -1 for invalid match 89 | "matching_scores0": mscores0, 90 | "matching_scores1": mscores1, 91 | } 92 | -------------------------------------------------------------------------------- /scripts/superpoint/README.md: -------------------------------------------------------------------------------- 1 | # convert pre-trained superpoint pytorch weights to onnx format 2 | 3 | --- 4 | 5 | ## dependencies 6 | 7 | --- 8 | 9 | - python: 3x 10 | 11 | - 12 | 13 | ```bash 14 | python3 -m pip install -r requirements.txt 15 | ``` 16 | 17 | ## :running: how to run 18 | 19 | --- 20 | 21 | - update submodule 22 | 23 | ```bash 24 | git submodule update --init --recursive 25 | ``` 26 | 27 | - export onnx weights 28 | 29 | ``` 30 | python3 convert_to_onnx.py 31 | ``` 32 | -------------------------------------------------------------------------------- /scripts/superpoint/convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | 4 | import torch 5 | import torch.onnx 6 | 7 | from SuperPointPretrainedNetwork.demo_superpoint import SuperPointNet 8 | 9 | _CURRENT_DIR = 
os.path.dirname(os.path.realpath(__file__)) 10 | _WEIGHTS_PATH = os.path.join( 11 | _CURRENT_DIR, "SuperPointPretrainedNetwork/superpoint_v1.pth" 12 | ) 13 | 14 | 15 | def main(): 16 | assert os.path.isfile(_WEIGHTS_PATH) 17 | model = SuperPointNet() 18 | model.load_state_dict(torch.load(_WEIGHTS_PATH)) 19 | model.eval() 20 | 21 | batch_size = 1 22 | height = 16 23 | width = 16 24 | x = torch.randn(batch_size, 1, height, width) 25 | 26 | torch.onnx.export( 27 | model, 28 | x, 29 | "super_point.onnx", 30 | export_params=True, 31 | opset_version=10, 32 | do_constant_folding=True, 33 | input_names=["input"], 34 | output_names=["output"], 35 | dynamic_axes={ 36 | "input": {0: "batch_size", 2: "height", 3: "width"}, 37 | "output": {0: "batch_size"}, 38 | }, 39 | ) 40 | print(f"\nonnx model is saved to: {os.getcwd()}/super_point.onnx") 41 | 42 | 43 | if __name__ == "__main__": 44 | main() 45 | -------------------------------------------------------------------------------- /scripts/superpoint/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | torch>=0.4 3 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | set(LIBRARY_NAME ${PROJECT_NAME}) 4 | 5 | file(GLOB SOURCE_FILES 6 | ${PROJECT_SOURCE_DIR}/src/ImageClassificationOrtSessionHandler.cpp 7 | ${PROJECT_SOURCE_DIR}/src/ImageRecognitionOrtSessionHandlerBase.cpp 8 | ${PROJECT_SOURCE_DIR}/src/ObjectDetectionOrtSessionHandler.cpp 9 | ${PROJECT_SOURCE_DIR}/src/OrtSessionHandler.cpp 10 | ) 11 | 12 | add_library(${LIBRARY_NAME} 13 | SHARED 14 | ${SOURCE_FILES} 15 | ) 16 | 17 | target_include_directories(${LIBRARY_NAME} 18 | PUBLIC 19 | $ 20 | $ 21 | PRIVATE 22 | ${onnxruntime_INCLUDE_DIRS} 23 | ) 24 | 25 | target_compile_options(${LIBRARY_NAME} 26 | PRIVATE 27 | $<$:-O0 -g -Wall 
-Werror>
    $<$:-O3>
)

list(APPEND PRIVATE_LIBS ${onnxruntime_LIBS})

target_link_libraries(${LIBRARY_NAME}
    PRIVATE
    ${PRIVATE_LIBS}
)
--------------------------------------------------------------------------------
/src/ImageClassificationOrtSessionHandler.cpp:
--------------------------------------------------------------------------------
/**
 * @file ImageClassificationOrtSessionHandler.cpp
 *
 * @author btran
 *
 */

// NOTE(review): header names and template arguments throughout this file were
// stripped by extraction (e.g. std::vector<...> appears as std::vector>) —
// the code below is reproduced byte-for-byte; confirm against the repository.
#include
#include
#include

#include "ort_utility/ort_utility.hpp"

namespace Ort
{
// Forwards straight to the base: classification adds no extra construction state.
ImageClassificationOrtSessionHandler::ImageClassificationOrtSessionHandler(
    const uint16_t numClasses,  //
    const std::string& modelPath,  //
    const std::optional& gpuIdx, const std::optional>>& inputShapes)
    : ImageRecognitionOrtSessionHandlerBase(numClasses, modelPath, gpuIdx, inputShapes)
{
}

ImageClassificationOrtSessionHandler::~ImageClassificationOrtSessionHandler()
{
}

// Returns the top-k (classId, score) pairs sorted by descending score.
// When useSoftmax is set, the raw logits in inferenceOutput[0] are converted
// to probabilities IN PLACE before ranking.
std::vector> ImageClassificationOrtSessionHandler::topK(const std::vector& inferenceOutput,  //
                                                        const uint16_t k,  //
                                                        const bool useSoftmax) const
{
    // clamp k into [1, m_numClasses]
    const uint16_t realK = std::max(std::min(k, m_numClasses), static_cast(1));

    // classification models are expected to have exactly one output tensor
    assert(inferenceOutput.size() == 1);
    float* processData = inferenceOutput[0];
    if (useSoftmax) {
        softmax(processData, m_numClasses);
    }

    std::vector> ps;
    ps.reserve(m_numClasses);

    // NOTE(review): signed `int i` compared against the uint16_t member; fine
    // for <= 65535 classes, but a signed/unsigned mix nonetheless.
    for (int i = 0; i < m_numClasses; ++i) {
        ps.emplace_back(std::make_pair(i, processData[i]));
    }

    // full sort of all classes; only the first realK entries are returned
    std::sort(ps.begin(), ps.end(), [](const auto& elem1, const auto& elem2) { return elem1.second > elem2.second; });

    return std::vector>(ps.begin(), ps.begin() + realK);
}

// Human-readable dump of topK(): "<id> : <name> : <score>" per line, falling
// back to "<id> : <score>" when no class names were registered.
std::string ImageClassificationOrtSessionHandler::topKToString(const std::vector& inferenceOutput,  //
                                                               const uint16_t k,  //
                                                               const bool useSoftmax) const
{
    auto ps = this->topK(inferenceOutput, k, useSoftmax);

    std::stringstream ss;

    if (m_classNames.size() == 0) {
        for (const auto& elem : ps) {
            ss << elem.first << " : " << elem.second << std::endl;
        }
    } else {
        for (const auto& elem : ps) {
            ss << elem.first << " : " << m_classNames[elem.first] << " : " << elem.second << std::endl;
        }
    }

    return ss.str();
}
}  // namespace Ort
--------------------------------------------------------------------------------
/src/ImageRecognitionOrtSessionHandlerBase.cpp:
--------------------------------------------------------------------------------
/**
 * @file ImageRecognitionOrtSessionHandlerBase.cpp
 *
 * @author btran
 *
 * Copyright (c) organization
 *
 */

// NOTE(review): header names below were stripped by extraction.
#include
#include
#include

#include "ort_utility/ort_utility.hpp"

namespace Ort
{
// Seeds m_classNames with "0".."numClasses-1" placeholders; real names can be
// installed later via initClassNames().
ImageRecognitionOrtSessionHandlerBase::ImageRecognitionOrtSessionHandlerBase(
    const uint16_t numClasses,  //
    const std::string& modelPath,  //
    const std::optional& gpuIdx, const std::optional>>& inputShapes)
    : OrtSessionHandler(modelPath, gpuIdx, inputShapes)
    , m_numClasses(numClasses)
    , m_classNames()
{
    // NOTE(review): numClasses is unsigned, so `<= 0` only catches 0.
    if (numClasses <= 0) {
        throw std::runtime_error("Number of classes must be more than 0\n");
    }

    m_classNames.reserve(m_numClasses);
    for (uint16_t i = 0; i < m_numClasses; ++i) {
        m_classNames.emplace_back(std::to_string(i));
    }
}

ImageRecognitionOrtSessionHandlerBase::~ImageRecognitionOrtSessionHandlerBase()
{
}

// Replace the placeholder class names; the caller must supply exactly
// m_numClasses entries.
void ImageRecognitionOrtSessionHandlerBase::initClassNames(const std::vector& classNames)
{
    if (classNames.size() != m_numClasses) {
        throw std::runtime_error("Mismatch number of classes\n");
    }

    m_classNames = classNames;
}

// Convert an interleaved (HWC) uint8 image into planar (CHW) float, optionally
// scaling to [0,1] and normalizing per channel with (x/255 - mean) / std.
void ImageRecognitionOrtSessionHandlerBase::preprocess(float* dst,  //
                                                       const unsigned char* src,  //
                                                       const int64_t targetImgWidth,  //
                                                       const int64_t targetImgHeight,  //
                                                       const int numChannels,  //
                                                       const std::vector& meanVal,  //
                                                       const std::vector& stdVal) const
{
    if (!meanVal.empty() && !stdVal.empty()) {
        assert(meanVal.size() == stdVal.size() && meanVal.size() == static_cast(numChannels));
    }

    int64_t dataLength = targetImgHeight * targetImgWidth * numChannels;

    // NOTE(review): this memcpy copies dataLength BYTES of uchar data into a
    // float buffer, and every element of dst is overwritten by one of the two
    // branches below — this looks like dead/incorrect code; confirm and remove.
    memcpy(dst, reinterpret_cast(src), dataLength);

    if (!meanVal.empty() && !stdVal.empty()) {
        for (int i = 0; i < targetImgHeight; ++i) {
            for (int j = 0; j < targetImgWidth; ++j) {
                for (int c = 0; c < numChannels; ++c) {
                    // dst index: channel plane first (CHW); src index: pixel-interleaved (HWC)
                    dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] =
                        (src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0 - meanVal[c]) / stdVal[c];
                }
            }
        }
    } else {
        for (int i = 0; i < targetImgHeight; ++i) {
            for (int j = 0; j < targetImgWidth; ++j) {
                for (int c = 0; c < numChannels; ++c) {
                    dst[c * targetImgHeight * targetImgWidth + i * targetImgWidth + j] =
                        src[i * targetImgWidth * numChannels + j * numChannels + c] / 255.0;
                }
            }
        }
    }
}
}  // namespace Ort
--------------------------------------------------------------------------------
/src/ObjectDetectionOrtSessionHandler.cpp:
--------------------------------------------------------------------------------
/**
 * @file ObjectDetectionOrtSessionHandler.cpp
 *
 * @author btran
 *
 */

#include "ort_utility/ort_utility.hpp"

namespace Ort
{
ObjectDetectionOrtSessionHandler::ObjectDetectionOrtSessionHandler(
    const uint16_t numClasses,  //
    const std::string& modelPath,  //
    const std::optional& gpuIdx,  //
    const std::optional>>& inputShapes)
    : ImageRecognitionOrtSessionHandlerBase(numClasses,
modelPath, gpuIdx, inputShapes) 18 | { 19 | } 20 | 21 | ObjectDetectionOrtSessionHandler::~ObjectDetectionOrtSessionHandler() 22 | { 23 | } 24 | 25 | } // namespace Ort 26 | -------------------------------------------------------------------------------- /src/OrtSessionHandler.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file OrtSessionHandler.cpp 3 | * 4 | * @author btran 5 | * 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #if ENABLE_TENSORRT 12 | #include 13 | #endif 14 | 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace 23 | { 24 | std::string toString(const ONNXTensorElementDataType dataType) 25 | { 26 | switch (dataType) { 27 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: { 28 | return "float"; 29 | } 30 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: { 31 | return "uint8_t"; 32 | } 33 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: { 34 | return "int8_t"; 35 | } 36 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16: { 37 | return "uint16_t"; 38 | } 39 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16: { 40 | return "int16_t"; 41 | } 42 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: { 43 | return "int32_t"; 44 | } 45 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: { 46 | return "int64_t"; 47 | } 48 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING: { 49 | return "string"; 50 | } 51 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: { 52 | return "bool"; 53 | } 54 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: { 55 | return "float16"; 56 | } 57 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: { 58 | return "double"; 59 | } 60 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32: { 61 | return "uint32_t"; 62 | } 63 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64: { 64 | return "uint64_t"; 65 | } 66 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64: { 67 | return "complex with float32 real and imaginary components"; 68 | } 69 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128: { 70 | return "complex with float64 real and imaginary 
components"; 71 | } 72 | case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16: { 73 | return "complex with float64 real and imaginary components"; 74 | } 75 | default: 76 | return "undefined"; 77 | } 78 | } 79 | } // namespace 80 | 81 | namespace Ort 82 | { 83 | //-----------------------------------------------------------------------------// 84 | // OrtSessionHandlerIml Definition 85 | //-----------------------------------------------------------------------------// 86 | 87 | class OrtSessionHandler::OrtSessionHandlerIml 88 | { 89 | public: 90 | OrtSessionHandlerIml(const std::string& modelPath, // 91 | const std::optional& gpuIdx, // 92 | const std::optional>>& inputShapes); 93 | ~OrtSessionHandlerIml(); 94 | 95 | std::vector operator()(const std::vector& inputData) const; 96 | 97 | void updateInputShapes(const std::vector>& inputShapes) 98 | { 99 | if (inputShapes.size() != m_numInputs) { 100 | DEBUG_LOG("inputShapes must be of size: %d", m_numInputs); 101 | return; 102 | } 103 | m_inputShapes = inputShapes; 104 | 105 | for (int i = 0; i < m_numInputs; i++) { 106 | const auto& curInputShape = m_inputShapes[i]; 107 | m_inputTensorSizes[i] = 108 | std::accumulate(std::begin(curInputShape), std::end(curInputShape), 1, std::multiplies()); 109 | } 110 | } 111 | 112 | private: 113 | void initSession(); 114 | void initModelInfo(); 115 | 116 | private: 117 | std::string m_modelPath; 118 | 119 | mutable Ort::Session m_session; 120 | Ort::Env m_env; 121 | Ort::AllocatorWithDefaultOptions m_ortAllocator; 122 | 123 | std::optional m_gpuIdx; 124 | 125 | std::vector> m_inputShapes; 126 | std::vector> m_outputShapes; 127 | 128 | std::vector m_inputTensorSizes; 129 | std::vector m_outputTensorSizes; 130 | 131 | uint8_t m_numInputs; 132 | uint8_t m_numOutputs; 133 | 134 | std::vector m_inputNodeNames; 135 | std::vector m_outputNodeNames; 136 | 137 | bool m_inputShapesProvided = false; 138 | }; 139 | 140 | //-----------------------------------------------------------------------------// 
141 | // OrtSessionHandler 142 | //-----------------------------------------------------------------------------// 143 | 144 | OrtSessionHandler::OrtSessionHandler(const std::string& modelPath, // 145 | const std::optional& gpuIdx, // 146 | const std::optional>>& inputShapes) 147 | : m_piml(std::make_unique(modelPath, // 148 | gpuIdx, // 149 | inputShapes)) 150 | { 151 | } 152 | 153 | OrtSessionHandler::~OrtSessionHandler() = default; 154 | 155 | std::vector 156 | OrtSessionHandler::operator()(const std::vector& inputImgData) const 157 | { 158 | return this->m_piml->operator()(inputImgData); 159 | } 160 | 161 | //-----------------------------------------------------------------------------// 162 | // piml class implementation 163 | //-----------------------------------------------------------------------------// 164 | 165 | OrtSessionHandler::OrtSessionHandlerIml::OrtSessionHandlerIml( 166 | const std::string& modelPath, // 167 | const std::optional& gpuIdx, // 168 | const std::optional>>& inputShapes) 169 | : m_modelPath(modelPath) 170 | , m_session(nullptr) 171 | , m_env(nullptr) 172 | , m_ortAllocator() 173 | , m_gpuIdx(gpuIdx) 174 | , m_inputShapes() 175 | , m_outputShapes() 176 | , m_numInputs(0) 177 | , m_numOutputs(0) 178 | , m_inputNodeNames() 179 | , m_outputNodeNames() 180 | { 181 | this->initSession(); 182 | 183 | if (inputShapes.has_value()) { 184 | m_inputShapesProvided = true; 185 | m_inputShapes = inputShapes.value(); 186 | } 187 | 188 | this->initModelInfo(); 189 | } 190 | 191 | OrtSessionHandler::OrtSessionHandlerIml::~OrtSessionHandlerIml() 192 | { 193 | for (auto& elem : this->m_inputNodeNames) { 194 | free(elem); 195 | elem = nullptr; 196 | } 197 | this->m_inputNodeNames.clear(); 198 | 199 | for (auto& elem : this->m_outputNodeNames) { 200 | free(elem); 201 | elem = nullptr; 202 | } 203 | this->m_outputNodeNames.clear(); 204 | } 205 | 206 | void OrtSessionHandler::OrtSessionHandlerIml::initSession() 207 | { 208 | #if ENABLE_DEBUG 209 | m_env = 
Ort::Env(ORT_LOGGING_LEVEL_WARNING, "test"); 210 | #else 211 | m_env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "test"); 212 | #endif 213 | Ort::SessionOptions sessionOptions; 214 | 215 | sessionOptions.SetIntraOpNumThreads(1); 216 | // tensorrt options can be customized into sessionOptions 217 | // https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html 218 | 219 | #if ENABLE_GPU 220 | if (m_gpuIdx.has_value()) { 221 | Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, m_gpuIdx.value())); 222 | #if ENABLE_TENSORRT 223 | Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(sessionOptions, m_gpuIdx.value())); 224 | #endif 225 | } 226 | #endif 227 | 228 | sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); 229 | m_session = Ort::Session(m_env, m_modelPath.c_str(), sessionOptions); 230 | m_numInputs = m_session.GetInputCount(); 231 | DEBUG_LOG("Model number of inputs: %d\n", m_numInputs); 232 | 233 | m_inputNodeNames.reserve(m_numInputs); 234 | m_inputTensorSizes.reserve(m_numInputs); 235 | 236 | m_numOutputs = m_session.GetOutputCount(); 237 | DEBUG_LOG("Model number of outputs: %d\n", m_numOutputs); 238 | 239 | m_outputNodeNames.reserve(m_numOutputs); 240 | m_outputTensorSizes.reserve(m_numOutputs); 241 | } 242 | 243 | void OrtSessionHandler::OrtSessionHandlerIml::initModelInfo() 244 | { 245 | for (int i = 0; i < m_numInputs; i++) { 246 | if (!m_inputShapesProvided) { 247 | Ort::TypeInfo typeInfo = m_session.GetInputTypeInfo(i); 248 | auto tensorInfo = typeInfo.GetTensorTypeAndShapeInfo(); 249 | 250 | m_inputShapes.emplace_back(tensorInfo.GetShape()); 251 | } 252 | 253 | const auto& curInputShape = m_inputShapes[i]; 254 | 255 | m_inputTensorSizes.emplace_back( 256 | std::accumulate(std::begin(curInputShape), std::end(curInputShape), 1, std::multiplies())); 257 | 258 | #if ORT_API_VERSION > 12 259 | m_inputNodeNames.emplace_back(strdup(m_session.GetInputNameAllocated(i, 
m_ortAllocator).get())); 260 | #else 261 | char* inputName = m_session.GetInputName(i, m_ortAllocator); 262 | m_inputNodeNames.emplace_back(strdup(inputName)); 263 | m_ortAllocator.Free(inputName); 264 | #endif 265 | } 266 | 267 | { 268 | #if ENABLE_DEBUG 269 | std::stringstream ssInputs; 270 | ssInputs << "Model input shapes: "; 271 | ssInputs << m_inputShapes << std::endl; 272 | ssInputs << "Model input node names: "; 273 | ssInputs << m_inputNodeNames << std::endl; 274 | DEBUG_LOG("%s\n", ssInputs.str().c_str()); 275 | #endif 276 | } 277 | 278 | for (int i = 0; i < m_numOutputs; ++i) { 279 | Ort::TypeInfo typeInfo = m_session.GetOutputTypeInfo(i); 280 | auto tensorInfo = typeInfo.GetTensorTypeAndShapeInfo(); 281 | 282 | m_outputShapes.emplace_back(tensorInfo.GetShape()); 283 | 284 | #if ORT_API_VERSION > 12 285 | m_outputNodeNames.emplace_back(strdup(m_session.GetOutputNameAllocated(i, m_ortAllocator).get())); 286 | #else 287 | char* outputName = m_session.GetOutputName(i, m_ortAllocator); 288 | m_outputNodeNames.emplace_back(strdup(outputName)); 289 | m_ortAllocator.Free(outputName); 290 | #endif 291 | } 292 | 293 | { 294 | #if ENABLE_DEBUG 295 | std::stringstream ssOutputs; 296 | ssOutputs << "Model output shapes: "; 297 | ssOutputs << m_outputShapes << std::endl; 298 | ssOutputs << "Model output node names: "; 299 | ssOutputs << m_outputNodeNames << std::endl; 300 | DEBUG_LOG("%s\n", ssOutputs.str().c_str()); 301 | #endif 302 | } 303 | } 304 | 305 | std::vector 306 | OrtSessionHandler::OrtSessionHandlerIml::operator()(const std::vector& inputData) const 307 | { 308 | if (m_numInputs != inputData.size()) { 309 | DEBUG_LOG("m_numInputs:%d, input size:%ld", m_numInputs, inputData.size()); 310 | throw std::runtime_error("Mismatch size of input data"); 311 | } 312 | 313 | Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); 314 | 315 | std::vector inputTensors; 316 | inputTensors.reserve(m_numInputs); 317 | 318 | for (int 
i = 0; i < m_numInputs; ++i) { 319 | inputTensors.emplace_back(std::move( 320 | Ort::Value::CreateTensor(memoryInfo, const_cast(inputData[i]), m_inputTensorSizes[i], 321 | m_inputShapes[i].data(), m_inputShapes[i].size()))); 322 | } 323 | 324 | auto outputTensors = m_session.Run(Ort::RunOptions{nullptr}, m_inputNodeNames.data(), inputTensors.data(), 325 | m_numInputs, m_outputNodeNames.data(), m_numOutputs); 326 | 327 | assert(outputTensors.size() == m_numOutputs); 328 | std::vector outputData; 329 | outputData.reserve(m_numOutputs); 330 | 331 | int count = 1; 332 | for (auto& elem : outputTensors) { 333 | DEBUG_LOG("type of input %d: %s", count++, toString(elem.GetTensorTypeAndShapeInfo().GetElementType()).c_str()); 334 | outputData.emplace_back( 335 | std::make_pair(std::move(elem.GetTensorMutableData()), elem.GetTensorTypeAndShapeInfo().GetShape())); 336 | } 337 | 338 | return outputData; 339 | } 340 | 341 | void OrtSessionHandler::updateInputShapes(const std::vector>& inputShapes) 342 | { 343 | m_piml->updateInputShapes(inputShapes); 344 | } 345 | } // namespace Ort 346 | --------------------------------------------------------------------------------