├── CMakeLists.txt
├── LICENSE
├── README.md
├── blazepose.h
├── main.cpp
└── resources
    ├── output-full.gif
    ├── output.gif
    └── result.png

--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
# CMakeLists.txt - blazepose

cmake_minimum_required (VERSION 2.8.1)

set(TARGET_NAME blazepose)        # name of executable file
set(CMAKE_BUILD_TYPE "Release")

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fPIE")

find_package(InferenceEngine 1.1 REQUIRED)
find_package(OpenCV REQUIRED)
add_definitions(-DUSE_OPENCV)

include_directories( ${InferenceEngine_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS} )
link_directories( )

add_executable( ${TARGET_NAME} main.cpp )   # list of source file(s)
set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS}")
target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} ${OpenCV_LIBS} )

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 Yasunori Shimura

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# blazepose_openvino
BlazePose model test program for the Intel Distribution of OpenVINO toolkit

**WIP**

The BlazePose OpenVINO IR models can be obtained from the [PINTO model zoo](https://github.com/PINTO0309/PINTO_model_zoo).

### Example (Upper body model)
![Example](./resources/output.gif)

### Example (Full body model)
![Example-full](./resources/output-full.gif)

## Configuration / Options
The current program doesn't support command-line options. To change the input media file or the device used for inference, modify the source code; these configuration items are at the top of `main.cpp`:

```c++
#define FULL_POSE
#include "blazepose.h"

#define IMAGE_INPUT (1)
#define VIDEO_INPUT (2)
#define CAM_INPUT   (3)

#ifndef FULL_POSE
const std::string MODEL_POSE_DET = "../pose_detection/128x128/FP32/pose_detection";
const std::string MODEL_LM_DET   = "../pose_landmark_upper_body/256x256/FP32/pose_landmark_upper_body";
#else
const std::string MODEL_POSE_DET = "../pose_detection_full/128x128/FP16/pose_detection_full";
const std::string MODEL_LM_DET   = "../pose_landmark_full_body/256x256/FP16/pose_landmark_full_body";
#endif

// INPUT_TYPE = { IMAGE_INPUT | VIDEO_INPUT | CAM_INPUT }
#define INPUT_TYPE VIDEO_INPUT
const std::string INPUT_FILE = "../boy.mp4";   /* Image or movie file */

// 'output.mp4' will be generated when this macro is defined and the input source is either VIDEO_INPUT or CAM_INPUT
#define VIDEO_OUTPUT
#define VIDEO_SIZE (400)   /* output video size = (VIDEO_SIZE, VIDEO_SIZE) */

// Device to use for inferencing. Possible options = "CPU", "GPU", "MYRIAD", "HDDL", "HETERO:FPGA,CPU", ...
const std::string DEVICE_PD = "CPU";
const std::string DEVICE_LM = "CPU";

// ** Define or undefine to control the items to display
//#define RENDER_ROI
#define RENDER_TIME
#define RENDER_POINTS
```
--------------------------------------------------------------------------------
/blazepose.h:
--------------------------------------------------------------------------------
#pragma once
/* ------------------------------------------------ *
 * The MIT License (MIT)
 * Copyright (c) 2020 terryky1220@gmail.com
 * ------------------------------------------------ */
#ifndef TFLITE_BLAZEPOSE_H_
#define TFLITE_BLAZEPOSE_H_

#ifdef __cplusplus
extern "C" {
#endif

#define MAX_POSE_NUM 100

#ifndef FULL_POSE
#define POSE_JOINT_NUM 25
#else
#define POSE_JOINT_NUM 33
#endif

#ifndef FULL_POSE
enum pose_detect_key_id {
    kMidHipCenter = 0,      // 0
    kFullBodySizeRot,       // 1
    kMidShoulderCenter,     // 2
    kUpperBodySizeRot,      // 3

    kPoseDetectKeyNum
};
#else
enum pose_detect_key_id {
    kMidHipCenter = 0,      // 0
    kFullBodySizeRot,       // 1

    kPoseDetectKeyNum
};
#endif

typedef struct _fvec2
{
    float x, y;
} fvec2;

typedef struct _fvec3
{
    float x, y, z;
} fvec3;

typedef struct _detect_region_t
{
    float score;
    fvec2 topleft;
    fvec2 btmright;
    fvec2 keys[kPoseDetectKeyNum];

    float rotation;
    fvec2 roi_center;
    fvec2 roi_size;
    fvec2 roi_coord[4];
} detect_region_t;

typedef struct _pose_detect_result_t
{
    int num;
    detect_region_t poses[MAX_POSE_NUM];
} pose_detect_result_t;

typedef struct _pose_landmark_result_t
{
    float score;
    fvec3 joint[POSE_JOINT_NUM];
} pose_landmark_result_t;


typedef struct _blazepose_config_t
{
    float score_thresh;
    float iou_thresh;
} blazepose_config_t;

int init_tflite_blazepose(int use_quantized_tflite, blazepose_config_t* config);

void* get_pose_detect_input_buf(int* w, int* h);
int   invoke_pose_detect(pose_detect_result_t* detect_result, blazepose_config_t* config);

void* get_pose_landmark_input_buf(int* w, int* h);
int   invoke_pose_landmark(pose_landmark_result_t* pose_landmark_result);

#ifdef __cplusplus
}

#include <vector>   // for the std::vector members below

struct Anchor
{
    float x_center, y_center, w, h;
};

typedef struct SsdAnchorsCalculatorOptions
{
    // Size of input images.
    int input_size_width;    // [required]
    int input_size_height;   // [required]

    // Min and max scales for generating anchor boxes on feature maps.
    float min_scale;   // [required]
    float max_scale;   // [required]

    // The offset for the center of anchors. The value is in the scale of stride.
    // E.g. 0.5 meaning 0.5 * |current_stride| in pixels.
    float anchor_offset_x;   // default = 0.5
    float anchor_offset_y;   // default = 0.5

    // Number of output feature maps to generate the anchors on.
    int num_layers;   // [required]

    // Sizes of output feature maps to create anchors. Either feature_map size or
    // stride should be provided.
    std::vector<int> feature_map_width;
    std::vector<int> feature_map_height;

    // Strides of each output feature map.
    std::vector<int> strides;

    // List of different aspect ratios to generate anchors.
    std::vector<float> aspect_ratios;

    // A boolean to indicate whether the fixed 3 boxes per location is used in the
    // lowest layer.
    bool reduce_boxes_in_lowest_layer;   // default = false

    // An additional anchor is added with this aspect ratio and a scale
    // interpolated between the scale for a layer and the scale for the next layer
    // (1.0 for the last layer). This anchor is not included if this value is 0.
    float interpolated_scale_aspect_ratio;   // default = 1.0

    // Whether to use fixed width and height (e.g. both 1.0f) for each anchor.
    // This option can be used when the predicted anchor width and height are in
    // pixels.
    bool fixed_anchor_size;   // default = false
} SsdAnchorsCalculatorOptions_t;

#endif

#endif /* TFLITE_BLAZEPOSE_H_ */
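Editorial aside: the `pose_detect_key_id` enum above fixes how wide each anchor's row in the detector's regressor output is, which is what `get_bbox_idx()` in `main.cpp` relies on. A minimal, hypothetical check (not part of the repository sources):

```c++
// Illustration only: relates kPoseDetectKeyNum to the regressor blob shape.
#define FULL_POSE            // build the header the same way main.cpp does
#include "blazepose.h"
#include <iostream>

int main() {
    // 4 box values (cx, cy, w, h) + 2 coordinates per detection keypoint
    int row_width = 4 + 2 * kPoseDetectKeyNum;
    std::cout << row_width << std::endl;   // 8 -> matches the [1,896,8] regressor
                                           // blob; an upper-body build gives 12
    return 0;
}
```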
--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
/* ------------------------------------------------ *
 * The MIT License (MIT)
 * Copyright (c) 2020 terryky1220@gmail.com
 * ------------------------------------------------ */

// main.cpp - blazepose for openvino - modified by Yasunori Shimura  twitter: @yassim0710

// Note: This program works with the BlazePose models for OpenVINO from the PINTO model zoo
//       https://github.com/PINTO0309/PINTO_model_zoo

#define _USE_MATH_DEFINES
#include <cmath>

#include <iostream>
#include <string>
#include <vector>
#include <list>
#include <chrono>

#include <inference_engine.hpp>
#include <opencv2/opencv.hpp>

#define FULL_POSE
#include "blazepose.h"

#define IMAGE_INPUT (1)
#define VIDEO_INPUT (2)
#define CAM_INPUT   (3)

#ifndef FULL_POSE
const std::string MODEL_POSE_DET = "../pose_detection/128x128/FP32/pose_detection";
const std::string MODEL_LM_DET   = "../pose_landmark_upper_body/256x256/FP32/pose_landmark_upper_body";
#else
const std::string MODEL_POSE_DET = "../pose_detection_full/128x128/FP16/pose_detection_full";
//Input Blob(s):
//  BlobName:input, Shape:[1, 3, 128, 128], Precision:FP32
//Output Blob(s):
//  BlobName:StatefulPartitionedCall/functional_1/classificators/concat, Shape:[1, 896, 1], Precision:FP32
//  BlobName:StatefulPartitionedCall/functional_1/regressors/concat, Shape:[1, 896, 8], Precision:FP32
const std::string MODEL_LM_DET = "../pose_landmark_full_body/256x256/FP16/pose_landmark_full_body";
//Input Blob(s):
//  BlobName:input, Shape:[1, 3, 256, 256], Precision:FP32
//Output Blob(s):
//  BlobName:StatefulPartitionedCall/functional_1/output_segmentation/BiasAdd/Add, Shape:[1, 1, 128, 128], Precision:FP32
//  BlobName:StatefulPartitionedCall/functional_1/tf_op_layer_Sigmoid/Sigmoid, Shape:[1, 1, 1, 1], Precision:FP32
//  BlobName:StatefulPartitionedCall/functional_1/tf_op_layer_ld_3d/ld_3d, Shape:[1, 156], Precision:FP32
#endif

// INPUT_TYPE = { IMAGE_INPUT | VIDEO_INPUT | CAM_INPUT }
#define INPUT_TYPE VIDEO_INPUT
const std::string INPUT_FILE = "../boy.mp4";                 /* Image or movie file */
//const std::string INPUT_FILE = "../capoeira.mp4";          /* Image or movie file */
//const std::string INPUT_FILE = "../people-detection.mp4";  /* Image or movie file */

// 'output.mp4' will be generated when this macro is defined and the input source is either VIDEO_INPUT or CAM_INPUT
#define VIDEO_OUTPUT
#define VIDEO_SIZE (400)   /* output video size = (VIDEO_SIZE, VIDEO_SIZE) */

// Device to use for inferencing. Possible options = "CPU", "GPU", "MYRIAD", "HDDL", "HETERO:FPGA,CPU", ...
const std::string DEVICE_PD = "CPU";
const std::string DEVICE_LM = "CPU";

// ** Define or undefine to control the items to display
//#define RENDER_ROI
#define RENDER_TIME
#define RENDER_POINTS



namespace ie = InferenceEngine;


float CalculateScale(float min_scale, float max_scale, int stride_index, int num_strides) {
    if (num_strides == 1)
        return (min_scale + max_scale) * 0.5f;
    else
        return min_scale + (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);
}

void GenerateAnchors(std::vector<Anchor>& anchors, const SsdAnchorsCalculatorOptions& options) {
    int layer_id = 0;
    while (layer_id < (int)options.strides.size()) {
        std::vector<float> anchor_height;
        std::vector<float> anchor_width;
        std::vector<float> aspect_ratios;
        std::vector<float> scales;

        // For same strides, we merge the anchors in the same order.
        int last_same_stride_layer = layer_id;
        while (last_same_stride_layer < (int)options.strides.size() &&
               options.strides[last_same_stride_layer] == options.strides[layer_id])
        {
            const float scale =
                CalculateScale(options.min_scale, options.max_scale,
                               last_same_stride_layer, options.strides.size());
            if (last_same_stride_layer == 0 && options.reduce_boxes_in_lowest_layer) {
                // For the first layer, it can be specified to use predefined anchors.
                aspect_ratios.push_back(1.0);
                aspect_ratios.push_back(2.0);
                aspect_ratios.push_back(0.5);
                scales.push_back(0.1);
                scales.push_back(scale);
                scales.push_back(scale);
            }
            else {
                for (int aspect_ratio_id = 0;
                     aspect_ratio_id < (int)options.aspect_ratios.size();
                     ++aspect_ratio_id) {
                    aspect_ratios.push_back(options.aspect_ratios[aspect_ratio_id]);
                    scales.push_back(scale);
                }
                if (options.interpolated_scale_aspect_ratio > 0.0) {
                    const float scale_next =
                        last_same_stride_layer == (int)options.strides.size() - 1
                        ? 1.0f
                        : CalculateScale(options.min_scale, options.max_scale,
                                         last_same_stride_layer + 1,
                                         options.strides.size());
                    scales.push_back(std::sqrt(scale * scale_next));
                    aspect_ratios.push_back(options.interpolated_scale_aspect_ratio);
                }
            }
            last_same_stride_layer++;
        }

        for (int i = 0; i < (int)aspect_ratios.size(); ++i) {
            const float ratio_sqrts = std::sqrt(aspect_ratios[i]);
            anchor_height.push_back(scales[i] / ratio_sqrts);
            anchor_width.push_back(scales[i] * ratio_sqrts);
        }

        int feature_map_height = 0;
        int feature_map_width = 0;
        if (options.feature_map_height.size()) {
            feature_map_height = options.feature_map_height[layer_id];
            feature_map_width = options.feature_map_width[layer_id];
        }
        else {
            const int stride = options.strides[layer_id];
            feature_map_height = std::ceil(1.0f * options.input_size_height / stride);
            feature_map_width = std::ceil(1.0f * options.input_size_width / stride);
        }

        for (int y = 0; y < feature_map_height; ++y) {
            for (int x = 0; x < feature_map_width; ++x) {
                for (int anchor_id = 0; anchor_id < (int)anchor_height.size(); ++anchor_id) {
                    // TODO: Support specifying anchor_offset_x, anchor_offset_y.
                    const float x_center = (x + options.anchor_offset_x) * 1.0f / feature_map_width;
                    const float y_center = (y + options.anchor_offset_y) * 1.0f / feature_map_height;

                    Anchor new_anchor;
                    new_anchor.x_center = x_center;
                    new_anchor.y_center = y_center;

                    if (options.fixed_anchor_size) {
                        new_anchor.w = 1.0f;
                        new_anchor.h = 1.0f;
                    }
                    else {
                        new_anchor.w = anchor_width[anchor_id];
                        new_anchor.h = anchor_height[anchor_id];
                    }
                    anchors.push_back(new_anchor);
                }
            }
        }
        layer_id = last_same_stride_layer;
    }
}
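/* (Editorial sketch, not part of the original program)
 * Sanity-check the anchor count that GenerateAnchors() produces for the
 * pose-detection configuration used below: 128x128 input, strides
 * {8,16,16,16}, one aspect ratio, interpolated_scale_aspect_ratio = 1.0. */
static int anchor_count_sketch() {
    // stride 8 -> ceil(128/8) = 16x16 cells; 1 aspect ratio + 1 interpolated
    // scale = 2 anchors per cell
    const int layer0     = 16 * 16 * 2;        // 512
    // the three stride-16 layers are merged: 8x8 cells x (3 layers x 2 anchors)
    const int layers1to3 = 8 * 8 * (3 * 2);    // 384
    return layer0 + layers1to3;                // 896 = the [1,896,1] score tensor
}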
void create_ssd_anchors(int input_w, int input_h, std::vector<Anchor>& anchors) {
    /*
     *  Anchor parameters are based on:
     *      mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt
     */
    SsdAnchorsCalculatorOptions anchor_options;
    anchor_options.num_layers = 4;
    anchor_options.min_scale = 0.1484375;
    anchor_options.max_scale = 0.75;
    anchor_options.input_size_height = 128;
    anchor_options.input_size_width = 128;
    anchor_options.anchor_offset_x = 0.5f;
    anchor_options.anchor_offset_y = 0.5f;
//    anchor_options.feature_map_width .push_back(0);
//    anchor_options.feature_map_height.push_back(0);
    anchor_options.strides.push_back(8);
    anchor_options.strides.push_back(16);
    anchor_options.strides.push_back(16);
    anchor_options.strides.push_back(16);
    anchor_options.aspect_ratios.push_back(1.0);
    anchor_options.reduce_boxes_in_lowest_layer = false;
    anchor_options.interpolated_scale_aspect_ratio = 1.0;
    anchor_options.fixed_anchor_size = true;

    GenerateAnchors(anchors, anchor_options);
}

int get_bbox_idx(int anchor_idx) {
    /*
     *  Layout of one anchor's regressor row:
     *    cx, cy, width, height
     *    key0_x, key0_y      kMidHipCenter
     *    key1_x, key1_y      kFullBodySizeRot
     *    key2_x, key2_y      kMidShoulderCenter - upper body only
     *    key3_x, key3_y      kUpperBodySizeRot  - upper body only
     */
    int numkey = kPoseDetectKeyNum;   // FullBody:2, UpperBody:4
    int idx = (4 + 2 * numkey) * anchor_idx;

    return idx;
}

int decode_bounds(std::list<detect_region_t>& region_list, float score_thresh, int input_img_w, int input_img_h, float* scores_ptr, float* bboxes_ptr, std::vector<Anchor>& anchors) {
    detect_region_t region;
    int i = 0;
    for (auto& anchor : anchors) {
        float score0 = scores_ptr[i];
        float score = 1.0f / (1.0f + std::exp(-score0));   // sigmoid: the raw output is a logit

        if (score > score_thresh)
        {
            float* p = bboxes_ptr + get_bbox_idx(i);

            /* boundary box */
            float cx = p[0] / input_img_w + anchor.x_center;
            float cy = p[1] / input_img_h + anchor.y_center;
            float w  = p[2] / input_img_w;
            float h  = p[3] / input_img_h;

            fvec2 topleft, btmright;
            topleft.x  = cx - w * 0.5f;
            topleft.y  = cy - h * 0.5f;
            btmright.x = cx + w * 0.5f;
            btmright.y = cy + h * 0.5f;

            region.score    = score;
            region.topleft  = topleft;
            region.btmright = btmright;

            /* landmark positions (2 or 4 keys, depending on FULL_POSE) */
            for (int j = 0; j < kPoseDetectKeyNum; j++)
            {
                float lx = p[4 + (2 * j) + 0];
                float ly = p[4 + (2 * j) + 1];
                lx += anchor.x_center * input_img_w;
                ly += anchor.y_center * input_img_h;
                lx /= (float)input_img_w;
                ly /= (float)input_img_h;

                region.keys[j].x = lx;
                region.keys[j].y = ly;
            }

            region_list.push_back(region);
        }
        i++;
    }
    return 0;
}
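/* (Editorial sketch, not part of the original program)
 * Decode one hypothetical regressor row the way decode_bounds() does: offsets
 * are in detector-input pixels relative to the anchor center, and the result
 * is a box in normalized [0,1] image coordinates. */
static fvec2 decode_one_sketch() {
    const int   input_w = 128, input_h = 128;
    const float anchor_cx = 0.5f, anchor_cy = 0.5f;     // a central anchor
    const float p[4] = { 8.f, -4.f, 32.f, 64.f };       // hypothetical cx,cy,w,h offsets

    float cx = p[0] / input_w + anchor_cx;              // 0.5625
    float cy = p[1] / input_h + anchor_cy;              // 0.46875
    float w  = p[2] / input_w;                          // 0.25
    float h  = p[3] / input_h;                          // 0.5
    fvec2 topleft = { cx - w * 0.5f, cy - h * 0.5f };   // (0.4375, 0.21875)
    return topleft;
}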
/* -------------------------------------------------- *
 *  Apply NonMaxSuppression:
 *      https://github.com/tensorflow/tfjs/blob/master/tfjs-core/src/ops/image_ops.ts
 * -------------------------------------------------- */
float calc_intersection_over_union(detect_region_t& region0, detect_region_t& region1) {
    float sx0 = region0.topleft.x;
    float sy0 = region0.topleft.y;
    float ex0 = region0.btmright.x;
    float ey0 = region0.btmright.y;
    float sx1 = region1.topleft.x;
    float sy1 = region1.topleft.y;
    float ex1 = region1.btmright.x;
    float ey1 = region1.btmright.y;

    float xmin0 = std::min(sx0, ex0);
    float ymin0 = std::min(sy0, ey0);
    float xmax0 = std::max(sx0, ex0);
    float ymax0 = std::max(sy0, ey0);
    float xmin1 = std::min(sx1, ex1);
    float ymin1 = std::min(sy1, ey1);
    float xmax1 = std::max(sx1, ex1);
    float ymax1 = std::max(sy1, ey1);

    float area0 = (ymax0 - ymin0) * (xmax0 - xmin0);
    float area1 = (ymax1 - ymin1) * (xmax1 - xmin1);
    if (area0 <= 0 || area1 <= 0)
        return 0.0f;

    float intersect_xmin = std::max(xmin0, xmin1);
    float intersect_ymin = std::max(ymin0, ymin1);
    float intersect_xmax = std::min(xmax0, xmax1);
    float intersect_ymax = std::min(ymax0, ymax1);

    float intersect_area = std::max(intersect_ymax - intersect_ymin, 0.0f) *
                           std::max(intersect_xmax - intersect_xmin, 0.0f);

    return intersect_area / (area0 + area1 - intersect_area);
}
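/* (Editorial sketch, not part of the original program)
 * calc_intersection_over_union() on two concrete boxes: A=(0.0,0.0)-(0.4,0.4)
 * and B=(0.2,0.0)-(0.6,0.4) overlap by half a width: intersection 0.08,
 * union 0.24, IoU = 1/3, which non_max_suppression() below would suppress
 * at iou_thresh = 0.3. */
static float iou_sketch() {
    detect_region_t a = {}, b = {};
    a.topleft = { 0.0f, 0.0f };  a.btmright = { 0.4f, 0.4f };
    b.topleft = { 0.2f, 0.0f };  b.btmright = { 0.6f, 0.4f };
    return calc_intersection_over_union(a, b);   // 0.333...
}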
int non_max_suppression(std::list<detect_region_t>& region_list, std::list<detect_region_t>& region_nms_list, float iou_thresh) {
    region_list.sort([](detect_region_t& v1, detect_region_t& v2) { return v1.score > v2.score; });

    for (auto itr = region_list.begin(); itr != region_list.end(); itr++)
    {
        detect_region_t region_candidate = *itr;

        bool ignore_candidate = false;
        for (auto itr_nms = region_nms_list.rbegin(); itr_nms != region_nms_list.rend(); itr_nms++)
        {
            detect_region_t region_nms = *itr_nms;

            float iou = calc_intersection_over_union(region_candidate, region_nms);
            if (iou >= iou_thresh)
            {
                ignore_candidate = true;
                break;
            }
        }

        if (!ignore_candidate)
        {
            region_nms_list.push_back(region_candidate);
            if (region_nms_list.size() >= MAX_POSE_NUM)
                break;
        }
    }
    return 0;
}

float normalize_radians(float angle)
{
    // wrap to [-pi, pi)
    return angle - 2 * M_PI * std::floor((angle - (-M_PI)) / (2 * M_PI));
}

void compute_rotation(detect_region_t& region) {
#ifndef FULL_POSE
    float x0 = region.keys[kMidHipCenter].x;
    float y0 = region.keys[kMidHipCenter].y;
    float x1 = region.keys[kMidShoulderCenter].x;
    float y1 = region.keys[kMidShoulderCenter].y;
#else
    // The full-body detector outputs only two keypoints, so the box center
    // stands in for the mid-shoulder point.
    float x0 = region.keys[kMidHipCenter].x;
    float y0 = region.keys[kMidHipCenter].y;
    float x1 = (region.topleft.x + region.btmright.x) * 0.5f;
    float y1 = (region.topleft.y + region.btmright.y) * 0.5f;
#endif
    float target_angle = M_PI * 0.5f;
    float rotation = target_angle - std::atan2(-(y1 - y0), x1 - x0);

    region.rotation = normalize_radians(rotation);
}

void rot_vec(fvec2& vec, float rotation) {
    float sx = vec.x;
    float sy = vec.y;
    vec.x = sx * std::cos(rotation) - sy * std::sin(rotation);
    vec.y = sx * std::sin(rotation) + sy * std::cos(rotation);
}
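/* (Editorial sketch, not part of the original program)
 * compute_rotation() for an upright person: image y grows downward, so a body
 * axis pointing straight up gives atan2(0.3, 0) = pi/2 and a final rotation
 * of zero. */
static float rotation_sketch() {
    float x0 = 0.5f, y0 = 0.8f;   // hypothetical mid-hip point
    float x1 = 0.5f, y1 = 0.5f;   // point above it (shoulders / box center)
    float target_angle = M_PI * 0.5f;
    return normalize_radians(target_angle - std::atan2(-(y1 - y0), x1 - x0));   // 0.0
}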
void compute_detect_to_roi(detect_region_t& region, const ie::SizeVector& dims) {
    int input_img_w = dims[3];
    int input_img_h = dims[2];
#ifndef FULL_POSE
    float x_center = region.keys[kMidShoulderCenter].x * input_img_w;
    float y_center = region.keys[kMidShoulderCenter].y * input_img_h;
    float x_scale  = region.keys[kUpperBodySizeRot ].x * input_img_w;
    float y_scale  = region.keys[kUpperBodySizeRot ].y * input_img_h;
#else
    float x_center = region.keys[kMidHipCenter   ].x * input_img_w;
    float y_center = region.keys[kMidHipCenter   ].y * input_img_h;
    float x_scale  = region.keys[kFullBodySizeRot].x * input_img_w;
    float y_scale  = region.keys[kFullBodySizeRot].y * input_img_h;
#endif
    // Bounding box size as double distance from center to scale point.
    float box_size = std::sqrt((x_scale - x_center) * (x_scale - x_center) +
                               (y_scale - y_center) * (y_scale - y_center)) * 2.0;

    /* RectTransformationCalculator::TransformNormalizedRect() */
    float width    = box_size;
    float height   = box_size;
    float rotation = region.rotation;
    float shift_x  = 0.0f;
    float shift_y  = 0.0f;
    float roi_cx;
    float roi_cy;

    if (rotation == 0.0f) {
        roi_cx = x_center + (width  * shift_x);
        roi_cy = y_center + (height * shift_y);
    } else {
        float dx = (width  * shift_x) * std::cos(rotation) - (height * shift_y) * std::sin(rotation);
        float dy = (width  * shift_x) * std::sin(rotation) + (height * shift_y) * std::cos(rotation);
        roi_cx = x_center + dx;
        roi_cy = y_center + dy;
    }

    /*
     *  Calculate ROI width and height.
     *  The scale parameter is based on:
     *      "mediapipe/modules/pose_landmark/pose_detection_to_roi.pbtxt"
     */
    float scale_x = 1.5f;
    float scale_y = 1.5f;
    float long_side = std::max(width, height);
    float roi_w = long_side * scale_x;
    float roi_h = long_side * scale_y;

    region.roi_center.x = roi_cx / (float)input_img_w;
    region.roi_center.y = roi_cy / (float)input_img_h;
    region.roi_size.x   = roi_w  / (float)input_img_w;
    region.roi_size.y   = roi_h  / (float)input_img_h;

    /* calculate ROI coordinates */
    float dx = roi_w * 0.5f;
    float dy = roi_h * 0.5f;
    region.roi_coord[0].x = -dx;  region.roi_coord[0].y = -dy;
    region.roi_coord[1].x = +dx;  region.roi_coord[1].y = -dy;
    region.roi_coord[2].x = +dx;  region.roi_coord[2].y = +dy;
    region.roi_coord[3].x = -dx;  region.roi_coord[3].y = +dy;

    for (int i = 0; i < 4; i++)
    {
        rot_vec(region.roi_coord[i], rotation);
        region.roi_coord[i].x += roi_cx;
        region.roi_coord[i].y += roi_cy;

        region.roi_coord[i].x /= (float)input_img_w;   // normalize back to [0,1]
        region.roi_coord[i].y /= (float)input_img_h;
    }
}
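/* (Editorial sketch, not part of the original program)
 * ROI sizing in compute_detect_to_roi() with concrete numbers: a 40 px
 * center-to-scale-point distance on the 128x128 detector input doubles to an
 * 80 px box, and the 1.5x scale from pose_detection_to_roi.pbtxt grows it to
 * a 120 px square ROI before normalization. */
static float roi_size_sketch() {
    float dist     = 40.0f;            // hypothetical keypoint distance in pixels
    float box_size = dist * 2.0f;      // 80 px
    return box_size * 1.5f;            // 120 px
}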
static void pack_detect_result(std::vector<detect_region_t>& detect_results, std::list<detect_region_t>& region_list, const ie::SizeVector& dims) {
    for (auto& region : region_list) {
        compute_rotation(region);
        compute_detect_to_roi(region, dims);
        detect_results.push_back(region);
    }
}


void dumpPose(detect_region_t& pose) {
    std::cout << "Score :" << pose.score << ", topleft: (" << pose.topleft.x << ", " << pose.topleft.y << ") btmright(" << pose.btmright.x << ", " << pose.btmright.y << ")" << std::endl;
    std::cout << "keys: ";
    for (size_t i = 0; i < kPoseDetectKeyNum; i++) {
        std::cout << "(" << pose.keys[i].x << ", " << pose.keys[i].y << ") ";
    }
    std::cout << std::endl;
    std::cout << "rotation(" << pose.rotation << "), roi_center(" << pose.roi_center.x << ", " << pose.roi_center.y << "), roi_size(" << pose.roi_size.x << ", " << pose.roi_size.y << ")" << std::endl;
    for (size_t i = 0; i < 4; i++) {
        std::cout << "roi_coord[" << i << "]=(" << pose.roi_coord[i].x << ", " << pose.roi_coord[i].y << ") ";
    }
    std::cout << std::endl;
}


void renderROI(cv::Mat& img, const fvec2(&roi_coord)[4]) {
    size_t w = img.size().width;
    size_t h = img.size().height;
    std::vector<cv::Point> p;
    for (size_t i = 0; i < 4; i++) {
        cv::Point pt = cv::Point(roi_coord[i].x * w, roi_coord[i].y * h);
        p.push_back(pt);
        cv::putText(img, std::to_string(i), pt, cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(255, 255, 255), 1);
    }
    cv::polylines(img, p, true, cv::Scalar(255, 0, 0), 2, cv::LINE_AA);
}



void renderPose(cv::Mat& image_ocv, std::vector<detect_region_t>& detect_results, pose_landmark_result_t* landmarks) {
    const std::vector<cv::Scalar> colors = {
        cv::Scalar(255,   0,   0), cv::Scalar(255,  85,   0), cv::Scalar(255, 170,   0),
        cv::Scalar(255, 255,   0), cv::Scalar(170, 255,   0), cv::Scalar( 85, 255,   0),
        cv::Scalar(  0, 255,   0), cv::Scalar(  0, 255,  85), cv::Scalar(  0, 255, 170),
        cv::Scalar(  0, 255, 255), cv::Scalar(  0, 170, 255), cv::Scalar(  0,  85, 255),
        cv::Scalar(  0,   0, 255), cv::Scalar( 85,   0, 255), cv::Scalar(170,   0, 255),
        cv::Scalar(255,   0, 255), cv::Scalar(255,   0, 170), cv::Scalar(255,   0,  85)
    };
    const std::vector<std::vector<int>> bones = {
        {  0,  1,  2,  3,  7},            // 0 right eye and ear
        {  0,  4,  5,  6,  8},            // 1 left eye and ear
        {  9, 10},                        // 2 mouth
        { 11, 12, 24, 23, 11},            // 3 body
        { 11, 13, 15, 17, 19, 15, 21},    // 4 right arm
        { 12, 14, 16, 18, 20, 16, 22}     // 5 left arm
#ifdef FULL_POSE
       ,{ 23, 25, 27, 29, 31, 27},        // 6 right leg
        { 24, 26, 28, 30, 32, 28}         // 7 left leg
#endif
    };
    size_t W = image_ocv.size().width;
    size_t H = image_ocv.size().height;
    for (size_t pose_id = 0; pose_id < detect_results.size(); pose_id++) {
        detect_region_t pose = detect_results[pose_id];
        cv::Point2f mat_src[] = { cv::Point2f(0.f, 0.f), cv::Point2f(1.f, 0.f), cv::Point2f(1.f, 1.f) };
        cv::Point2f mat_dst[] = { cv::Point2f(pose.roi_coord[0].x * W, pose.roi_coord[0].y * H),
                                  cv::Point2f(pose.roi_coord[1].x * W, pose.roi_coord[1].y * H),
                                  cv::Point2f(pose.roi_coord[2].x * W, pose.roi_coord[2].y * H) };
        cv::Mat mat = cv::getAffineTransform(mat_src, mat_dst);   // 2x3 unit-square -> ROI
        mat.resize(3, cv::Scalar(0.));                            // extend to 3x3
        mat.at<double>(2, 2) = 1.;                                // homogeneous form

        // Apply the affine transform to project the joint points onto the original ROI in the input image
        double px, py, flag;
        std::vector<std::pair<cv::Point2f, float>> pts;
        for (size_t i = 0; i < POSE_JOINT_NUM; i++) {
            cv::Mat pt = (cv::Mat_<double>(3, 1) << landmarks[pose_id].joint[i].x, landmarks[pose_id].joint[i].y, 1.);   // 3x1 column vector
            pt = mat * pt;   // affine transform (0,0)-(1,1) => ROI
            px = pt.at<double>(0, 0);
            py = pt.at<double>(1, 0);
            flag = landmarks[pose_id].joint[i].z;
            pts.push_back(std::pair<cv::Point2f, float>(cv::Point2f(px, py), flag));
        }
        // render bones
        size_t bone_idx = 0;
        for (auto& bone : bones) {
            int prev_idx = -1;
            for (auto& idx : bone) {
                if (prev_idx == -1) {
                    prev_idx = idx;
                    continue;
                }
                cv::Point2f pt1 = pts[prev_idx].first;
                cv::Point2f pt2 = pts[     idx].first;
                float flag1 = pts[prev_idx].second;   // z values; not yet used for culling
                float flag2 = pts[     idx].second;
                cv::Scalar color = colors[bone_idx++ % colors.size()];
                cv::line(image_ocv, pt1, pt2, color, 2);
                prev_idx = idx;
            }
        }
#ifdef RENDER_POINTS
        for (auto& pt : pts) {
            cv::circle(image_ocv, pt.first, 4, cv::Scalar(255, 0, 0), -1);
        }
#endif
    }
}

void renderTime(cv::Mat& img, double time_pd, double time_lm, double time_ttl) {
    cv::putText(img, cv::format("PD : %6.2fms", time_pd  / 1000.f), cv::Point(0, 10), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(255, 0, 0), 1);
    cv::putText(img, cv::format("LM : %6.2fms", time_lm  / 1000.f), cv::Point(0, 30), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(255, 0, 0), 1);
    cv::putText(img, cv::format("TTL: %6.2fms", time_ttl / 1000.f), cv::Point(0, 50), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(255, 0, 0), 1);
}
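/* (Editorial sketch, not part of the original program)
 * The unit-square -> ROI affine used in renderPose(): a landmark at
 * normalized (0.5, 0.5) must land at the center of the ROI. Here the ROI is
 * a hypothetical axis-aligned box (100,50)-(300,250). */
static cv::Point2f affine_sketch() {
    cv::Point2f src[] = { cv::Point2f(0.f, 0.f), cv::Point2f(1.f, 0.f), cv::Point2f(1.f, 1.f) };
    cv::Point2f dst[] = { cv::Point2f(100.f, 50.f), cv::Point2f(300.f, 50.f), cv::Point2f(300.f, 250.f) };
    cv::Mat mat = cv::getAffineTransform(src, dst);            // 2x3, CV_64F
    cv::Mat pt  = (cv::Mat_<double>(3, 1) << 0.5, 0.5, 1.0);   // homogeneous input point
    cv::Mat out = mat * pt;                                    // 2x1 result
    return cv::Point2f(out.at<double>(0, 0), out.at<double>(1, 0));   // (200, 150)
}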
int main(int argc, char *argv[]) {

    std::chrono::system_clock::time_point start, end, start_ttl, end_ttl;
    std::chrono::microseconds time_pd{ 0 }, time_lm{ 0 }, time_ttl{ 0 };

    ie::Core ie;

    // Initialization for the pose detection model
    ie::CNNNetwork net_pd;
    net_pd = ie.ReadNetwork(MODEL_POSE_DET + ".xml", MODEL_POSE_DET + ".bin");
    net_pd.setBatchSize(1);
    std::string input_name_pd = "input";   // [1,3,128,128]
    std::shared_ptr<ie::InputInfo> input_info_pd = net_pd.getInputsInfo()[input_name_pd];
    const ie::SizeVector idims_pd = input_info_pd->getTensorDesc().getDims();   // 0,1,2,3 = N,C,H,W
    input_info_pd->setLayout(ie::Layout::NHWC);
    input_info_pd->setPrecision(ie::Precision::FP32);

    std::vector<std::string> oname_pd = { "StatefulPartitionedCall/functional_1/classificators/concat",   // [1,896,1] scores
                                          "StatefulPartitionedCall/functional_1/regressors/concat" };     // [1,896,8] bboxes (FULL_POSE; [1,896,12] for upper body)
    for (auto& oname : oname_pd) {
        ie::DataPtr output_info = net_pd.getOutputsInfo()[oname];
        output_info->setPrecision(ie::Precision::FP32);
    }

    ie::ExecutableNetwork exenet_pd = ie.LoadNetwork(net_pd, DEVICE_PD);
    ie::InferRequest ireq_pd = exenet_pd.CreateInferRequest();



    // Initialization for the landmark model
    ie::CNNNetwork net_lm;
    net_lm = ie.ReadNetwork(MODEL_LM_DET + ".xml", MODEL_LM_DET + ".bin");
    net_lm.setBatchSize(1);
    std::string input_name_lm = "input";   // [1,3,256,256]
    std::shared_ptr<ie::InputInfo> input_info_lm = net_lm.getInputsInfo()[input_name_lm];
    const ie::SizeVector idims_lm = input_info_lm->getTensorDesc().getDims();   // 0,1,2,3 = N,C,H,W
    input_info_lm->setLayout(ie::Layout::NHWC);
    input_info_lm->setPrecision(ie::Precision::FP32);

    std::vector<std::string> oname_lm = { "StatefulPartitionedCall/functional_1/output_segmentation/BiasAdd/Add",   // [1, 1, 128, 128] segmentation map
                                          "StatefulPartitionedCall/functional_1/tf_op_layer_Sigmoid/Sigmoid",       // [1, 1, 1, 1]     landmark flag (score)
                                          "StatefulPartitionedCall/functional_1/tf_op_layer_ld_3d/ld_3d" };         // [1, 156]         landmarks (full body; [1,124] for upper body)
    for (auto& oname : oname_lm) {
        ie::DataPtr output_info = net_lm.getOutputsInfo()[oname];
        output_info->setPrecision(ie::Precision::FP32);
    }

    ie::ExecutableNetwork exenet_lm = ie.LoadNetwork(net_lm, DEVICE_LM);
    ie::InferRequest ireq_lm = exenet_lm.CreateInferRequest();

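    // (Editorial note, not in the original source) The blob names above are
    // hard-coded for the PINTO model zoo IRs. If a differently converted IR is
    // used, the actual names can be enumerated with the same API used above:
    //
    //   for (auto& item : net_pd.getInputsInfo())  std::cout << item.first << std::endl;
    //   for (auto& item : net_pd.getOutputsInfo()) std::cout << item.first << std::endl;
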
    std::vector<Anchor> anchors;
    create_ssd_anchors(idims_pd[3], idims_pd[2], anchors);

#if INPUT_TYPE == VIDEO_INPUT
    cv::VideoCapture cap(INPUT_FILE);
#elif INPUT_TYPE == CAM_INPUT
    cv::VideoCapture cap(0);
#endif

#if INPUT_TYPE == VIDEO_INPUT || INPUT_TYPE == CAM_INPUT
#ifdef VIDEO_OUTPUT
    cv::VideoWriter writer("output.mp4", cv::VideoWriter::fourcc('m', 'p', '4', 'v'), 30.0, cv::Size(VIDEO_SIZE, VIDEO_SIZE));
#endif
#endif


    int key = -1;
    while (key != 27) {   // ESC key to quit

        // read an image and trim it down to a square image
        const size_t _N = 0, _C = 1, _H = 2, _W = 3;
        cv::Mat image_ocv, image_pd, image_lm, image_f;
        cv::Mat image_org;
#if INPUT_TYPE == IMAGE_INPUT
        image_org = cv::imread(INPUT_FILE);
#elif INPUT_TYPE == VIDEO_INPUT || INPUT_TYPE == CAM_INPUT
        bool flg = cap.read(image_org);
        if (flg == false) break;   // end of the movie file
#endif
        start_ttl = std::chrono::system_clock::now();

        // center-crop the input image to a square, preserving the aspect ratio
        size_t iw = image_org.size().width;
        size_t ih = image_org.size().height;
        if (iw > ih) {
            image_ocv = image_org(cv::Rect(iw / 2 - ih / 2, 0, ih, ih));
        }
        else if (iw < ih) {
            image_ocv = image_org(cv::Rect(0, ih / 2 - iw / 2, iw, iw));
        }
        else {
            image_ocv = image_org;   // already square
        }

        // pose (ROI) detection
        cv::resize(image_ocv, image_pd, cv::Size(idims_pd[_W], idims_pd[_H]));
        image_pd.convertTo(image_f, CV_32F, (1.f / 255.f), 0.f);      // convert to FP32 and pre-process (scale to [0,1])
        //image_pd.convertTo(image_f, CV_32F, (1.f / 127.5f), -1.f);  // alternative pre-processing (scale and mean subtract, [-1,1])
        ie::TensorDesc tDesc(ie::Precision::FP32,
                             { 1, static_cast<size_t>(image_f.channels()), static_cast<size_t>(image_f.size().height), static_cast<size_t>(image_f.size().width) },
                             ie::Layout::NHWC);
        ireq_pd.SetBlob(input_name_pd, ie::make_shared_blob<float>(tDesc, (float*)(image_f.data)));

        start = std::chrono::system_clock::now();
        ireq_pd.Infer();   // pose detection
        end = std::chrono::system_clock::now();
        time_pd = std::chrono::duration_cast<std::chrono::microseconds>(end - start);

        // BlobName:StatefulPartitionedCall/functional_1/classificators/concat, Shape:[1, 896, 1], Precision:FP32
        // BlobName:StatefulPartitionedCall/functional_1/regressors/concat,     Shape:[1, 896, 8], Precision:FP32
        float* scores = ireq_pd.GetBlob(oname_pd[0])->buffer();
        float* bboxes = ireq_pd.GetBlob(oname_pd[1])->buffer();

        std::list<detect_region_t> region_list, region_nms_list;
        std::vector<detect_region_t> detect_results;
        decode_bounds(region_list, 0.5f, idims_pd[_W], idims_pd[_H], scores, bboxes, anchors);
        non_max_suppression(region_list, region_nms_list, 0.3f);
        pack_detect_result(detect_results, region_nms_list, idims_pd);

        // landmark detection
        cv::Mat outimg = image_ocv.clone();   // clone the input image for rendering
        size_t W = image_ocv.size().width;
        size_t H = image_ocv.size().height;
        pose_landmark_result_t landmarks[MAX_POSE_NUM] = {};
        for (size_t pose_id = 0; pose_id < detect_results.size(); pose_id++) {

            detect_region_t pose = detect_results[pose_id];

            cv::Point2f mat_src[] = { cv::Point2f(pose.roi_coord[0].x * W, pose.roi_coord[0].y * H),
                                      cv::Point2f(pose.roi_coord[1].x * W, pose.roi_coord[1].y * H),
                                      cv::Point2f(pose.roi_coord[2].x * W, pose.roi_coord[2].y * H) };
            cv::Point2f mat_dst[] = { cv::Point2f(0, 0), cv::Point2f(idims_lm[_W], 0), cv::Point2f(idims_lm[_W], idims_lm[_H]) };
            cv::Mat mat = cv::getAffineTransform(mat_src, mat_dst);
            cv::Mat img_affine = cv::Mat::zeros(idims_lm[_H], idims_lm[_W], CV_8UC3);   // (rows, cols)
            cv::warpAffine(image_ocv, img_affine, mat, img_affine.size());   // crop and rotate the ROI by warp affine transform

            img_affine.convertTo(image_f, CV_32F, (1.f / 255.f), 0.f);
            //img_affine.convertTo(image_f, CV_32F, (1.f / 127.5f), -1.f);
            tDesc = ie::TensorDesc(ie::Precision::FP32,
                                   { 1, static_cast<size_t>(image_f.channels()), static_cast<size_t>(image_f.size().height), static_cast<size_t>(image_f.size().width) },
                                   ie::Layout::NHWC);
            ireq_lm.SetBlob(input_name_lm, ie::make_shared_blob<float>(tDesc, (float*)(image_f.data)));

            start = std::chrono::system_clock::now();
            ireq_lm.Infer();   // landmark detection
            end = std::chrono::system_clock::now();
            time_lm = std::chrono::duration_cast<std::chrono::microseconds>(end - start);

            float* output_lm1 = ireq_lm.GetBlob(oname_lm[0])->buffer();   // segmentation map (not used here)
            float* lm_score   = ireq_lm.GetBlob(oname_lm[1])->buffer();   // landmark score (flag)
            float* lm         = ireq_lm.GetBlob(oname_lm[2])->buffer();   // landmarks

            landmarks[pose_id].score = *lm_score;
            for (size_t i = 0; i < POSE_JOINT_NUM; i++) {
                // ld_3d packs 4 values per joint: x, y (in landmark-input pixels), z, visibility
                landmarks[pose_id].joint[i].x = lm[4 * i + 0] / (float)idims_lm[_W];
                landmarks[pose_id].joint[i].y = lm[4 * i + 1] / (float)idims_lm[_H];
                landmarks[pose_id].joint[i].z = lm[4 * i + 2];
            }
#ifdef RENDER_ROI
            renderROI(outimg, pose.roi_coord);
#endif
        }
        renderPose(outimg, detect_results, landmarks);   // render all poses once, after every landmark set is filled in
        end_ttl = std::chrono::system_clock::now();
        time_ttl = std::chrono::duration_cast<std::chrono::microseconds>(end_ttl - start_ttl);

#ifdef RENDER_TIME
        renderTime(outimg, time_pd.count(), time_lm.count(), time_ttl.count());
#endif

#if INPUT_TYPE == IMAGE_INPUT
        cv::imwrite("output.jpg", outimg);
        cv::imshow("output", outimg);
        cv::waitKey(0);
        key = 27;   // force exit
#elif INPUT_TYPE == VIDEO_INPUT || INPUT_TYPE == CAM_INPUT
        cv::imshow("output", outimg);
        key = cv::waitKey(1);
#ifdef VIDEO_OUTPUT
        cv::resize(outimg, outimg, cv::Size(VIDEO_SIZE, VIDEO_SIZE));
        writer << outimg;
#endif
#endif
    }
    return 0;
}

--------------------------------------------------------------------------------
/resources/output-full.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yas-sim/blazepose_openvino/7cdcc10894cf5b01c8018c6b346e0dcc01e42209/resources/output-full.gif

--------------------------------------------------------------------------------
/resources/output.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yas-sim/blazepose_openvino/7cdcc10894cf5b01c8018c6b346e0dcc01e42209/resources/output.gif

--------------------------------------------------------------------------------
/resources/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yas-sim/blazepose_openvino/7cdcc10894cf5b01c8018c6b346e0dcc01e42209/resources/result.png
--------------------------------------------------------------------------------