├── CLA_LICENSE.md ├── LICENSE.md ├── Makefile ├── README.md ├── bin ├── .gitkeep ├── Jetson │ ├── .gitkeep │ └── libnvds_osd.so └── x86 │ ├── libnvds_osd.so │ └── libnvds_osd_cuda11.so ├── cover_table.hpp ├── deepstream_pose_estimation_app.cpp ├── deepstream_pose_estimation_config.txt ├── images ├── .gitkeep ├── auxillary.png ├── input.gif ├── main.png └── output.gif ├── munkres_algorithm.cpp ├── pair_graph.hpp ├── pose_estimation.onnx └── post_process.cpp /CLA_LICENSE.md: -------------------------------------------------------------------------------- 1 | # Individual Contributor License Agreement (CLA) 2 | 3 | Thank you for submitting your contributions to this project. 4 | 5 | By signing this CLA, you agree that the following terms apply to all of your past, present and future contributions to the project. 6 | 7 | ## License. 8 | You hereby represent that all present, past and future contributions are governed by the MIT License copyright statement. 9 | 10 | This entails that to the extent possible under law, you transfer all copyright and related or neighboring rights of the code or documents you contribute to the project itself or its maintainers. Furthermore you also represent that you have the authority to perform the above waiver with respect to the entirety of you contributions. 11 | 12 | ## Moral Rights. 13 | To the fullest extent permitted under applicable law, you hereby waive, and agree not to assert, all of your “moral rights” in or relating to your contributions for the benefit of the project. 14 | 15 | ## Third Party Content. 16 | If your Contribution includes or is based on any source code, object code, bug fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information or other works of authorship that were not authored by you (“Third Party Content”) or if you are aware of any third party intellectual property or proprietary rights associated with your Contribution (“Third Party Rights”), then you agree to include with the submission of your Contribution full details respecting such Third Party Content and Third Party Rights, including, without limitation, identification of which aspects of your Contribution contain Third Party Content or are associated with Third Party Rights, the owner/author of the Third Party Content and Third Party Rights, where you obtained the Third Party Content, and any applicable third party license terms or restrictions respecting the Third Party Content and Third Party Rights. For greater certainty, the foregoing obligations respecting the identification of Third Party Content and Third Party Rights do not apply to any portion of a Project that is incorporated into your Contribution to that same Project. 17 | 18 | ## Representations. 19 | You represent that, other than the Third Party Content and Third Party Rights identified by you in accordance with this Agreement, you are the sole author of your Contributions and are legally entitled to grant the foregoing licenses and waivers in respect of your Contributions. If your Contributions were created in the course of your employment with your past or present employer(s), you represent that such employer(s) has authorized you to make your Contributions on behalf of such employer(s) or such employer (s) has waived all of their right, title or interest in or to your Contributions. 20 | 21 | ## Disclaimer. 
22 | To the fullest extent permitted under applicable law, your Contributions are provided on an "as is" basis, without any warranties or conditions, express or implied, including, without limitation, any implied warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not required to provide support for your Contributions, except to the extent you desire to provide support. 23 | 24 | ## No Obligation. 25 | You acknowledge that the maintainers of this project are under no obligation to use or incorporate your contributions into the project. The decision to use or incorporate your contributions into the project will be made at the sole discretion of the maintainers or their authorized delegates. -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, NVIDIA Corporation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright 2020 - NVIDIA Corporation 3 | # SPDX-License-Identifier: MIT 4 | ################################################################################ 5 | 6 | CXX=g++ 7 | 8 | APP:= deepstream-pose-estimation-app 9 | 10 | TARGET_DEVICE = $(shell gcc -dumpmachine | cut -f1 -d -) 11 | 12 | NVDS_VERSION:=5.0 13 | 14 | LIB_INSTALL_DIR?=/opt/nvidia/deepstream/deepstream-$(NVDS_VERSION)/lib/ 15 | APP_INSTALL_DIR?=/opt/nvidia/deepstream/deepstream-$(NVDS_VERSION)/bin/ 16 | 17 | ifeq ($(TARGET_DEVICE),aarch64) 18 | CFLAGS:= -DPLATFORM_TEGRA 19 | endif 20 | 21 | SRCS:= deepstream_pose_estimation_app.cpp 22 | 23 | INCS:= $(wildcard *.h) 24 | 25 | PKGS:= gstreamer-1.0 gstreamer-video-1.0 x11 json-glib-1.0 26 | 27 | OBJS:= $(patsubst %.c,%.o, $(patsubst %.cpp,%.o, $(SRCS))) 28 | 29 | CFLAGS+= -I../../apps-common/includes -I../../../includes -I../deepstream-app/ -DDS_VERSION_MINOR=0 -DDS_VERSION_MAJOR=5 30 | 31 | LIBS+= -L$(LIB_INSTALL_DIR) -lnvdsgst_meta -lnvds_meta -lnvds_utils -lm \ 32 | -lpthread -ldl -Wl,-rpath,$(LIB_INSTALL_DIR) 33 | 34 | CFLAGS+= $(shell pkg-config --cflags $(PKGS)) 35 | 36 | LIBS+= $(shell pkg-config --libs $(PKGS)) 37 | 38 | all: $(APP) 39 | 40 | %.o: %.c $(INCS) Makefile 41 | $(CC) -c -o $@ $(CFLAGS) $< 42 | 43 | %.o: %.cpp $(INCS) Makefile 44 | $(CXX) -c -o $@ $(CFLAGS) $< 45 | 46 | $(APP): $(OBJS) Makefile 47 | $(CXX) -o $(APP) $(OBJS) $(LIBS) 48 | 49 | install: $(APP) 50 | cp -rv $(APP) $(APP_INSTALL_DIR) 51 | 52 | clean: 53 | rm -rf $(OBJS) $(APP) 54 | 55 | 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------ 2 | # This sample application is no longer maintained 3 | # ------------------------------------------------------ 4 | 5 | # DeepStream Human Pose Estimation 6 | 7 | Human pose estimation is the computer vision task of estimating the configuration (‘the pose’) of the human body by localizing certain key points on a body within a video or a photo. The following application serves as a reference to deploy custom pose estimation models with DeepStream 5.0 using the [TRTPose](https://github.com/NVIDIA-AI-IOT/trt_pose) project as an example. 8 | 9 | A detailed deep-dive NVIDIA Developer blog is available [here](https://developer.nvidia.com/blog/creating-a-human-pose-estimation-application-with-deepstream-sdk/?ncid=so-link-52952-vt24&sfdcid=EM08#cid=em08_so-link_en-us). 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
| Input Video Source | Output Video |
| --- | --- |
| ![Input video](images/input.gif) | ![Output video](images/output.gif) |
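As built in `deepstream_pose_estimation_app.cpp`, the reference pipeline is, schematically: `filesrc → h264parse → nvv4l2decoder → nvstreammux → nvinfer → nvvideoconvert → nvdsosd → tee → queue → nvvideoconvert → capsfilter → nvv4l2h264enc → h264parse → qtmux → filesink`. The pose post-processing (peak finding, PAF scoring, Munkres assignment, and part linking from `post_process.cpp`) runs in a buffer probe attached to the `nvinfer` source pad, and the resulting keypoint circles and skeleton lines are drawn onto the frame by `nvdsosd`.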
24 | 25 | 26 | ## Prerequisites 27 | You will need 28 | 1. DeepStreamSDK 5.0 29 | 2. CUDA 10.2 30 | 3. TensorRT 7.x 31 | 32 | 33 | ## Getting Started: 34 | To get started, please follow these steps. 35 | 1. Install [DeepStream](https://developer.nvidia.com/deepstream-sdk) on your platform and verify that it is working by running deepstream-app. 36 | 2. Clone the repository, preferably into `$DEEPSTREAM_DIR/sources/apps/sample_apps`. 37 | 3. Download the TRTPose [model](https://github.com/NVIDIA-AI-IOT/trt_pose), convert it to ONNX using this [export utility](https://github.com/NVIDIA-AI-IOT/trt_pose/blob/master/trt_pose/utils/export_for_isaac.py), and set its location in the DeepStream configuration file. 38 | 4. Replace the OSD binaries (x86 or Jetson) in `$DEEPSTREAM_DIR/libs` with the ones provided in this repository under `bin/`. Please note that these are not inter-compatible across platforms. 39 | 5. Compile the program: 40 | ``` 41 | $ cd deepstream-pose-estimation/ 42 | $ sudo make 43 | $ sudo ./deepstream-pose-estimation-app <file-uri> <output-path> 44 | ``` 45 | 6. The final output is stored in `output-path` as `Pose_Estimation.mp4` (see the note on output paths at the end of this README). 46 | 47 | NOTE: If you do not already have a .trt engine generated from the ONNX model you provided to DeepStream, an engine will be created on the first run of the application. Depending upon the system you’re using, this may take anywhere from 4 to 10 minutes. 48 | 49 | For any issues or questions, please feel free to make a new post on the [DeepStreamSDK forums](https://forums.developer.nvidia.com/c/accelerated-computing/intelligent-video-analytics/deepstream-sdk/). 50 | 51 | ## References 52 | Cao, Zhe, et al. "Realtime multi-person 2d pose estimation using part affinity fields." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017. 53 | 54 | Xiao, Bin, Haiping Wu, and Yichen Wei. "Simple baselines for human pose estimation and tracking." Proceedings of the European Conference on Computer Vision (ECCV). 2018.
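NOTE: The application appends the file name `Pose_Estimation.mp4` directly to the `output-path` argument (via `strcat(output_path, "Pose_Estimation.mp4")` in `deepstream_pose_estimation_app.cpp`), so the path you pass should normally end with a trailing `/`. For example, `./deepstream-pose-estimation-app input.h264 /home/user/results/` writes `/home/user/results/Pose_Estimation.mp4`; the input file and results directory here are only illustrative names.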
55 | -------------------------------------------------------------------------------- /bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/bin/.gitkeep -------------------------------------------------------------------------------- /bin/Jetson/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/bin/Jetson/.gitkeep -------------------------------------------------------------------------------- /bin/Jetson/libnvds_osd.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/bin/Jetson/libnvds_osd.so -------------------------------------------------------------------------------- /bin/x86/libnvds_osd.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/bin/x86/libnvds_osd.so -------------------------------------------------------------------------------- /bin/x86/libnvds_osd_cuda11.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/bin/x86/libnvds_osd_cuda11.so -------------------------------------------------------------------------------- /cover_table.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class CoverTable 7 | { 8 | public: 9 | CoverTable(int nrows, int ncols) : nrows(nrows), ncols(ncols) 10 | { 11 | rows.resize(nrows); 12 | cols.resize(ncols); 13 | } 14 | 15 | inline void coverRow(int row) 16 | { 17 | rows[row] = 1; 18 | } 19 | 20 | inline void coverCol(int col) 21 | { 22 | cols[col] = 1; 23 | } 24 | 25 | inline void uncoverRow(int row) 26 | { 27 | rows[row] = 0; 28 | } 29 | 30 | inline void uncoverCol(int col) 31 | { 32 | cols[col] = 0; 33 | } 34 | 35 | inline bool isCovered(int row, int col) const 36 | { 37 | return rows[row] || cols[col]; 38 | } 39 | 40 | inline bool isRowCovered(int row) const 41 | { 42 | return rows[row]; 43 | } 44 | 45 | inline bool isColCovered(int col) const 46 | { 47 | return cols[col]; 48 | } 49 | 50 | inline void clear() 51 | { 52 | for (int i = 0; i < nrows; i++) 53 | { 54 | uncoverRow(i); 55 | } 56 | for (int j = 0; j < ncols; j++) 57 | { 58 | uncoverCol(j); 59 | } 60 | } 61 | 62 | const int nrows; 63 | const int ncols; 64 | 65 | private: 66 | std::vector rows; 67 | std::vector cols; 68 | }; 69 | -------------------------------------------------------------------------------- /deepstream_pose_estimation_app.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2020 - NVIDIA Corporation 2 | // SPDX-License-Identifier: MIT 3 | 4 | #include "post_process.cpp" 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "gstnvdsmeta.h" 11 | #include "nvdsgstutils.h" 12 | #include "nvbufsurface.h" 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #define EPS 1e-6 21 | 22 | #define MAX_DISPLAY_LEN 64 23 | 24 | /* The muxer output resolution 
must be set if the input streams will be of 25 | * different resolution. The muxer will scale all the input frames to this 26 | * resolution. */ 27 | #define MUXER_OUTPUT_WIDTH 1920 28 | #define MUXER_OUTPUT_HEIGHT 1080 29 | 30 | /* Muxer batch formation timeout, for e.g. 40 millisec. Should ideally be set 31 | * based on the fastest source's framerate. */ 32 | #define MUXER_BATCH_TIMEOUT_USEC 4000000 33 | 34 | template 35 | using Vec1D = std::vector; 36 | 37 | template 38 | using Vec2D = std::vector>; 39 | 40 | template 41 | using Vec3D = std::vector>; 42 | 43 | gint frame_number = 0; 44 | 45 | /*Method to parse information returned from the model*/ 46 | std::tuple, Vec3D> 47 | parse_objects_from_tensor_meta(NvDsInferTensorMeta *tensor_meta) 48 | { 49 | Vec1D counts; 50 | Vec3D peaks; 51 | 52 | float threshold = 0.1; 53 | int window_size = 5; 54 | int max_num_parts = 20; 55 | int num_integral_samples = 7; 56 | float link_threshold = 0.1; 57 | int max_num_objects = 100; 58 | 59 | void *cmap_data = tensor_meta->out_buf_ptrs_host[0]; 60 | NvDsInferDims &cmap_dims = tensor_meta->output_layers_info[0].inferDims; 61 | void *paf_data = tensor_meta->out_buf_ptrs_host[1]; 62 | NvDsInferDims &paf_dims = tensor_meta->output_layers_info[1].inferDims; 63 | 64 | /* Finding peaks within a given window */ 65 | find_peaks(counts, peaks, cmap_data, cmap_dims, threshold, window_size, max_num_parts); 66 | /* Non-Maximum Suppression */ 67 | Vec3D refined_peaks = refine_peaks(counts, peaks, cmap_data, cmap_dims, window_size); 68 | /* Create a Bipartite graph to assign detected body-parts to a unique person in the frame */ 69 | Vec3D score_graph = paf_score_graph(paf_data, paf_dims, topology, counts, refined_peaks, num_integral_samples); 70 | /* Assign weights to all edges in the bipartite graph generated */ 71 | Vec3D connections = assignment(score_graph, topology, counts, link_threshold, max_num_parts); 72 | /* Connecting all the Body Parts and Forming a Human Skeleton */ 73 | Vec2D objects = connect_parts(connections, topology, counts, max_num_objects); 74 | return {objects, refined_peaks}; 75 | } 76 | 77 | /* MetaData to handle drawing onto the on-screen-display */ 78 | static void 79 | create_display_meta(Vec2D &objects, Vec3D &normalized_peaks, NvDsFrameMeta *frame_meta, int frame_width, int frame_height) 80 | { 81 | int K = topology.size(); 82 | int count = objects.size(); 83 | NvDsBatchMeta *bmeta = frame_meta->base_meta.batch_meta; 84 | NvDsDisplayMeta *dmeta = nvds_acquire_display_meta_from_pool(bmeta); 85 | nvds_add_display_meta_to_frame(frame_meta, dmeta); 86 | 87 | for (auto &object : objects) 88 | { 89 | int C = object.size(); 90 | for (int j = 0; j < C; j++) 91 | { 92 | int k = object[j]; 93 | if (k >= 0) 94 | { 95 | auto &peak = normalized_peaks[j][k]; 96 | int x = peak[1] * MUXER_OUTPUT_WIDTH; 97 | int y = peak[0] * MUXER_OUTPUT_HEIGHT; 98 | if (dmeta->num_circles == MAX_ELEMENTS_IN_DISPLAY_META) 99 | { 100 | dmeta = nvds_acquire_display_meta_from_pool(bmeta); 101 | nvds_add_display_meta_to_frame(frame_meta, dmeta); 102 | } 103 | NvOSD_CircleParams &cparams = dmeta->circle_params[dmeta->num_circles]; 104 | cparams.xc = x; 105 | cparams.yc = y; 106 | cparams.radius = 8; 107 | cparams.circle_color = NvOSD_ColorParams{244, 67, 54, 1}; 108 | cparams.has_bg_color = 1; 109 | cparams.bg_color = NvOSD_ColorParams{0, 255, 0, 1}; 110 | dmeta->num_circles++; 111 | } 112 | } 113 | 114 | for (int k = 0; k < K; k++) 115 | { 116 | int c_a = topology[k][2]; 117 | int c_b = topology[k][3]; 118 | if (object[c_a] 
>= 0 && object[c_b] >= 0) 119 | { 120 | auto &peak0 = normalized_peaks[c_a][object[c_a]]; 121 | auto &peak1 = normalized_peaks[c_b][object[c_b]]; 122 | int x0 = peak0[1] * MUXER_OUTPUT_WIDTH; 123 | int y0 = peak0[0] * MUXER_OUTPUT_HEIGHT; 124 | int x1 = peak1[1] * MUXER_OUTPUT_WIDTH; 125 | int y1 = peak1[0] * MUXER_OUTPUT_HEIGHT; 126 | if (dmeta->num_lines == MAX_ELEMENTS_IN_DISPLAY_META) 127 | { 128 | dmeta = nvds_acquire_display_meta_from_pool(bmeta); 129 | nvds_add_display_meta_to_frame(frame_meta, dmeta); 130 | } 131 | NvOSD_LineParams &lparams = dmeta->line_params[dmeta->num_lines]; 132 | lparams.x1 = x0; 133 | lparams.x2 = x1; 134 | lparams.y1 = y0; 135 | lparams.y2 = y1; 136 | lparams.line_width = 3; 137 | lparams.line_color = NvOSD_ColorParams{0, 255, 0, 1}; 138 | dmeta->num_lines++; 139 | } 140 | } 141 | } 142 | } 143 | 144 | /* pgie_src_pad_buffer_probe will extract metadata received from pgie 145 | * and update params for drawing rectangle, object information etc. */ 146 | static GstPadProbeReturn 147 | pgie_src_pad_buffer_probe(GstPad *pad, GstPadProbeInfo *info, 148 | gpointer u_data) 149 | { 150 | gchar *msg = NULL; 151 | GstBuffer *buf = (GstBuffer *)info->data; 152 | NvDsMetaList *l_frame = NULL; 153 | NvDsMetaList *l_obj = NULL; 154 | NvDsMetaList *l_user = NULL; 155 | NvDsBatchMeta *batch_meta = gst_buffer_get_nvds_batch_meta(buf); 156 | 157 | for (l_frame = batch_meta->frame_meta_list; l_frame != NULL; 158 | l_frame = l_frame->next) 159 | { 160 | NvDsFrameMeta *frame_meta = (NvDsFrameMeta *)(l_frame->data); 161 | 162 | for (l_user = frame_meta->frame_user_meta_list; l_user != NULL; 163 | l_user = l_user->next) 164 | { 165 | NvDsUserMeta *user_meta = (NvDsUserMeta *)l_user->data; 166 | if (user_meta->base_meta.meta_type == NVDSINFER_TENSOR_OUTPUT_META) 167 | { 168 | NvDsInferTensorMeta *tensor_meta = 169 | (NvDsInferTensorMeta *)user_meta->user_meta_data; 170 | Vec2D objects; 171 | Vec3D normalized_peaks; 172 | tie(objects, normalized_peaks) = parse_objects_from_tensor_meta(tensor_meta); 173 | create_display_meta(objects, normalized_peaks, frame_meta, frame_meta->source_frame_width, frame_meta->source_frame_height); 174 | } 175 | } 176 | 177 | for (l_obj = frame_meta->obj_meta_list; l_obj != NULL; 178 | l_obj = l_obj->next) 179 | { 180 | NvDsObjectMeta *obj_meta = (NvDsObjectMeta *)l_obj->data; 181 | for (l_user = obj_meta->obj_user_meta_list; l_user != NULL; 182 | l_user = l_user->next) 183 | { 184 | NvDsUserMeta *user_meta = (NvDsUserMeta *)l_user->data; 185 | if (user_meta->base_meta.meta_type == NVDSINFER_TENSOR_OUTPUT_META) 186 | { 187 | NvDsInferTensorMeta *tensor_meta = 188 | (NvDsInferTensorMeta *)user_meta->user_meta_data; 189 | Vec2D objects; 190 | Vec3D normalized_peaks; 191 | tie(objects, normalized_peaks) = parse_objects_from_tensor_meta(tensor_meta); 192 | create_display_meta(objects, normalized_peaks, frame_meta, frame_meta->source_frame_width, frame_meta->source_frame_height); 193 | } 194 | } 195 | } 196 | } 197 | return GST_PAD_PROBE_OK; 198 | } 199 | 200 | /* osd_sink_pad_buffer_probe will extract metadata received from OSD 201 | * and update params for drawing rectangle, object information etc. 
*/ 202 | static GstPadProbeReturn 203 | osd_sink_pad_buffer_probe(GstPad *pad, GstPadProbeInfo *info, 204 | gpointer u_data) 205 | { 206 | GstBuffer *buf = (GstBuffer *)info->data; 207 | guint num_rects = 0; 208 | NvDsObjectMeta *obj_meta = NULL; 209 | NvDsMetaList *l_frame = NULL; 210 | NvDsMetaList *l_obj = NULL; 211 | NvDsDisplayMeta *display_meta = NULL; 212 | 213 | NvDsBatchMeta *batch_meta = gst_buffer_get_nvds_batch_meta(buf); 214 | 215 | for (l_frame = batch_meta->frame_meta_list; l_frame != NULL; 216 | l_frame = l_frame->next) 217 | { 218 | NvDsFrameMeta *frame_meta = (NvDsFrameMeta *)(l_frame->data); 219 | int offset = 0; 220 | for (l_obj = frame_meta->obj_meta_list; l_obj != NULL; l_obj = l_obj->next) 221 | { 222 | obj_meta = (NvDsObjectMeta *)(l_obj->data); 223 | } 224 | display_meta = nvds_acquire_display_meta_from_pool(batch_meta); 225 | 226 | /* Parameters to draw text onto the On-Screen-Display */ 227 | NvOSD_TextParams *txt_params = &display_meta->text_params[0]; 228 | display_meta->num_labels = 1; 229 | txt_params->display_text = (char *)g_malloc0(MAX_DISPLAY_LEN); 230 | offset = snprintf(txt_params->display_text, MAX_DISPLAY_LEN, "Frame Number = %d", frame_number); 231 | offset = snprintf(txt_params->display_text + offset, MAX_DISPLAY_LEN, ""); 232 | 233 | txt_params->x_offset = 10; 234 | txt_params->y_offset = 12; 235 | 236 | txt_params->font_params.font_name = "Mono"; 237 | txt_params->font_params.font_size = 10; 238 | txt_params->font_params.font_color.red = 1.0; 239 | txt_params->font_params.font_color.green = 1.0; 240 | txt_params->font_params.font_color.blue = 1.0; 241 | txt_params->font_params.font_color.alpha = 1.0; 242 | 243 | txt_params->set_bg_clr = 1; 244 | txt_params->text_bg_clr.red = 0.0; 245 | txt_params->text_bg_clr.green = 0.0; 246 | txt_params->text_bg_clr.blue = 0.0; 247 | txt_params->text_bg_clr.alpha = 1.0; 248 | 249 | nvds_add_display_meta_to_frame(frame_meta, display_meta); 250 | } 251 | frame_number++; 252 | return GST_PAD_PROBE_OK; 253 | } 254 | 255 | static gboolean 256 | bus_call(GstBus *bus, GstMessage *msg, gpointer data) 257 | { 258 | GMainLoop *loop = (GMainLoop *)data; 259 | switch (GST_MESSAGE_TYPE(msg)) 260 | { 261 | case GST_MESSAGE_EOS: 262 | g_print("End of Stream\n"); 263 | g_main_loop_quit(loop); 264 | break; 265 | 266 | case GST_MESSAGE_ERROR: 267 | { 268 | gchar *debug; 269 | GError *error; 270 | gst_message_parse_error(msg, &error, &debug); 271 | g_printerr("ERROR from element %s: %s\n", 272 | GST_OBJECT_NAME(msg->src), error->message); 273 | if (debug) 274 | g_printerr("Error details: %s\n", debug); 275 | g_free(debug); 276 | g_error_free(error); 277 | g_main_loop_quit(loop); 278 | break; 279 | } 280 | 281 | default: 282 | break; 283 | } 284 | return TRUE; 285 | } 286 | 287 | gboolean 288 | link_element_to_tee_src_pad(GstElement *tee, GstElement *sinkelem) 289 | { 290 | gboolean ret = FALSE; 291 | GstPad *tee_src_pad = NULL; 292 | GstPad *sinkpad = NULL; 293 | GstPadTemplate *padtemplate = NULL; 294 | 295 | padtemplate = (GstPadTemplate *)gst_element_class_get_pad_template(GST_ELEMENT_GET_CLASS(tee), "src_%u"); 296 | tee_src_pad = gst_element_request_pad(tee, padtemplate, NULL, NULL); 297 | 298 | if (!tee_src_pad) 299 | { 300 | g_printerr("Failed to get src pad from tee"); 301 | goto done; 302 | } 303 | 304 | sinkpad = gst_element_get_static_pad(sinkelem, "sink"); 305 | if (!sinkpad) 306 | { 307 | g_printerr("Failed to get sink pad from '%s'", 308 | GST_ELEMENT_NAME(sinkelem)); 309 | goto done; 310 | } 311 | 312 | if 
(gst_pad_link(tee_src_pad, sinkpad) != GST_PAD_LINK_OK) 313 | { 314 | g_printerr("Failed to link '%s' and '%s'", GST_ELEMENT_NAME(tee), 315 | GST_ELEMENT_NAME(sinkelem)); 316 | goto done; 317 | } 318 | ret = TRUE; 319 | 320 | done: 321 | if (tee_src_pad) 322 | { 323 | gst_object_unref(tee_src_pad); 324 | } 325 | if (sinkpad) 326 | { 327 | gst_object_unref(sinkpad); 328 | } 329 | return ret; 330 | } 331 | 332 | int main(int argc, char *argv[]) 333 | { 334 | GMainLoop *loop = NULL; 335 | GstCaps *caps = NULL; 336 | GstElement *pipeline = NULL, *source = NULL, *h264parser = NULL, 337 | *decoder = NULL, *streammux = NULL, *sink = NULL, *pgie = NULL, *nvvidconv = NULL, *nvosd = NULL, 338 | *nvvideoconvert = NULL, *tee = NULL, *h264encoder = NULL, *cap_filter = NULL, *filesink = NULL, *queue = NULL, *qtmux = NULL, *h264parser1 = NULL, *nvsink = NULL; 339 | 340 | /* Add a transform element for Jetson*/ 341 | #ifdef PLATFORM_TEGRA 342 | GstElement *transform = NULL; 343 | #endif 344 | GstBus *bus = NULL; 345 | guint bus_watch_id; 346 | GstPad *osd_sink_pad = NULL; 347 | 348 | /* Check input arguments */ 349 | if (argc != 3) 350 | { 351 | g_printerr("Usage: %s \n", argv[0]); 352 | return -1; 353 | } 354 | 355 | /* Standard GStreamer initialization */ 356 | gst_init(&argc, &argv); 357 | loop = g_main_loop_new(NULL, FALSE); 358 | 359 | /* Create gstreamer elements */ 360 | /* Create Pipeline element that will form a connection of other elements */ 361 | pipeline = gst_pipeline_new("deepstream-tensorrt-openpose-pipeline"); 362 | 363 | /* Source element for reading from the file */ 364 | source = gst_element_factory_make("filesrc", "file-source"); 365 | 366 | /* Since the data format in the input file is elementary h264 stream, 367 | * we need a h264parser */ 368 | h264parser = gst_element_factory_make("h264parse", "h264-parser"); 369 | h264parser1 = gst_element_factory_make("h264parse", "h264-parser1"); 370 | 371 | /* Use nvdec_h264 for hardware accelerated decode on GPU */ 372 | decoder = gst_element_factory_make("nvv4l2decoder", "nvv4l2-decoder"); 373 | 374 | /* Create nvstreammux instance to form batches from one or more sources. */ 375 | streammux = gst_element_factory_make("nvstreammux", "stream-muxer"); 376 | 377 | if (!pipeline || !streammux) 378 | { 379 | g_printerr("One element could not be created. 
Exiting.\n"); 380 | return -1; 381 | } 382 | 383 | /* Use nvinfer to run inferencing on decoder's output, 384 | * behaviour of inferencing is set through config file */ 385 | pgie = gst_element_factory_make("nvinfer", "primary-nvinference-engine"); 386 | 387 | /* Use convertor to convert from NV12 to RGBA as required by nvosd */ 388 | nvvidconv = gst_element_factory_make("nvvideoconvert", "nvvideo-converter"); 389 | 390 | queue = gst_element_factory_make("queue", "queue"); 391 | filesink = gst_element_factory_make("filesink", "filesink"); 392 | 393 | /* Set output file location */ 394 | char *output_path = argv[2]; 395 | strcat(output_path,"Pose_Estimation.mp4"); 396 | g_object_set(G_OBJECT(filesink), "location", output_path, NULL); 397 | 398 | nvvideoconvert = gst_element_factory_make("nvvideoconvert", "nvvideo-converter1"); 399 | tee = gst_element_factory_make("tee", "TEE"); 400 | h264encoder = gst_element_factory_make("nvv4l2h264enc", "video-encoder"); 401 | cap_filter = gst_element_factory_make("capsfilter", "enc_caps_filter"); 402 | caps = gst_caps_from_string("video/x-raw(memory:NVMM), format=I420"); 403 | g_object_set(G_OBJECT(cap_filter), "caps", caps, NULL); 404 | qtmux = gst_element_factory_make("qtmux", "muxer"); 405 | 406 | /* Create OSD to draw on the converted RGBA buffer */ 407 | nvosd = gst_element_factory_make("nvdsosd", "nv-onscreendisplay"); 408 | 409 | /* Finally render the osd output */ 410 | #ifdef PLATFORM_TEGRA 411 | transform = gst_element_factory_make("nvegltransform", "nvegl-transform"); 412 | #endif 413 | nvsink = gst_element_factory_make("nveglglessink", "nvvideo-renderer"); 414 | sink = gst_element_factory_make("fpsdisplaysink", "fps-display"); 415 | 416 | g_object_set(G_OBJECT(sink), "text-overlay", FALSE, "video-sink", nvsink, "sync", FALSE, NULL); 417 | 418 | if (!source || !h264parser || !decoder || !pgie || !nvvidconv || !nvosd || !sink || !cap_filter || !tee || !nvvideoconvert || 419 | !h264encoder || !filesink || !queue || !qtmux || !h264parser1) 420 | { 421 | g_printerr("One element could not be created. Exiting.\n"); 422 | return -1; 423 | } 424 | #ifdef PLATFORM_TEGRA 425 | if (!transform) 426 | { 427 | g_printerr("One tegra element could not be created. 
Exiting.\n"); 428 | return -1; 429 | } 430 | #endif 431 | 432 | /* we set the input filename to the source element */ 433 | g_object_set(G_OBJECT(source), "location", argv[1], NULL); 434 | 435 | g_object_set(G_OBJECT(streammux), "width", MUXER_OUTPUT_WIDTH, "height", 436 | MUXER_OUTPUT_HEIGHT, "batch-size", 1, 437 | "batched-push-timeout", MUXER_BATCH_TIMEOUT_USEC, NULL); 438 | 439 | /* Set all the necessary properties of the nvinfer element, 440 | * the necessary ones are : */ 441 | g_object_set(G_OBJECT(pgie), "output-tensor-meta", TRUE, 442 | "config-file-path", "deepstream_pose_estimation_config.txt", NULL); 443 | 444 | /* we add a message handler */ 445 | bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline)); 446 | bus_watch_id = gst_bus_add_watch(bus, bus_call, loop); 447 | gst_object_unref(bus); 448 | 449 | /* Set up the pipeline */ 450 | /* we add all elements into the pipeline */ 451 | #ifdef PLATFORM_TEGRA 452 | gst_bin_add_many(GST_BIN(pipeline), 453 | source, h264parser, decoder, streammux, pgie, 454 | nvvidconv, nvosd, transform, /*sink,*/ 455 | tee, nvvideoconvert, h264encoder, cap_filter, filesink, queue, h264parser1, qtmux, NULL); 456 | #else 457 | gst_bin_add_many(GST_BIN(pipeline), 458 | source, h264parser, decoder, streammux, pgie, 459 | nvvidconv, nvosd, /*sink,*/ 460 | tee, nvvideoconvert, h264encoder, cap_filter, filesink, queue, h264parser1, qtmux, NULL); 461 | #endif 462 | 463 | GstPad *sinkpad, *srcpad; 464 | gchar pad_name_sink[16] = "sink_0"; 465 | gchar pad_name_src[16] = "src"; 466 | 467 | sinkpad = gst_element_get_request_pad(streammux, pad_name_sink); 468 | if (!sinkpad) 469 | { 470 | g_printerr("Streammux request sink pad failed. Exiting.\n"); 471 | return -1; 472 | } 473 | 474 | srcpad = gst_element_get_static_pad(decoder, pad_name_src); 475 | if (!srcpad) 476 | { 477 | g_printerr("Decoder request src pad failed. Exiting.\n"); 478 | return -1; 479 | } 480 | 481 | if (gst_pad_link(srcpad, sinkpad) != GST_PAD_LINK_OK) 482 | { 483 | g_printerr("Failed to link decoder to stream muxer. Exiting.\n"); 484 | return -1; 485 | } 486 | 487 | gst_object_unref(sinkpad); 488 | gst_object_unref(srcpad); 489 | 490 | if (!gst_element_link_many(source, h264parser, decoder, NULL)) 491 | { 492 | g_printerr("Elements could not be linked: 1. Exiting.\n"); 493 | return -1; 494 | } 495 | #if 0 496 | #ifdef PLATFORM_TEGRA 497 | if (!gst_element_link_many (streammux, pgie, 498 | nvvidconv, nvosd, transform, sink, NULL)) { 499 | g_printerr ("Elements could not be linked: 2. Exiting.\n"); 500 | return -1; 501 | } 502 | #else 503 | if (!gst_element_link_many (streammux, pgie, nvvidconv, nvosd, sink, NULL)) { 504 | g_printerr ("Elements could not be linked: 2. Exiting.\n"); 505 | return -1; 506 | } 507 | #endif 508 | #else 509 | #ifdef PLATFORM_TEGRA 510 | if (!gst_element_link_many(streammux, pgie, 511 | nvvidconv, nvosd, tee, NULL)) 512 | { 513 | g_printerr("Elements could not be linked: 2. Exiting.\n"); 514 | return -1; 515 | } 516 | #else 517 | if (!gst_element_link_many(streammux, pgie, nvvidconv, nvosd, tee, NULL)) 518 | { 519 | g_printerr("Elements could not be linked: 2. Exiting.\n"); 520 | return -1; 521 | } 522 | #endif 523 | #if 0 524 | if (!link_element_to_tee_src_pad(tee, queue)) { 525 | g_printerr ("Could not link tee to sink\n"); 526 | return -1; 527 | } 528 | if (!gst_element_link_many (queue, sink, NULL)) { 529 | g_printerr ("Elements could not be linked: 2. 
Exiting.\n"); 530 | return -1; 531 | } 532 | #else 533 | if (!link_element_to_tee_src_pad(tee, queue)) 534 | { 535 | g_printerr("Could not link tee to nvvideoconvert\n"); 536 | return -1; 537 | } 538 | if (!gst_element_link_many(queue, nvvideoconvert, cap_filter, h264encoder, 539 | h264parser1, qtmux, filesink, NULL)) 540 | { 541 | g_printerr("Elements could not be linked\n"); 542 | return -1; 543 | } 544 | #endif 545 | 546 | #endif 547 | 548 | GstPad *pgie_src_pad = gst_element_get_static_pad(pgie, "src"); 549 | if (!pgie_src_pad) 550 | g_print("Unable to get pgie src pad\n"); 551 | else 552 | gst_pad_add_probe(pgie_src_pad, GST_PAD_PROBE_TYPE_BUFFER, 553 | pgie_src_pad_buffer_probe, (gpointer)sink, NULL); 554 | 555 | /* Lets add probe to get informed of the meta data generated, we add probe to 556 | * the sink pad of the osd element, since by that time, the buffer would have 557 | * had got all the metadata. */ 558 | osd_sink_pad = gst_element_get_static_pad(nvosd, "sink"); 559 | if (!osd_sink_pad) 560 | g_print("Unable to get sink pad\n"); 561 | else 562 | gst_pad_add_probe(osd_sink_pad, GST_PAD_PROBE_TYPE_BUFFER, 563 | osd_sink_pad_buffer_probe, (gpointer)sink, NULL); 564 | 565 | /* Set the pipeline to "playing" state */ 566 | g_print("Now playing: %s\n", argv[1]); 567 | gst_element_set_state(pipeline, GST_STATE_PLAYING); 568 | 569 | /* Wait till pipeline encounters an error or EOS */ 570 | g_print("Running...\n"); 571 | g_main_loop_run(loop); 572 | 573 | /* Out of the main loop, clean up nicely */ 574 | g_print("Returned, stopping playback\n"); 575 | gst_element_set_state(pipeline, GST_STATE_NULL); 576 | g_print("Deleting pipeline\n"); 577 | gst_object_unref(GST_OBJECT(pipeline)); 578 | g_source_remove(bus_watch_id); 579 | g_main_loop_unref(loop); 580 | return 0; 581 | } 582 | -------------------------------------------------------------------------------- /deepstream_pose_estimation_config.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2020 - NVIDIA Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | # Following properties are mandatory when engine files are not specified: 5 | # int8-calib-file(Only in INT8) 6 | # Caffemodel mandatory properties: model-file, proto-file, output-blob-names 7 | # UFF: uff-file, input-dims, uff-input-blob-name, output-blob-names 8 | # ONNX: onnx-file 9 | # 10 | # Mandatory properties for detectors: 11 | # parse-func, num-detected-classes, 12 | # custom-lib-path (when parse-func=0 i.e. custom), 13 | # parse-bbox-func-name (when parse-func=0) 14 | # 15 | # Optional properties for detectors: 16 | # enable-dbscan(Default=false), interval(Primary mode only, Default=0) 17 | # 18 | # Mandatory properties for classifiers: 19 | # classifier-threshold, is-classifier 20 | # 21 | # Optional properties for classifiers: 22 | # classifier-async-mode(Secondary mode only, Default=false) 23 | # 24 | # Optional properties in secondary mode: 25 | # operate-on-gie-id(Default=0), operate-on-class-ids(Defaults to all classes), 26 | # input-object-min-width, input-object-min-height, input-object-max-width, 27 | # input-object-max-height 28 | # 29 | # Following properties are always recommended: 30 | # batch-size(Default=1) 31 | # 32 | # Other optional properties: 33 | # net-scale-factor(Default=1), network-mode(Default=0 i.e FP32), 34 | # model-color-format(Default=0 i.e. RGB) model-engine-file, labelfile-path, 35 | # mean-file, gie-unique-id(Default=0), offsets, gie-mode (Default=1 i.e. 
primary), 36 | # custom-lib-path, network-mode(Default=0 i.e FP32) 37 | # 38 | # The values in the config file are overridden by values set through GObject 39 | # properties. 40 | 41 | [property] 42 | gpu-id=0 43 | net-scale-factor=0.0174292 44 | offsets=123.675;116.28;103.53 45 | onnx-file=pose_estimation.onnx 46 | labelfile-path=labels.txt 47 | batch-size=1 48 | process-mode=1 49 | model-color-format=0 50 | ## 0=FP32, 1=INT8, 2=FP16 mode 51 | network-mode=2 52 | num-detected-classes=4 53 | interval=0 54 | gie-unique-id=1 55 | model-engine-file=pose_estimation.onnx_b1_gpu0_fp16.engine 56 | network-type=100 57 | workspace-size=3000 58 | 59 | 60 | -------------------------------------------------------------------------------- /images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/images/.gitkeep -------------------------------------------------------------------------------- /images/auxillary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/images/auxillary.png -------------------------------------------------------------------------------- /images/input.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/images/input.gif -------------------------------------------------------------------------------- /images/main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/images/main.png -------------------------------------------------------------------------------- /images/output.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/images/output.gif -------------------------------------------------------------------------------- /munkres_algorithm.cpp: -------------------------------------------------------------------------------- 1 | #include "pair_graph.hpp" 2 | #include "cover_table.hpp" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | template 11 | using Vec1D = std::vector; 12 | template 13 | using Vec2D = std::vector>; 14 | template 15 | using Vec3D = std::vector>; 16 | 17 | // Helper method to subtract the minimum row from cost_graph 18 | void subtract_minimum_row(Vec2D &cost_graph, int nrows, int ncols) 19 | { 20 | for (int i = 0; i < nrows; i++) 21 | { 22 | // Iterate the find the minimum 23 | float min = cost_graph[i][0]; 24 | for (int j = 0; j < ncols; j++) 25 | { 26 | float val = cost_graph[i][j]; 27 | if (val < min) 28 | { 29 | min = val; 30 | } 31 | } 32 | 33 | // Subtract the Minimum 34 | for (int j = 0; j < ncols; j++) 35 | { 36 | cost_graph[i][j] -= min; 37 | } 38 | } 39 | } 40 | 41 | // Helper method to subtract the minimum col from cost_graph 42 | void subtract_minimum_column(Vec2D &cost_graph, int nrows, int ncols) 43 | { 44 | for (int j = 0; j < ncols; j++) 45 | { 46 | // Iterate and find the minimum 47 | float min = cost_graph[0][j]; 48 | for (int i = 0; i < nrows; i++) 49 | { 50 | float val = 
cost_graph[i][j]; 51 | if (val < min) 52 | { 53 | min = val; 54 | } 55 | } 56 | 57 | // Subtract the minimum 58 | for (int i = 0; i < nrows; i++) 59 | { 60 | cost_graph[i][j] -= min; 61 | } 62 | } 63 | } 64 | 65 | void munkresStep1(Vec2D &cost_graph, PairGraph &star_graph, int nrows, 66 | int ncols) 67 | { 68 | for (int i = 0; i < nrows; i++) 69 | { 70 | for (int j = 0; j < ncols; j++) 71 | { 72 | if (!star_graph.isRowSet(i) && !star_graph.isColSet(j) && (cost_graph[i][j] == 0)) 73 | { 74 | star_graph.set(i, j); 75 | } 76 | } 77 | } 78 | } 79 | 80 | // Exits if '1' is returned 81 | bool munkresStep2(const PairGraph &star_graph, CoverTable &cover_table) 82 | { 83 | int k = 84 | star_graph.nrows < star_graph.ncols ? star_graph.nrows : star_graph.ncols; 85 | int count = 0; 86 | for (int j = 0; j < star_graph.ncols; j++) 87 | { 88 | if (star_graph.isColSet(j)) 89 | { 90 | cover_table.coverCol(j); 91 | count++; 92 | } 93 | } 94 | return count >= k; 95 | } 96 | 97 | bool munkresStep3(Vec2D &cost_graph, const PairGraph &star_graph, 98 | PairGraph &prime_graph, CoverTable &cover_table, std::pair &p, 99 | int nrows, int ncols) 100 | { 101 | for (int i = 0; i < nrows; i++) 102 | { 103 | for (int j = 0; j < ncols; j++) 104 | { 105 | if (cost_graph[i][j] == 0 && !cover_table.isCovered(i, j)) 106 | { 107 | prime_graph.set(i, j); 108 | if (star_graph.isRowSet(i)) 109 | { 110 | cover_table.coverRow(i); 111 | cover_table.uncoverCol(star_graph.colForRow(i)); 112 | } 113 | else 114 | { 115 | p.first = i; 116 | p.second = j; 117 | return 1; 118 | } 119 | } 120 | } 121 | } 122 | return 0; 123 | }; 124 | 125 | void munkresStep4(PairGraph &star_graph, PairGraph &prime_graph, 126 | CoverTable &cover_table, std::pair &p) 127 | { 128 | // This process should be repeated until no star is found in prime's column 129 | while (star_graph.isColSet(p.second)) 130 | { 131 | // First find and reset any star found in the prime's columns 132 | std::pair s = {star_graph.rowForCol(p.second), p.second}; 133 | star_graph.reset(s.first, s.second); 134 | 135 | // Set this prime to a star 136 | star_graph.set(p.first, p.second); 137 | 138 | // Repeat the same process for prime in cleared star's row 139 | p = {s.first, prime_graph.colForRow(s.first)}; 140 | } 141 | star_graph.set(p.first, p.second); 142 | cover_table.clear(); 143 | prime_graph.clear(); 144 | } 145 | 146 | void munkresStep5(Vec2D &cost_graph, const CoverTable &cover_table, 147 | int nrows, int ncols) 148 | { 149 | bool valid = false; 150 | float min; 151 | for (int i = 0; i < nrows; i++) 152 | { 153 | for (int j = 0; j < ncols; j++) 154 | { 155 | if (!cover_table.isCovered(i, j)) 156 | { 157 | if (!valid) 158 | { 159 | min = cost_graph[i][j]; 160 | valid = true; 161 | } 162 | else if (cost_graph[i][j] < min) 163 | { 164 | min = cost_graph[i][j]; 165 | } 166 | } 167 | } 168 | } 169 | 170 | for (int i = 0; i < nrows; i++) 171 | { 172 | if (cover_table.isRowCovered(i)) 173 | { 174 | for (int j = 0; j < ncols; j++) 175 | { 176 | cost_graph[i][j] += min; 177 | } 178 | } 179 | } 180 | for (int j = 0; j < ncols; j++) 181 | { 182 | if (!cover_table.isColCovered(j)) 183 | { 184 | for (int i = 0; i < nrows; i++) 185 | { 186 | cost_graph[i][j] -= min; 187 | } 188 | } 189 | } 190 | } 191 | 192 | void munkres_algorithm(Vec2D &cost_graph, PairGraph &star_graph, int nrows, 193 | int ncols) 194 | { 195 | PairGraph prime_graph(nrows, ncols); 196 | CoverTable cover_table(nrows, ncols); 197 | prime_graph.clear(); 198 | cover_table.clear(); 199 | star_graph.clear(); 200 | 201 | int 
step = 0; 202 | if (ncols >= nrows) 203 | { 204 | subtract_minimum_row(cost_graph, nrows, ncols); 205 | } 206 | if (ncols > nrows) 207 | { 208 | step = 1; 209 | } 210 | 211 | std::pair p; 212 | bool done = false; 213 | while (!done) 214 | { 215 | switch (step) 216 | { 217 | case 0: 218 | subtract_minimum_column(cost_graph, nrows, ncols); 219 | case 1: 220 | munkresStep1(cost_graph, star_graph, nrows, ncols); 221 | case 2: 222 | if (munkresStep2(star_graph, cover_table)) 223 | { 224 | done = true; 225 | break; 226 | } 227 | case 3: 228 | if (!munkresStep3(cost_graph, star_graph, prime_graph, cover_table, p, 229 | nrows, ncols)) 230 | { 231 | step = 5; 232 | break; 233 | } 234 | case 4: 235 | munkresStep4(star_graph, prime_graph, cover_table, p); 236 | step = 2; 237 | break; 238 | case 5: 239 | munkresStep5(cost_graph, cover_table, nrows, ncols); 240 | step = 3; 241 | break; 242 | } 243 | } 244 | } -------------------------------------------------------------------------------- /pair_graph.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class PairGraph 7 | { 8 | public: 9 | PairGraph(int nrows, int ncols) : nrows(nrows), ncols(ncols) 10 | { 11 | this->rows.resize(nrows); 12 | this->cols.resize(ncols); 13 | } 14 | 15 | /** 16 | * Returns the column index of the pair matching this row 17 | */ 18 | inline int colForRow(int row) const 19 | { 20 | return this->rows[row]; 21 | } 22 | 23 | /** 24 | * Returns the row index of the pair matching this column 25 | */ 26 | inline int rowForCol(int col) const 27 | { 28 | return this->cols[col]; 29 | } 30 | 31 | /** 32 | * Creates a pair between row and col 33 | */ 34 | inline void set(int row, int col) 35 | { 36 | this->rows[row] = col; 37 | this->cols[col] = row; 38 | } 39 | 40 | inline bool isRowSet(int row) const 41 | { 42 | return rows[row] >= 0; 43 | } 44 | 45 | inline bool isColSet(int col) const 46 | { 47 | return cols[col] >= 0; 48 | } 49 | 50 | inline bool isPair(int row, int col) 51 | { 52 | return rows[row] == col; 53 | } 54 | 55 | /** 56 | * Clears pair between row and col 57 | */ 58 | inline void reset(int row, int col) 59 | { 60 | this->rows[row] = -1; 61 | this->cols[col] = -1; 62 | } 63 | 64 | /** 65 | * Clears all pairs in graph 66 | */ 67 | void clear() 68 | { 69 | for (int i = 0; i < this->nrows; i++) 70 | { 71 | this->rows[i] = -1; 72 | } 73 | for (int j = 0; j < this->ncols; j++) 74 | { 75 | this->cols[j] = -1; 76 | } 77 | } 78 | 79 | int numPairs() 80 | { 81 | int count = 0; 82 | for (int i = 0; i < nrows; i++) 83 | { 84 | if (rows[i] >= 0) 85 | { 86 | count++; 87 | } 88 | } 89 | return count; 90 | } 91 | 92 | std::vector> pairs() 93 | { 94 | std::vector> p(numPairs()); 95 | int count = 0; 96 | for (int i = 0; i < nrows; i++) 97 | { 98 | if (isRowSet(i)) 99 | { 100 | p[count++] = {i, colForRow(i)}; 101 | } 102 | } 103 | return p; 104 | } 105 | 106 | const int nrows; 107 | const int ncols; 108 | 109 | private: 110 | std::vector rows; 111 | std::vector cols; 112 | }; 113 | -------------------------------------------------------------------------------- /pose_estimation.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/pose_estimation.onnx -------------------------------------------------------------------------------- /post_process.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright 2020 - NVIDIA Corporation 2 | // SPDX-License-Identifier: MIT 3 | 4 | #include "pair_graph.hpp" 5 | #include "cover_table.hpp" 6 | #include "munkres_algorithm.cpp" 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "gstnvdsmeta.h" 13 | #include "gstnvdsinfer.h" 14 | #include "nvdsgstutils.h" 15 | #include "nvbufsurface.h" 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #define EPS 1e-6 24 | 25 | template 26 | using Vec1D = std::vector; 27 | template 28 | using Vec2D = std::vector>; 29 | template 30 | using Vec3D = std::vector>; 31 | 32 | static const int M = 2; 33 | 34 | static Vec2D topology{ 35 | {0, 1, 15, 13}, 36 | {2, 3, 13, 11}, 37 | {4, 5, 16, 14}, 38 | {6, 7, 14, 12}, 39 | {8, 9, 11, 12}, 40 | {10, 11, 5, 7}, 41 | {12, 13, 6, 8}, 42 | {14, 15, 7, 9}, 43 | {16, 17, 8, 10}, 44 | {18, 19, 1, 2}, 45 | {20, 21, 0, 1}, 46 | {22, 23, 0, 2}, 47 | {24, 25, 1, 3}, 48 | {26, 27, 2, 4}, 49 | {28, 29, 3, 5}, 50 | {30, 31, 4, 6}, 51 | {32, 33, 17, 0}, 52 | {34, 35, 17, 5}, 53 | {36, 37, 17, 6}, 54 | {38, 39, 17, 11}, 55 | {40, 41, 17, 12}}; 56 | 57 | /* Method to find peaks in the output tensor. 'window_size' represents how many pixels we are considering at once to find a maximum value, or a ‘peak’. 58 | Once we find a peak, we mark it using the ‘is_peak’ boolean in the inner loop and assign this maximum value to the center pixel of our window. 59 | This is then repeated until we cover the entire frame. */ 60 | void find_peaks(Vec1D &counts_out, Vec3D &peaks_out, void *cmap_data, 61 | NvDsInferDims &cmap_dims, float threshold, int window_size, int max_count) 62 | { 63 | int w = window_size / 2; 64 | int width = cmap_dims.d[2]; 65 | int height = cmap_dims.d[1]; 66 | 67 | counts_out.assign(cmap_dims.d[0], 0); 68 | peaks_out.assign(cmap_dims.d[0], Vec2D(max_count, Vec1D(M, 69 | 0))); 70 | 71 | for (unsigned int c = 0; c < cmap_dims.d[0]; c++) 72 | { 73 | int count = 0; 74 | float *cmap_data_c = (float *)cmap_data + c * width * height; 75 | 76 | for (int i = 0; i < height && count < max_count; i++) 77 | { 78 | for (int j = 0; j < width && count < max_count; j++) 79 | { 80 | float value = cmap_data_c[i * width + j]; 81 | 82 | if (value < threshold) 83 | continue; 84 | 85 | int ii_min = i - w; 86 | int jj_min = j - w; 87 | int ii_max = i + w + 1; 88 | int jj_max = j + w + 1; 89 | 90 | if (ii_min < 0) 91 | ii_min = 0; 92 | if (ii_max > height) 93 | ii_max = height; 94 | if (jj_min < 0) 95 | jj_min = 0; 96 | if (jj_max > width) 97 | jj_max = width; 98 | 99 | bool is_peak = true; 100 | for (int ii = ii_min; ii < ii_max; ii++) 101 | { 102 | for (int jj = jj_min; jj < jj_max; jj++) 103 | { 104 | if (cmap_data_c[ii * width + jj] > value) 105 | { 106 | is_peak = false; 107 | } 108 | } 109 | } 110 | 111 | if (is_peak) 112 | { 113 | peaks_out[c][count][0] = i; 114 | peaks_out[c][count][1] = j; 115 | count++; 116 | } 117 | } 118 | } 119 | 120 | counts_out[c] = count; 121 | } 122 | } 123 | 124 | /* Normalize the peaks found in 'find_peaks' and apply non-maximal suppression*/ 125 | Vec3D 126 | refine_peaks(Vec1D &counts, 127 | Vec3D &peaks, void *cmap_data, NvDsInferDims &cmap_dims, 128 | int window_size) 129 | { 130 | int w = window_size / 2; 131 | int width = cmap_dims.d[2]; 132 | int height = cmap_dims.d[1]; 133 | 134 | Vec3D refined_peaks(peaks.size(), Vec2D(peaks[0].size(), 135 | Vec1D(peaks[0][0].size(), 0))); 136 | 137 | for (unsigned int c = 0; c < 
cmap_dims.d[0]; c++) 138 | { 139 | int count = counts[c]; 140 | auto &refined_peaks_a_bc = refined_peaks[c]; 141 | auto &peaks_a_bc = peaks[c]; 142 | float *cmap_data_c = (float *)cmap_data + c * width * height; 143 | 144 | for (int p = 0; p < count; p++) 145 | { 146 | auto &refined_peak = refined_peaks_a_bc[p]; 147 | auto &peak = peaks_a_bc[p]; 148 | 149 | int i = peak[0]; 150 | int j = peak[1]; 151 | float weight_sum = 0.0f; 152 | 153 | for (int ii = i - w; ii < i + w + 1; ii++) 154 | { 155 | int ii_idx = ii; 156 | 157 | if (ii < 0) 158 | ii_idx = -ii; 159 | else if (ii >= height) 160 | ii_idx = height - (ii - height) - 2; 161 | 162 | for (int jj = j - w; jj < j + w + 1; jj++) 163 | { 164 | int jj_idx = jj; 165 | 166 | if (jj < 0) 167 | jj_idx = -jj; 168 | else if (jj >= width) 169 | jj_idx = width - (jj - width) - 2; 170 | 171 | float weight = cmap_data_c[ii_idx * width + jj_idx]; 172 | refined_peak[0] += weight * ii; 173 | refined_peak[1] += weight * jj; 174 | weight_sum += weight; 175 | } 176 | } 177 | 178 | refined_peak[0] /= weight_sum; 179 | refined_peak[1] /= weight_sum; 180 | refined_peak[0] += 0.5; 181 | refined_peak[1] += 0.5; 182 | refined_peak[0] /= height; 183 | refined_peak[1] /= width; 184 | } 185 | } 186 | 187 | return refined_peaks; 188 | } 189 | 190 | /* Create a bipartite graph to assign detected body-parts to a unique person in the frame. This method also takes care of finding the line integral to assign scores 191 | to these points */ 192 | Vec3D 193 | paf_score_graph(void *paf_data, NvDsInferDims &paf_dims, 194 | Vec2D &topology, Vec1D &counts, 195 | Vec3D &peaks, int num_integral_samples) 196 | { 197 | int K = topology.size(); 198 | int H = paf_dims.d[1]; 199 | int W = paf_dims.d[2]; 200 | int max_count = peaks[0].size(); 201 | Vec3D score_graph(K, Vec2D(max_count, Vec1D(max_count, 0))); 202 | 203 | for (int k = 0; k < K; k++) 204 | { 205 | auto &score_graph_nk = score_graph[k]; 206 | auto &paf_i_idx = topology[k][0]; 207 | auto &paf_j_idx = topology[k][1]; 208 | auto &cmap_a_idx = topology[k][2]; 209 | auto &cmap_b_idx = topology[k][3]; 210 | float *paf_i = (float *)paf_data + paf_i_idx * H * W; 211 | float *paf_j = (float *)paf_data + paf_j_idx * H * W; 212 | 213 | auto &counts_a = counts[cmap_a_idx]; 214 | auto &counts_b = counts[cmap_b_idx]; 215 | auto &peaks_a = peaks[cmap_a_idx]; 216 | auto &peaks_b = peaks[cmap_b_idx]; 217 | 218 | for (int a = 0; a < counts_a; a++) 219 | { 220 | // Point A 221 | float pa_i = peaks_a[a][0] * H; 222 | float pa_j = peaks_a[a][1] * W; 223 | 224 | for (int b = 0; b < counts_b; b++) 225 | { 226 | // Point B 227 | float pb_i = peaks_b[b][0] * H; 228 | float pb_j = peaks_b[b][1] * W; 229 | 230 | // Vector from Point A to Point B 231 | float pab_i = pb_i - pa_i; 232 | float pab_j = pb_j - pa_j; 233 | 234 | // Normalized Vector from Point A to Point B 235 | float pab_norm = sqrtf(pab_i * pab_i + pab_j * pab_j) + EPS; 236 | float uab_i = pab_i / pab_norm; 237 | float uab_j = pab_j / pab_norm; 238 | 239 | float integral = 0.0; 240 | float increment = 1.0f / num_integral_samples; 241 | 242 | for (int t = 0; t < num_integral_samples; t++) 243 | { 244 | // Integral Point T 245 | float progress = (float)t / (float)num_integral_samples; 246 | float pt_i = pa_i + progress * pab_i; 247 | float pt_j = pa_j + progress * pab_j; 248 | 249 | // Convert to Integer 250 | int pt_i_int = (int)pt_i; 251 | int pt_j_int = (int)pt_j; 252 | 253 | // Edge cases for if the point is out of bounds, just skip them 254 | if (pt_i_int < 0) 255 | continue; 256 | if 
(pt_i_int > H) 257 | continue; 258 | if (pt_j_int < 0) 259 | continue; 260 | if (pt_j_int > W) 261 | continue; 262 | 263 | // Vector at integral point 264 | float pt_paf_i = paf_i[pt_i_int * W + pt_j_int]; 265 | float pt_paf_j = paf_j[pt_i_int * W + pt_j_int]; 266 | 267 | // Dot Product Normalized A->B with PAF Vector 268 | float dot = pt_paf_i * uab_i + pt_paf_j * uab_j; 269 | integral += dot; 270 | 271 | progress += increment; 272 | } 273 | 274 | // Normalize the integral with respect to the number of samples 275 | integral /= num_integral_samples; 276 | score_graph_nk[a][b] = integral; 277 | } 278 | } 279 | } 280 | return score_graph; 281 | } 282 | 283 | /* 284 | This method takes care of solving the graph assignment problem using Munkres algorithm. Munkres algorithm is defind in 'munkres_algorithm.cpp' 285 | */ 286 | 287 | Vec3D 288 | assignment(Vec3D &score_graph, 289 | Vec2D &topology, Vec1D &counts, float score_threshold, int max_count) 290 | { 291 | int K = topology.size(); 292 | Vec3D connections(K, Vec2D(M, Vec1D(max_count, -1))); 293 | 294 | Vec3D cost_graph = score_graph; 295 | for (Vec2D &cg_iter1 : cost_graph) 296 | for (Vec1D &cg_iter2 : cg_iter1) 297 | for (float &cg_iter3 : cg_iter2) 298 | cg_iter3 = -cg_iter3; 299 | auto &cost_graph_out_a = cost_graph; 300 | 301 | for (int k = 0; k < K; k++) 302 | { 303 | int cmap_a_idx = topology[k][2]; 304 | int cmap_b_idx = topology[k][3]; 305 | int nrows = counts[cmap_a_idx]; 306 | int ncols = counts[cmap_b_idx]; 307 | auto star_graph = PairGraph(nrows, ncols); 308 | auto &cost_graph_out_a_nk = cost_graph_out_a[k]; 309 | munkres_algorithm(cost_graph_out_a_nk, star_graph, nrows, ncols); 310 | 311 | auto &connections_a_nk = connections[k]; 312 | auto &score_graph_a_nk = score_graph[k]; 313 | 314 | for (int i = 0; i < nrows; i++) 315 | { 316 | for (int j = 0; j < ncols; j++) 317 | { 318 | if (star_graph.isPair(i, j) && score_graph_a_nk[i][j] > score_threshold) 319 | { 320 | connections_a_nk[0][i] = j; 321 | connections_a_nk[1][j] = i; 322 | } 323 | } 324 | } 325 | } 326 | return connections; 327 | } 328 | 329 | /* This method takes care of connecting all the body parts detected to each other 330 | after finding the relationships between them in the 'assignment' method */ 331 | Vec2D 332 | connect_parts( 333 | Vec3D &connections, Vec2D &topology, Vec1D &counts, 334 | int max_count) 335 | { 336 | int K = topology.size(); 337 | int C = counts.size(); 338 | 339 | Vec2D visited(C, Vec1D(max_count, 0)); 340 | 341 | Vec2D objects(max_count, Vec1D(C, -1)); 342 | 343 | int num_objects = 0; 344 | for (int c = 0; c < C; c++) 345 | { 346 | if (num_objects >= max_count) 347 | { 348 | break; 349 | } 350 | 351 | int count = counts[c]; 352 | 353 | for (int i = 0; i < count; i++) 354 | { 355 | if (num_objects >= max_count) 356 | { 357 | break; 358 | } 359 | 360 | std::queue> q; 361 | bool new_object = false; 362 | q.push({c, i}); 363 | 364 | while (!q.empty()) 365 | { 366 | auto node = q.front(); 367 | q.pop(); 368 | int c_n = node.first; 369 | int i_n = node.second; 370 | 371 | if (visited[c_n][i_n]) 372 | { 373 | continue; 374 | } 375 | 376 | visited[c_n][i_n] = 1; 377 | new_object = true; 378 | objects[num_objects][c_n] = i_n; 379 | 380 | for (int k = 0; k < K; k++) 381 | { 382 | int c_a = topology[k][2]; 383 | int c_b = topology[k][3]; 384 | 385 | if (c_a == c_n) 386 | { 387 | int i_b = connections[k][0][i_n]; 388 | if (i_b >= 0) 389 | { 390 | q.push({c_b, i_b}); 391 | } 392 | } 393 | 394 | if (c_b == c_n) 395 | { 396 | int i_a = 
connections[k][1][i_n]; 397 | if (i_a >= 0) 398 | { 399 | q.push({c_a, i_a}); 400 | } 401 | } 402 | } 403 | } 404 | 405 | if (new_object) 406 | { 407 | num_objects++; 408 | } 409 | } 410 | } 411 | 412 | objects.resize(num_objects); 413 | return objects; 414 | } --------------------------------------------------------------------------------
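The comments in `post_process.cpp` describe how `assignment()` turns the PAF score graph into a per-link matching by negating the scores and running the Munkres algorithm from `munkres_algorithm.cpp`. The following minimal sketch (not part of the original sources) shows that step in isolation on a toy score matrix. It assumes the repository's `pair_graph.hpp`, `cover_table.hpp`, and `munkres_algorithm.cpp` are on the include path, and that `Vec2D<float>` is the `std::vector<std::vector<float>>` alias used throughout the code.

```
// Standalone sketch (not part of the original repository): exercising the
// Munkres-based matching used by assignment() on a tiny, hand-made score matrix.
// Assumes pair_graph.hpp, cover_table.hpp and munkres_algorithm.cpp from this
// repository are available on the include path.
#include "munkres_algorithm.cpp" // also pulls in pair_graph.hpp and cover_table.hpp

#include <cstdio>
#include <vector>

int main()
{
  // PAF scores between 2 peaks of body-part A (rows) and 3 peaks of body-part B
  // (columns), as paf_score_graph() would produce them. Higher means the two
  // peaks are more likely to belong to the same person.
  std::vector<std::vector<float>> score = {
      {0.90f, 0.10f, 0.05f},
      {0.20f, 0.05f, 0.80f}};

  // munkres_algorithm() minimises cost, so negate the scores first
  // (the same trick assignment() uses when it builds cost_graph).
  std::vector<std::vector<float>> cost = score;
  for (auto &row : cost)
    for (auto &v : row)
      v = -v;

  PairGraph star_graph(2, 3);
  munkres_algorithm(cost, star_graph, 2, 3);

  // star_graph now holds the optimal matching; expect A0 -> B0 and A1 -> B2.
  for (const auto &p : star_graph.pairs())
    std::printf("part A peak %d matched to part B peak %d\n", p.first, p.second);

  return 0;
}
```

With these scores the matching pairs peak 0 of part A with peak 0 of part B, and peak 1 with peak 2; `assignment()` would then keep only the pairs whose score exceeds `score_threshold` before `connect_parts()` groups the surviving links into per-person skeletons.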