├── CLA_LICENSE.md ├── LICENSE.md ├── Makefile ├── README.md ├── bin ├── .gitkeep ├── Jetson │ ├── .gitkeep │ └── libnvds_osd.so └── x86 │ ├── libnvds_osd.so │ └── libnvds_osd_cuda11.so ├── cover_table.hpp ├── deepstream_pose_estimation_app.cpp ├── deepstream_pose_estimation_config.txt ├── images ├── .gitkeep ├── auxillary.png ├── input.gif ├── main.png └── output.gif ├── munkres_algorithm.cpp ├── pair_graph.hpp ├── pose_estimation.onnx └── post_process.cpp /CLA_LICENSE.md: -------------------------------------------------------------------------------- 1 | # Individual Contributor License Agreement (CLA) 2 | 3 | Thank you for submitting your contributions to this project. 4 | 5 | By signing this CLA, you agree that the following terms apply to all of your past, present and future contributions to the project. 6 | 7 | ## License. 8 | You hereby represent that all present, past and future contributions are governed by the MIT License copyright statement. 9 | 10 | This entails that to the extent possible under law, you transfer all copyright and related or neighboring rights of the code or documents you contribute to the project itself or its maintainers. Furthermore you also represent that you have the authority to perform the above waiver with respect to the entirety of you contributions. 11 | 12 | ## Moral Rights. 13 | To the fullest extent permitted under applicable law, you hereby waive, and agree not to assert, all of your “moral rights” in or relating to your contributions for the benefit of the project. 14 | 15 | ## Third Party Content. 16 | If your Contribution includes or is based on any source code, object code, bug fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information or other works of authorship that were not authored by you (“Third Party Content”) or if you are aware of any third party intellectual property or proprietary rights associated with your Contribution (“Third Party Rights”), then you agree to include with the submission of your Contribution full details respecting such Third Party Content and Third Party Rights, including, without limitation, identification of which aspects of your Contribution contain Third Party Content or are associated with Third Party Rights, the owner/author of the Third Party Content and Third Party Rights, where you obtained the Third Party Content, and any applicable third party license terms or restrictions respecting the Third Party Content and Third Party Rights. For greater certainty, the foregoing obligations respecting the identification of Third Party Content and Third Party Rights do not apply to any portion of a Project that is incorporated into your Contribution to that same Project. 17 | 18 | ## Representations. 19 | You represent that, other than the Third Party Content and Third Party Rights identified by you in accordance with this Agreement, you are the sole author of your Contributions and are legally entitled to grant the foregoing licenses and waivers in respect of your Contributions. If your Contributions were created in the course of your employment with your past or present employer(s), you represent that such employer(s) has authorized you to make your Contributions on behalf of such employer(s) or such employer (s) has waived all of their right, title or interest in or to your Contributions. 20 | 21 | ## Disclaimer. 
22 | To the fullest extent permitted under applicable law, your Contributions are provided on an "as is" basis, without any warranties or conditions, express or implied, including, without limitation, any implied warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not required to provide support for your Contributions, except to the extent you desire to provide support. 23 | 24 | ## No Obligation. 25 | You acknowledge that the maintainers of this project are under no obligation to use or incorporate your contributions into the project. The decision to use or incorporate your contributions into the project will be made at the sole discretion of the maintainers or their authorized delegates. -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, NVIDIA Corporation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright 2020 - NVIDIA Corporation 3 | # SPDX-License-Identifier: MIT 4 | ################################################################################ 5 | 6 | CXX=g++ 7 | 8 | APP:= deepstream-pose-estimation-app 9 | 10 | TARGET_DEVICE = $(shell gcc -dumpmachine | cut -f1 -d -) 11 | 12 | NVDS_VERSION:=5.0 13 | 14 | LIB_INSTALL_DIR?=/opt/nvidia/deepstream/deepstream-$(NVDS_VERSION)/lib/ 15 | APP_INSTALL_DIR?=/opt/nvidia/deepstream/deepstream-$(NVDS_VERSION)/bin/ 16 | 17 | ifeq ($(TARGET_DEVICE),aarch64) 18 | CFLAGS:= -DPLATFORM_TEGRA 19 | endif 20 | 21 | SRCS:= deepstream_pose_estimation_app.cpp 22 | 23 | INCS:= $(wildcard *.h) 24 | 25 | PKGS:= gstreamer-1.0 gstreamer-video-1.0 x11 json-glib-1.0 26 | 27 | OBJS:= $(patsubst %.c,%.o, $(patsubst %.cpp,%.o, $(SRCS))) 28 | 29 | CFLAGS+= -I../../apps-common/includes -I../../../includes -I../deepstream-app/ -DDS_VERSION_MINOR=0 -DDS_VERSION_MAJOR=5 30 | 31 | LIBS+= -L$(LIB_INSTALL_DIR) -lnvdsgst_meta -lnvds_meta -lnvds_utils -lm \ 32 | -lpthread -ldl -Wl,-rpath,$(LIB_INSTALL_DIR) 33 | 34 | CFLAGS+= $(shell pkg-config --cflags $(PKGS)) 35 | 36 | LIBS+= $(shell pkg-config --libs $(PKGS)) 37 | 38 | all: $(APP) 39 | 40 | %.o: %.c $(INCS) Makefile 41 | $(CC) -c -o $@ $(CFLAGS) $< 42 | 43 | %.o: %.cpp $(INCS) Makefile 44 | $(CXX) -c -o $@ $(CFLAGS) $< 45 | 46 | $(APP): $(OBJS) Makefile 47 | $(CXX) -o $(APP) $(OBJS) $(LIBS) 48 | 49 | install: $(APP) 50 | cp -rv $(APP) $(APP_INSTALL_DIR) 51 | 52 | clean: 53 | rm -rf $(OBJS) $(APP) 54 | 55 | 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------ 2 | # This sample application is no longer maintained 3 | # ------------------------------------------------------ 4 | 5 | # DeepStream Human Pose Estimation 6 | 7 | Human pose estimation is the computer vision task of estimating the configuration (‘the pose’) of the human body by localizing certain key points on a body within a video or a photo. The following application serves as a reference to deploy custom pose estimation models with DeepStream 5.0 using the [TRTPose](https://github.com/NVIDIA-AI-IOT/trt_pose) project as an example. 8 | 9 | A detailed deep-dive NVIDIA Developer blog is available [here](https://developer.nvidia.com/blog/creating-a-human-pose-estimation-application-with-deepstream-sdk/?ncid=so-link-52952-vt24&sfdcid=EM08#cid=em08_so-link_en-us). 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
| Input Video Source | Output Video |
| --- | --- |
| ![Input video](images/input.gif) | ![Output video](images/output.gif) |
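As built in `deepstream_pose_estimation_app.cpp`, the reference pipeline is, schematically: `filesrc → h264parse → nvv4l2decoder → nvstreammux → nvinfer → nvvideoconvert → nvdsosd → tee → queue → nvvideoconvert → capsfilter → nvv4l2h264enc → h264parse → qtmux → filesink`. The pose post-processing (peak finding, PAF scoring, Munkres assignment, and part linking from `post_process.cpp`) runs in a buffer probe attached to the `nvinfer` source pad, and the resulting keypoint circles and skeleton lines are drawn onto the frame by `nvdsosd`.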
24 | 25 | 26 | ## Prerequisites 27 | You will need 28 | 1. DeepStreamSDK 5.0 29 | 2. CUDA 10.2 30 | 3. TensorRT 7.x 31 | 32 | 33 | ## Getting Started: 34 | To get started, please follow these steps. 35 | 1. Install [DeepStream](https://developer.nvidia.com/deepstream-sdk) on your platform and verify that it is working by running deepstream-app. 36 | 2. Clone the repository, preferably into `$DEEPSTREAM_DIR/sources/apps/sample_apps`. 37 | 3. Download the TRTPose [model](https://github.com/NVIDIA-AI-IOT/trt_pose), convert it to ONNX using this [export utility](https://github.com/NVIDIA-AI-IOT/trt_pose/blob/master/trt_pose/utils/export_for_isaac.py), and set its location in the DeepStream configuration file. 38 | 4. Replace the OSD binaries (x86 or Jetson) in `$DEEPSTREAM_DIR/libs` with the ones provided in this repository under `bin/`. Please note that these are not inter-compatible across platforms. 39 | 5. Compile the program: 40 | ``` 41 | $ cd deepstream-pose-estimation/ 42 | $ sudo make 43 | $ sudo ./deepstream-pose-estimation-app <file-uri> <output-path> 44 | ``` 45 | 6. The final output is stored in `output-path` as `Pose_Estimation.mp4` (see the note on output paths at the end of this README). 46 | 47 | NOTE: If you do not already have a .trt engine generated from the ONNX model you provided to DeepStream, an engine will be created on the first run of the application. Depending upon the system you’re using, this may take anywhere from 4 to 10 minutes. 48 | 49 | For any issues or questions, please feel free to make a new post on the [DeepStreamSDK forums](https://forums.developer.nvidia.com/c/accelerated-computing/intelligent-video-analytics/deepstream-sdk/). 50 | 51 | ## References 52 | Cao, Zhe, et al. "Realtime multi-person 2d pose estimation using part affinity fields." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017. 53 | 54 | Xiao, Bin, Haiping Wu, and Yichen Wei. "Simple baselines for human pose estimation and tracking." Proceedings of the European Conference on Computer Vision (ECCV). 2018.
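NOTE: The application appends the file name `Pose_Estimation.mp4` directly to the `output-path` argument (via `strcat(output_path, "Pose_Estimation.mp4")` in `deepstream_pose_estimation_app.cpp`), so the path you pass should normally end with a trailing `/`. For example, `./deepstream-pose-estimation-app input.h264 /home/user/results/` writes `/home/user/results/Pose_Estimation.mp4`; the input file and results directory here are only illustrative names.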
55 | -------------------------------------------------------------------------------- /bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/bin/.gitkeep -------------------------------------------------------------------------------- /bin/Jetson/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/bin/Jetson/.gitkeep -------------------------------------------------------------------------------- /bin/Jetson/libnvds_osd.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/bin/Jetson/libnvds_osd.so -------------------------------------------------------------------------------- /bin/x86/libnvds_osd.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/bin/x86/libnvds_osd.so -------------------------------------------------------------------------------- /bin/x86/libnvds_osd_cuda11.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/bin/x86/libnvds_osd_cuda11.so -------------------------------------------------------------------------------- /cover_table.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class CoverTable 7 | { 8 | public: 9 | CoverTable(int nrows, int ncols) : nrows(nrows), ncols(ncols) 10 | { 11 | rows.resize(nrows); 12 | cols.resize(ncols); 13 | } 14 | 15 | inline void coverRow(int row) 16 | { 17 | rows[row] = 1; 18 | } 19 | 20 | inline void coverCol(int col) 21 | { 22 | cols[col] = 1; 23 | } 24 | 25 | inline void uncoverRow(int row) 26 | { 27 | rows[row] = 0; 28 | } 29 | 30 | inline void uncoverCol(int col) 31 | { 32 | cols[col] = 0; 33 | } 34 | 35 | inline bool isCovered(int row, int col) const 36 | { 37 | return rows[row] || cols[col]; 38 | } 39 | 40 | inline bool isRowCovered(int row) const 41 | { 42 | return rows[row]; 43 | } 44 | 45 | inline bool isColCovered(int col) const 46 | { 47 | return cols[col]; 48 | } 49 | 50 | inline void clear() 51 | { 52 | for (int i = 0; i < nrows; i++) 53 | { 54 | uncoverRow(i); 55 | } 56 | for (int j = 0; j < ncols; j++) 57 | { 58 | uncoverCol(j); 59 | } 60 | } 61 | 62 | const int nrows; 63 | const int ncols; 64 | 65 | private: 66 | std::vector rows; 67 | std::vector cols; 68 | }; 69 | -------------------------------------------------------------------------------- /deepstream_pose_estimation_app.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2020 - NVIDIA Corporation 2 | // SPDX-License-Identifier: MIT 3 | 4 | #include "post_process.cpp" 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "gstnvdsmeta.h" 11 | #include "nvdsgstutils.h" 12 | #include "nvbufsurface.h" 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #define EPS 1e-6 21 | 22 | #define MAX_DISPLAY_LEN 64 23 | 24 | /* The muxer output resolution 
must be set if the input streams will be of 25 | * different resolution. The muxer will scale all the input frames to this 26 | * resolution. */ 27 | #define MUXER_OUTPUT_WIDTH 1920 28 | #define MUXER_OUTPUT_HEIGHT 1080 29 | 30 | /* Muxer batch formation timeout, for e.g. 40 millisec. Should ideally be set 31 | * based on the fastest source's framerate. */ 32 | #define MUXER_BATCH_TIMEOUT_USEC 4000000 33 | 34 | template 35 | using Vec1D = std::vector; 36 | 37 | template 38 | using Vec2D = std::vector>; 39 | 40 | template 41 | using Vec3D = std::vector>; 42 | 43 | gint frame_number = 0; 44 | 45 | /*Method to parse information returned from the model*/ 46 | std::tuple, Vec3D> 47 | parse_objects_from_tensor_meta(NvDsInferTensorMeta *tensor_meta) 48 | { 49 | Vec1D counts; 50 | Vec3D peaks; 51 | 52 | float threshold = 0.1; 53 | int window_size = 5; 54 | int max_num_parts = 20; 55 | int num_integral_samples = 7; 56 | float link_threshold = 0.1; 57 | int max_num_objects = 100; 58 | 59 | void *cmap_data = tensor_meta->out_buf_ptrs_host[0]; 60 | NvDsInferDims &cmap_dims = tensor_meta->output_layers_info[0].inferDims; 61 | void *paf_data = tensor_meta->out_buf_ptrs_host[1]; 62 | NvDsInferDims &paf_dims = tensor_meta->output_layers_info[1].inferDims; 63 | 64 | /* Finding peaks within a given window */ 65 | find_peaks(counts, peaks, cmap_data, cmap_dims, threshold, window_size, max_num_parts); 66 | /* Non-Maximum Suppression */ 67 | Vec3D refined_peaks = refine_peaks(counts, peaks, cmap_data, cmap_dims, window_size); 68 | /* Create a Bipartite graph to assign detected body-parts to a unique person in the frame */ 69 | Vec3D score_graph = paf_score_graph(paf_data, paf_dims, topology, counts, refined_peaks, num_integral_samples); 70 | /* Assign weights to all edges in the bipartite graph generated */ 71 | Vec3D connections = assignment(score_graph, topology, counts, link_threshold, max_num_parts); 72 | /* Connecting all the Body Parts and Forming a Human Skeleton */ 73 | Vec2D objects = connect_parts(connections, topology, counts, max_num_objects); 74 | return {objects, refined_peaks}; 75 | } 76 | 77 | /* MetaData to handle drawing onto the on-screen-display */ 78 | static void 79 | create_display_meta(Vec2D &objects, Vec3D &normalized_peaks, NvDsFrameMeta *frame_meta, int frame_width, int frame_height) 80 | { 81 | int K = topology.size(); 82 | int count = objects.size(); 83 | NvDsBatchMeta *bmeta = frame_meta->base_meta.batch_meta; 84 | NvDsDisplayMeta *dmeta = nvds_acquire_display_meta_from_pool(bmeta); 85 | nvds_add_display_meta_to_frame(frame_meta, dmeta); 86 | 87 | for (auto &object : objects) 88 | { 89 | int C = object.size(); 90 | for (int j = 0; j < C; j++) 91 | { 92 | int k = object[j]; 93 | if (k >= 0) 94 | { 95 | auto &peak = normalized_peaks[j][k]; 96 | int x = peak[1] * MUXER_OUTPUT_WIDTH; 97 | int y = peak[0] * MUXER_OUTPUT_HEIGHT; 98 | if (dmeta->num_circles == MAX_ELEMENTS_IN_DISPLAY_META) 99 | { 100 | dmeta = nvds_acquire_display_meta_from_pool(bmeta); 101 | nvds_add_display_meta_to_frame(frame_meta, dmeta); 102 | } 103 | NvOSD_CircleParams &cparams = dmeta->circle_params[dmeta->num_circles]; 104 | cparams.xc = x; 105 | cparams.yc = y; 106 | cparams.radius = 8; 107 | cparams.circle_color = NvOSD_ColorParams{244, 67, 54, 1}; 108 | cparams.has_bg_color = 1; 109 | cparams.bg_color = NvOSD_ColorParams{0, 255, 0, 1}; 110 | dmeta->num_circles++; 111 | } 112 | } 113 | 114 | for (int k = 0; k < K; k++) 115 | { 116 | int c_a = topology[k][2]; 117 | int c_b = topology[k][3]; 118 | if (object[c_a] 
>= 0 && object[c_b] >= 0) 119 | { 120 | auto &peak0 = normalized_peaks[c_a][object[c_a]]; 121 | auto &peak1 = normalized_peaks[c_b][object[c_b]]; 122 | int x0 = peak0[1] * MUXER_OUTPUT_WIDTH; 123 | int y0 = peak0[0] * MUXER_OUTPUT_HEIGHT; 124 | int x1 = peak1[1] * MUXER_OUTPUT_WIDTH; 125 | int y1 = peak1[0] * MUXER_OUTPUT_HEIGHT; 126 | if (dmeta->num_lines == MAX_ELEMENTS_IN_DISPLAY_META) 127 | { 128 | dmeta = nvds_acquire_display_meta_from_pool(bmeta); 129 | nvds_add_display_meta_to_frame(frame_meta, dmeta); 130 | } 131 | NvOSD_LineParams &lparams = dmeta->line_params[dmeta->num_lines]; 132 | lparams.x1 = x0; 133 | lparams.x2 = x1; 134 | lparams.y1 = y0; 135 | lparams.y2 = y1; 136 | lparams.line_width = 3; 137 | lparams.line_color = NvOSD_ColorParams{0, 255, 0, 1}; 138 | dmeta->num_lines++; 139 | } 140 | } 141 | } 142 | } 143 | 144 | /* pgie_src_pad_buffer_probe will extract metadata received from pgie 145 | * and update params for drawing rectangle, object information etc. */ 146 | static GstPadProbeReturn 147 | pgie_src_pad_buffer_probe(GstPad *pad, GstPadProbeInfo *info, 148 | gpointer u_data) 149 | { 150 | gchar *msg = NULL; 151 | GstBuffer *buf = (GstBuffer *)info->data; 152 | NvDsMetaList *l_frame = NULL; 153 | NvDsMetaList *l_obj = NULL; 154 | NvDsMetaList *l_user = NULL; 155 | NvDsBatchMeta *batch_meta = gst_buffer_get_nvds_batch_meta(buf); 156 | 157 | for (l_frame = batch_meta->frame_meta_list; l_frame != NULL; 158 | l_frame = l_frame->next) 159 | { 160 | NvDsFrameMeta *frame_meta = (NvDsFrameMeta *)(l_frame->data); 161 | 162 | for (l_user = frame_meta->frame_user_meta_list; l_user != NULL; 163 | l_user = l_user->next) 164 | { 165 | NvDsUserMeta *user_meta = (NvDsUserMeta *)l_user->data; 166 | if (user_meta->base_meta.meta_type == NVDSINFER_TENSOR_OUTPUT_META) 167 | { 168 | NvDsInferTensorMeta *tensor_meta = 169 | (NvDsInferTensorMeta *)user_meta->user_meta_data; 170 | Vec2D objects; 171 | Vec3D normalized_peaks; 172 | tie(objects, normalized_peaks) = parse_objects_from_tensor_meta(tensor_meta); 173 | create_display_meta(objects, normalized_peaks, frame_meta, frame_meta->source_frame_width, frame_meta->source_frame_height); 174 | } 175 | } 176 | 177 | for (l_obj = frame_meta->obj_meta_list; l_obj != NULL; 178 | l_obj = l_obj->next) 179 | { 180 | NvDsObjectMeta *obj_meta = (NvDsObjectMeta *)l_obj->data; 181 | for (l_user = obj_meta->obj_user_meta_list; l_user != NULL; 182 | l_user = l_user->next) 183 | { 184 | NvDsUserMeta *user_meta = (NvDsUserMeta *)l_user->data; 185 | if (user_meta->base_meta.meta_type == NVDSINFER_TENSOR_OUTPUT_META) 186 | { 187 | NvDsInferTensorMeta *tensor_meta = 188 | (NvDsInferTensorMeta *)user_meta->user_meta_data; 189 | Vec2D objects; 190 | Vec3D normalized_peaks; 191 | tie(objects, normalized_peaks) = parse_objects_from_tensor_meta(tensor_meta); 192 | create_display_meta(objects, normalized_peaks, frame_meta, frame_meta->source_frame_width, frame_meta->source_frame_height); 193 | } 194 | } 195 | } 196 | } 197 | return GST_PAD_PROBE_OK; 198 | } 199 | 200 | /* osd_sink_pad_buffer_probe will extract metadata received from OSD 201 | * and update params for drawing rectangle, object information etc. 
*/ 202 | static GstPadProbeReturn 203 | osd_sink_pad_buffer_probe(GstPad *pad, GstPadProbeInfo *info, 204 | gpointer u_data) 205 | { 206 | GstBuffer *buf = (GstBuffer *)info->data; 207 | guint num_rects = 0; 208 | NvDsObjectMeta *obj_meta = NULL; 209 | NvDsMetaList *l_frame = NULL; 210 | NvDsMetaList *l_obj = NULL; 211 | NvDsDisplayMeta *display_meta = NULL; 212 | 213 | NvDsBatchMeta *batch_meta = gst_buffer_get_nvds_batch_meta(buf); 214 | 215 | for (l_frame = batch_meta->frame_meta_list; l_frame != NULL; 216 | l_frame = l_frame->next) 217 | { 218 | NvDsFrameMeta *frame_meta = (NvDsFrameMeta *)(l_frame->data); 219 | int offset = 0; 220 | for (l_obj = frame_meta->obj_meta_list; l_obj != NULL; l_obj = l_obj->next) 221 | { 222 | obj_meta = (NvDsObjectMeta *)(l_obj->data); 223 | } 224 | display_meta = nvds_acquire_display_meta_from_pool(batch_meta); 225 | 226 | /* Parameters to draw text onto the On-Screen-Display */ 227 | NvOSD_TextParams *txt_params = &display_meta->text_params[0]; 228 | display_meta->num_labels = 1; 229 | txt_params->display_text = (char *)g_malloc0(MAX_DISPLAY_LEN); 230 | offset = snprintf(txt_params->display_text, MAX_DISPLAY_LEN, "Frame Number = %d", frame_number); 231 | offset = snprintf(txt_params->display_text + offset, MAX_DISPLAY_LEN, ""); 232 | 233 | txt_params->x_offset = 10; 234 | txt_params->y_offset = 12; 235 | 236 | txt_params->font_params.font_name = "Mono"; 237 | txt_params->font_params.font_size = 10; 238 | txt_params->font_params.font_color.red = 1.0; 239 | txt_params->font_params.font_color.green = 1.0; 240 | txt_params->font_params.font_color.blue = 1.0; 241 | txt_params->font_params.font_color.alpha = 1.0; 242 | 243 | txt_params->set_bg_clr = 1; 244 | txt_params->text_bg_clr.red = 0.0; 245 | txt_params->text_bg_clr.green = 0.0; 246 | txt_params->text_bg_clr.blue = 0.0; 247 | txt_params->text_bg_clr.alpha = 1.0; 248 | 249 | nvds_add_display_meta_to_frame(frame_meta, display_meta); 250 | } 251 | frame_number++; 252 | return GST_PAD_PROBE_OK; 253 | } 254 | 255 | static gboolean 256 | bus_call(GstBus *bus, GstMessage *msg, gpointer data) 257 | { 258 | GMainLoop *loop = (GMainLoop *)data; 259 | switch (GST_MESSAGE_TYPE(msg)) 260 | { 261 | case GST_MESSAGE_EOS: 262 | g_print("End of Stream\n"); 263 | g_main_loop_quit(loop); 264 | break; 265 | 266 | case GST_MESSAGE_ERROR: 267 | { 268 | gchar *debug; 269 | GError *error; 270 | gst_message_parse_error(msg, &error, &debug); 271 | g_printerr("ERROR from element %s: %s\n", 272 | GST_OBJECT_NAME(msg->src), error->message); 273 | if (debug) 274 | g_printerr("Error details: %s\n", debug); 275 | g_free(debug); 276 | g_error_free(error); 277 | g_main_loop_quit(loop); 278 | break; 279 | } 280 | 281 | default: 282 | break; 283 | } 284 | return TRUE; 285 | } 286 | 287 | gboolean 288 | link_element_to_tee_src_pad(GstElement *tee, GstElement *sinkelem) 289 | { 290 | gboolean ret = FALSE; 291 | GstPad *tee_src_pad = NULL; 292 | GstPad *sinkpad = NULL; 293 | GstPadTemplate *padtemplate = NULL; 294 | 295 | padtemplate = (GstPadTemplate *)gst_element_class_get_pad_template(GST_ELEMENT_GET_CLASS(tee), "src_%u"); 296 | tee_src_pad = gst_element_request_pad(tee, padtemplate, NULL, NULL); 297 | 298 | if (!tee_src_pad) 299 | { 300 | g_printerr("Failed to get src pad from tee"); 301 | goto done; 302 | } 303 | 304 | sinkpad = gst_element_get_static_pad(sinkelem, "sink"); 305 | if (!sinkpad) 306 | { 307 | g_printerr("Failed to get sink pad from '%s'", 308 | GST_ELEMENT_NAME(sinkelem)); 309 | goto done; 310 | } 311 | 312 | if 
(gst_pad_link(tee_src_pad, sinkpad) != GST_PAD_LINK_OK) 313 | { 314 | g_printerr("Failed to link '%s' and '%s'", GST_ELEMENT_NAME(tee), 315 | GST_ELEMENT_NAME(sinkelem)); 316 | goto done; 317 | } 318 | ret = TRUE; 319 | 320 | done: 321 | if (tee_src_pad) 322 | { 323 | gst_object_unref(tee_src_pad); 324 | } 325 | if (sinkpad) 326 | { 327 | gst_object_unref(sinkpad); 328 | } 329 | return ret; 330 | } 331 | 332 | int main(int argc, char *argv[]) 333 | { 334 | GMainLoop *loop = NULL; 335 | GstCaps *caps = NULL; 336 | GstElement *pipeline = NULL, *source = NULL, *h264parser = NULL, 337 | *decoder = NULL, *streammux = NULL, *sink = NULL, *pgie = NULL, *nvvidconv = NULL, *nvosd = NULL, 338 | *nvvideoconvert = NULL, *tee = NULL, *h264encoder = NULL, *cap_filter = NULL, *filesink = NULL, *queue = NULL, *qtmux = NULL, *h264parser1 = NULL, *nvsink = NULL; 339 | 340 | /* Add a transform element for Jetson*/ 341 | #ifdef PLATFORM_TEGRA 342 | GstElement *transform = NULL; 343 | #endif 344 | GstBus *bus = NULL; 345 | guint bus_watch_id; 346 | GstPad *osd_sink_pad = NULL; 347 | 348 | /* Check input arguments */ 349 | if (argc != 3) 350 | { 351 | g_printerr("Usage: %s \n", argv[0]); 352 | return -1; 353 | } 354 | 355 | /* Standard GStreamer initialization */ 356 | gst_init(&argc, &argv); 357 | loop = g_main_loop_new(NULL, FALSE); 358 | 359 | /* Create gstreamer elements */ 360 | /* Create Pipeline element that will form a connection of other elements */ 361 | pipeline = gst_pipeline_new("deepstream-tensorrt-openpose-pipeline"); 362 | 363 | /* Source element for reading from the file */ 364 | source = gst_element_factory_make("filesrc", "file-source"); 365 | 366 | /* Since the data format in the input file is elementary h264 stream, 367 | * we need a h264parser */ 368 | h264parser = gst_element_factory_make("h264parse", "h264-parser"); 369 | h264parser1 = gst_element_factory_make("h264parse", "h264-parser1"); 370 | 371 | /* Use nvdec_h264 for hardware accelerated decode on GPU */ 372 | decoder = gst_element_factory_make("nvv4l2decoder", "nvv4l2-decoder"); 373 | 374 | /* Create nvstreammux instance to form batches from one or more sources. */ 375 | streammux = gst_element_factory_make("nvstreammux", "stream-muxer"); 376 | 377 | if (!pipeline || !streammux) 378 | { 379 | g_printerr("One element could not be created. 
Exiting.\n"); 380 | return -1; 381 | } 382 | 383 | /* Use nvinfer to run inferencing on decoder's output, 384 | * behaviour of inferencing is set through config file */ 385 | pgie = gst_element_factory_make("nvinfer", "primary-nvinference-engine"); 386 | 387 | /* Use convertor to convert from NV12 to RGBA as required by nvosd */ 388 | nvvidconv = gst_element_factory_make("nvvideoconvert", "nvvideo-converter"); 389 | 390 | queue = gst_element_factory_make("queue", "queue"); 391 | filesink = gst_element_factory_make("filesink", "filesink"); 392 | 393 | /* Set output file location */ 394 | char *output_path = argv[2]; 395 | strcat(output_path,"Pose_Estimation.mp4"); 396 | g_object_set(G_OBJECT(filesink), "location", output_path, NULL); 397 | 398 | nvvideoconvert = gst_element_factory_make("nvvideoconvert", "nvvideo-converter1"); 399 | tee = gst_element_factory_make("tee", "TEE"); 400 | h264encoder = gst_element_factory_make("nvv4l2h264enc", "video-encoder"); 401 | cap_filter = gst_element_factory_make("capsfilter", "enc_caps_filter"); 402 | caps = gst_caps_from_string("video/x-raw(memory:NVMM), format=I420"); 403 | g_object_set(G_OBJECT(cap_filter), "caps", caps, NULL); 404 | qtmux = gst_element_factory_make("qtmux", "muxer"); 405 | 406 | /* Create OSD to draw on the converted RGBA buffer */ 407 | nvosd = gst_element_factory_make("nvdsosd", "nv-onscreendisplay"); 408 | 409 | /* Finally render the osd output */ 410 | #ifdef PLATFORM_TEGRA 411 | transform = gst_element_factory_make("nvegltransform", "nvegl-transform"); 412 | #endif 413 | nvsink = gst_element_factory_make("nveglglessink", "nvvideo-renderer"); 414 | sink = gst_element_factory_make("fpsdisplaysink", "fps-display"); 415 | 416 | g_object_set(G_OBJECT(sink), "text-overlay", FALSE, "video-sink", nvsink, "sync", FALSE, NULL); 417 | 418 | if (!source || !h264parser || !decoder || !pgie || !nvvidconv || !nvosd || !sink || !cap_filter || !tee || !nvvideoconvert || 419 | !h264encoder || !filesink || !queue || !qtmux || !h264parser1) 420 | { 421 | g_printerr("One element could not be created. Exiting.\n"); 422 | return -1; 423 | } 424 | #ifdef PLATFORM_TEGRA 425 | if (!transform) 426 | { 427 | g_printerr("One tegra element could not be created. 
Exiting.\n"); 428 | return -1; 429 | } 430 | #endif 431 | 432 | /* we set the input filename to the source element */ 433 | g_object_set(G_OBJECT(source), "location", argv[1], NULL); 434 | 435 | g_object_set(G_OBJECT(streammux), "width", MUXER_OUTPUT_WIDTH, "height", 436 | MUXER_OUTPUT_HEIGHT, "batch-size", 1, 437 | "batched-push-timeout", MUXER_BATCH_TIMEOUT_USEC, NULL); 438 | 439 | /* Set all the necessary properties of the nvinfer element, 440 | * the necessary ones are : */ 441 | g_object_set(G_OBJECT(pgie), "output-tensor-meta", TRUE, 442 | "config-file-path", "deepstream_pose_estimation_config.txt", NULL); 443 | 444 | /* we add a message handler */ 445 | bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline)); 446 | bus_watch_id = gst_bus_add_watch(bus, bus_call, loop); 447 | gst_object_unref(bus); 448 | 449 | /* Set up the pipeline */ 450 | /* we add all elements into the pipeline */ 451 | #ifdef PLATFORM_TEGRA 452 | gst_bin_add_many(GST_BIN(pipeline), 453 | source, h264parser, decoder, streammux, pgie, 454 | nvvidconv, nvosd, transform, /*sink,*/ 455 | tee, nvvideoconvert, h264encoder, cap_filter, filesink, queue, h264parser1, qtmux, NULL); 456 | #else 457 | gst_bin_add_many(GST_BIN(pipeline), 458 | source, h264parser, decoder, streammux, pgie, 459 | nvvidconv, nvosd, /*sink,*/ 460 | tee, nvvideoconvert, h264encoder, cap_filter, filesink, queue, h264parser1, qtmux, NULL); 461 | #endif 462 | 463 | GstPad *sinkpad, *srcpad; 464 | gchar pad_name_sink[16] = "sink_0"; 465 | gchar pad_name_src[16] = "src"; 466 | 467 | sinkpad = gst_element_get_request_pad(streammux, pad_name_sink); 468 | if (!sinkpad) 469 | { 470 | g_printerr("Streammux request sink pad failed. Exiting.\n"); 471 | return -1; 472 | } 473 | 474 | srcpad = gst_element_get_static_pad(decoder, pad_name_src); 475 | if (!srcpad) 476 | { 477 | g_printerr("Decoder request src pad failed. Exiting.\n"); 478 | return -1; 479 | } 480 | 481 | if (gst_pad_link(srcpad, sinkpad) != GST_PAD_LINK_OK) 482 | { 483 | g_printerr("Failed to link decoder to stream muxer. Exiting.\n"); 484 | return -1; 485 | } 486 | 487 | gst_object_unref(sinkpad); 488 | gst_object_unref(srcpad); 489 | 490 | if (!gst_element_link_many(source, h264parser, decoder, NULL)) 491 | { 492 | g_printerr("Elements could not be linked: 1. Exiting.\n"); 493 | return -1; 494 | } 495 | #if 0 496 | #ifdef PLATFORM_TEGRA 497 | if (!gst_element_link_many (streammux, pgie, 498 | nvvidconv, nvosd, transform, sink, NULL)) { 499 | g_printerr ("Elements could not be linked: 2. Exiting.\n"); 500 | return -1; 501 | } 502 | #else 503 | if (!gst_element_link_many (streammux, pgie, nvvidconv, nvosd, sink, NULL)) { 504 | g_printerr ("Elements could not be linked: 2. Exiting.\n"); 505 | return -1; 506 | } 507 | #endif 508 | #else 509 | #ifdef PLATFORM_TEGRA 510 | if (!gst_element_link_many(streammux, pgie, 511 | nvvidconv, nvosd, tee, NULL)) 512 | { 513 | g_printerr("Elements could not be linked: 2. Exiting.\n"); 514 | return -1; 515 | } 516 | #else 517 | if (!gst_element_link_many(streammux, pgie, nvvidconv, nvosd, tee, NULL)) 518 | { 519 | g_printerr("Elements could not be linked: 2. Exiting.\n"); 520 | return -1; 521 | } 522 | #endif 523 | #if 0 524 | if (!link_element_to_tee_src_pad(tee, queue)) { 525 | g_printerr ("Could not link tee to sink\n"); 526 | return -1; 527 | } 528 | if (!gst_element_link_many (queue, sink, NULL)) { 529 | g_printerr ("Elements could not be linked: 2. 
Exiting.\n"); 530 | return -1; 531 | } 532 | #else 533 | if (!link_element_to_tee_src_pad(tee, queue)) 534 | { 535 | g_printerr("Could not link tee to nvvideoconvert\n"); 536 | return -1; 537 | } 538 | if (!gst_element_link_many(queue, nvvideoconvert, cap_filter, h264encoder, 539 | h264parser1, qtmux, filesink, NULL)) 540 | { 541 | g_printerr("Elements could not be linked\n"); 542 | return -1; 543 | } 544 | #endif 545 | 546 | #endif 547 | 548 | GstPad *pgie_src_pad = gst_element_get_static_pad(pgie, "src"); 549 | if (!pgie_src_pad) 550 | g_print("Unable to get pgie src pad\n"); 551 | else 552 | gst_pad_add_probe(pgie_src_pad, GST_PAD_PROBE_TYPE_BUFFER, 553 | pgie_src_pad_buffer_probe, (gpointer)sink, NULL); 554 | 555 | /* Lets add probe to get informed of the meta data generated, we add probe to 556 | * the sink pad of the osd element, since by that time, the buffer would have 557 | * had got all the metadata. */ 558 | osd_sink_pad = gst_element_get_static_pad(nvosd, "sink"); 559 | if (!osd_sink_pad) 560 | g_print("Unable to get sink pad\n"); 561 | else 562 | gst_pad_add_probe(osd_sink_pad, GST_PAD_PROBE_TYPE_BUFFER, 563 | osd_sink_pad_buffer_probe, (gpointer)sink, NULL); 564 | 565 | /* Set the pipeline to "playing" state */ 566 | g_print("Now playing: %s\n", argv[1]); 567 | gst_element_set_state(pipeline, GST_STATE_PLAYING); 568 | 569 | /* Wait till pipeline encounters an error or EOS */ 570 | g_print("Running...\n"); 571 | g_main_loop_run(loop); 572 | 573 | /* Out of the main loop, clean up nicely */ 574 | g_print("Returned, stopping playback\n"); 575 | gst_element_set_state(pipeline, GST_STATE_NULL); 576 | g_print("Deleting pipeline\n"); 577 | gst_object_unref(GST_OBJECT(pipeline)); 578 | g_source_remove(bus_watch_id); 579 | g_main_loop_unref(loop); 580 | return 0; 581 | } 582 | -------------------------------------------------------------------------------- /deepstream_pose_estimation_config.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2020 - NVIDIA Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | # Following properties are mandatory when engine files are not specified: 5 | # int8-calib-file(Only in INT8) 6 | # Caffemodel mandatory properties: model-file, proto-file, output-blob-names 7 | # UFF: uff-file, input-dims, uff-input-blob-name, output-blob-names 8 | # ONNX: onnx-file 9 | # 10 | # Mandatory properties for detectors: 11 | # parse-func, num-detected-classes, 12 | # custom-lib-path (when parse-func=0 i.e. custom), 13 | # parse-bbox-func-name (when parse-func=0) 14 | # 15 | # Optional properties for detectors: 16 | # enable-dbscan(Default=false), interval(Primary mode only, Default=0) 17 | # 18 | # Mandatory properties for classifiers: 19 | # classifier-threshold, is-classifier 20 | # 21 | # Optional properties for classifiers: 22 | # classifier-async-mode(Secondary mode only, Default=false) 23 | # 24 | # Optional properties in secondary mode: 25 | # operate-on-gie-id(Default=0), operate-on-class-ids(Defaults to all classes), 26 | # input-object-min-width, input-object-min-height, input-object-max-width, 27 | # input-object-max-height 28 | # 29 | # Following properties are always recommended: 30 | # batch-size(Default=1) 31 | # 32 | # Other optional properties: 33 | # net-scale-factor(Default=1), network-mode(Default=0 i.e FP32), 34 | # model-color-format(Default=0 i.e. RGB) model-engine-file, labelfile-path, 35 | # mean-file, gie-unique-id(Default=0), offsets, gie-mode (Default=1 i.e. 
primary), 36 | # custom-lib-path, network-mode(Default=0 i.e FP32) 37 | # 38 | # The values in the config file are overridden by values set through GObject 39 | # properties. 40 | 41 | [property] 42 | gpu-id=0 43 | net-scale-factor=0.0174292 44 | offsets=123.675;116.28;103.53 45 | onnx-file=pose_estimation.onnx 46 | labelfile-path=labels.txt 47 | batch-size=1 48 | process-mode=1 49 | model-color-format=0 50 | ## 0=FP32, 1=INT8, 2=FP16 mode 51 | network-mode=2 52 | num-detected-classes=4 53 | interval=0 54 | gie-unique-id=1 55 | model-engine-file=pose_estimation.onnx_b1_gpu0_fp16.engine 56 | network-type=100 57 | workspace-size=3000 58 | 59 | 60 | -------------------------------------------------------------------------------- /images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/images/.gitkeep -------------------------------------------------------------------------------- /images/auxillary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/images/auxillary.png -------------------------------------------------------------------------------- /images/input.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/images/input.gif -------------------------------------------------------------------------------- /images/main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/images/main.png -------------------------------------------------------------------------------- /images/output.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/images/output.gif -------------------------------------------------------------------------------- /munkres_algorithm.cpp: -------------------------------------------------------------------------------- 1 | #include "pair_graph.hpp" 2 | #include "cover_table.hpp" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | template 11 | using Vec1D = std::vector; 12 | template 13 | using Vec2D = std::vector>; 14 | template 15 | using Vec3D = std::vector>; 16 | 17 | // Helper method to subtract the minimum row from cost_graph 18 | void subtract_minimum_row(Vec2D &cost_graph, int nrows, int ncols) 19 | { 20 | for (int i = 0; i < nrows; i++) 21 | { 22 | // Iterate the find the minimum 23 | float min = cost_graph[i][0]; 24 | for (int j = 0; j < ncols; j++) 25 | { 26 | float val = cost_graph[i][j]; 27 | if (val < min) 28 | { 29 | min = val; 30 | } 31 | } 32 | 33 | // Subtract the Minimum 34 | for (int j = 0; j < ncols; j++) 35 | { 36 | cost_graph[i][j] -= min; 37 | } 38 | } 39 | } 40 | 41 | // Helper method to subtract the minimum col from cost_graph 42 | void subtract_minimum_column(Vec2D &cost_graph, int nrows, int ncols) 43 | { 44 | for (int j = 0; j < ncols; j++) 45 | { 46 | // Iterate and find the minimum 47 | float min = cost_graph[0][j]; 48 | for (int i = 0; i < nrows; i++) 49 | { 50 | float val = 
cost_graph[i][j]; 51 | if (val < min) 52 | { 53 | min = val; 54 | } 55 | } 56 | 57 | // Subtract the minimum 58 | for (int i = 0; i < nrows; i++) 59 | { 60 | cost_graph[i][j] -= min; 61 | } 62 | } 63 | } 64 | 65 | void munkresStep1(Vec2D &cost_graph, PairGraph &star_graph, int nrows, 66 | int ncols) 67 | { 68 | for (int i = 0; i < nrows; i++) 69 | { 70 | for (int j = 0; j < ncols; j++) 71 | { 72 | if (!star_graph.isRowSet(i) && !star_graph.isColSet(j) && (cost_graph[i][j] == 0)) 73 | { 74 | star_graph.set(i, j); 75 | } 76 | } 77 | } 78 | } 79 | 80 | // Exits if '1' is returned 81 | bool munkresStep2(const PairGraph &star_graph, CoverTable &cover_table) 82 | { 83 | int k = 84 | star_graph.nrows < star_graph.ncols ? star_graph.nrows : star_graph.ncols; 85 | int count = 0; 86 | for (int j = 0; j < star_graph.ncols; j++) 87 | { 88 | if (star_graph.isColSet(j)) 89 | { 90 | cover_table.coverCol(j); 91 | count++; 92 | } 93 | } 94 | return count >= k; 95 | } 96 | 97 | bool munkresStep3(Vec2D &cost_graph, const PairGraph &star_graph, 98 | PairGraph &prime_graph, CoverTable &cover_table, std::pair &p, 99 | int nrows, int ncols) 100 | { 101 | for (int i = 0; i < nrows; i++) 102 | { 103 | for (int j = 0; j < ncols; j++) 104 | { 105 | if (cost_graph[i][j] == 0 && !cover_table.isCovered(i, j)) 106 | { 107 | prime_graph.set(i, j); 108 | if (star_graph.isRowSet(i)) 109 | { 110 | cover_table.coverRow(i); 111 | cover_table.uncoverCol(star_graph.colForRow(i)); 112 | } 113 | else 114 | { 115 | p.first = i; 116 | p.second = j; 117 | return 1; 118 | } 119 | } 120 | } 121 | } 122 | return 0; 123 | }; 124 | 125 | void munkresStep4(PairGraph &star_graph, PairGraph &prime_graph, 126 | CoverTable &cover_table, std::pair &p) 127 | { 128 | // This process should be repeated until no star is found in prime's column 129 | while (star_graph.isColSet(p.second)) 130 | { 131 | // First find and reset any star found in the prime's columns 132 | std::pair s = {star_graph.rowForCol(p.second), p.second}; 133 | star_graph.reset(s.first, s.second); 134 | 135 | // Set this prime to a star 136 | star_graph.set(p.first, p.second); 137 | 138 | // Repeat the same process for prime in cleared star's row 139 | p = {s.first, prime_graph.colForRow(s.first)}; 140 | } 141 | star_graph.set(p.first, p.second); 142 | cover_table.clear(); 143 | prime_graph.clear(); 144 | } 145 | 146 | void munkresStep5(Vec2D &cost_graph, const CoverTable &cover_table, 147 | int nrows, int ncols) 148 | { 149 | bool valid = false; 150 | float min; 151 | for (int i = 0; i < nrows; i++) 152 | { 153 | for (int j = 0; j < ncols; j++) 154 | { 155 | if (!cover_table.isCovered(i, j)) 156 | { 157 | if (!valid) 158 | { 159 | min = cost_graph[i][j]; 160 | valid = true; 161 | } 162 | else if (cost_graph[i][j] < min) 163 | { 164 | min = cost_graph[i][j]; 165 | } 166 | } 167 | } 168 | } 169 | 170 | for (int i = 0; i < nrows; i++) 171 | { 172 | if (cover_table.isRowCovered(i)) 173 | { 174 | for (int j = 0; j < ncols; j++) 175 | { 176 | cost_graph[i][j] += min; 177 | } 178 | } 179 | } 180 | for (int j = 0; j < ncols; j++) 181 | { 182 | if (!cover_table.isColCovered(j)) 183 | { 184 | for (int i = 0; i < nrows; i++) 185 | { 186 | cost_graph[i][j] -= min; 187 | } 188 | } 189 | } 190 | } 191 | 192 | void munkres_algorithm(Vec2D &cost_graph, PairGraph &star_graph, int nrows, 193 | int ncols) 194 | { 195 | PairGraph prime_graph(nrows, ncols); 196 | CoverTable cover_table(nrows, ncols); 197 | prime_graph.clear(); 198 | cover_table.clear(); 199 | star_graph.clear(); 200 | 201 | int 
step = 0; 202 | if (ncols >= nrows) 203 | { 204 | subtract_minimum_row(cost_graph, nrows, ncols); 205 | } 206 | if (ncols > nrows) 207 | { 208 | step = 1; 209 | } 210 | 211 | std::pair p; 212 | bool done = false; 213 | while (!done) 214 | { 215 | switch (step) 216 | { 217 | case 0: 218 | subtract_minimum_column(cost_graph, nrows, ncols); 219 | case 1: 220 | munkresStep1(cost_graph, star_graph, nrows, ncols); 221 | case 2: 222 | if (munkresStep2(star_graph, cover_table)) 223 | { 224 | done = true; 225 | break; 226 | } 227 | case 3: 228 | if (!munkresStep3(cost_graph, star_graph, prime_graph, cover_table, p, 229 | nrows, ncols)) 230 | { 231 | step = 5; 232 | break; 233 | } 234 | case 4: 235 | munkresStep4(star_graph, prime_graph, cover_table, p); 236 | step = 2; 237 | break; 238 | case 5: 239 | munkresStep5(cost_graph, cover_table, nrows, ncols); 240 | step = 3; 241 | break; 242 | } 243 | } 244 | } -------------------------------------------------------------------------------- /pair_graph.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class PairGraph 7 | { 8 | public: 9 | PairGraph(int nrows, int ncols) : nrows(nrows), ncols(ncols) 10 | { 11 | this->rows.resize(nrows); 12 | this->cols.resize(ncols); 13 | } 14 | 15 | /** 16 | * Returns the column index of the pair matching this row 17 | */ 18 | inline int colForRow(int row) const 19 | { 20 | return this->rows[row]; 21 | } 22 | 23 | /** 24 | * Returns the row index of the pair matching this column 25 | */ 26 | inline int rowForCol(int col) const 27 | { 28 | return this->cols[col]; 29 | } 30 | 31 | /** 32 | * Creates a pair between row and col 33 | */ 34 | inline void set(int row, int col) 35 | { 36 | this->rows[row] = col; 37 | this->cols[col] = row; 38 | } 39 | 40 | inline bool isRowSet(int row) const 41 | { 42 | return rows[row] >= 0; 43 | } 44 | 45 | inline bool isColSet(int col) const 46 | { 47 | return cols[col] >= 0; 48 | } 49 | 50 | inline bool isPair(int row, int col) 51 | { 52 | return rows[row] == col; 53 | } 54 | 55 | /** 56 | * Clears pair between row and col 57 | */ 58 | inline void reset(int row, int col) 59 | { 60 | this->rows[row] = -1; 61 | this->cols[col] = -1; 62 | } 63 | 64 | /** 65 | * Clears all pairs in graph 66 | */ 67 | void clear() 68 | { 69 | for (int i = 0; i < this->nrows; i++) 70 | { 71 | this->rows[i] = -1; 72 | } 73 | for (int j = 0; j < this->ncols; j++) 74 | { 75 | this->cols[j] = -1; 76 | } 77 | } 78 | 79 | int numPairs() 80 | { 81 | int count = 0; 82 | for (int i = 0; i < nrows; i++) 83 | { 84 | if (rows[i] >= 0) 85 | { 86 | count++; 87 | } 88 | } 89 | return count; 90 | } 91 | 92 | std::vector> pairs() 93 | { 94 | std::vector> p(numPairs()); 95 | int count = 0; 96 | for (int i = 0; i < nrows; i++) 97 | { 98 | if (isRowSet(i)) 99 | { 100 | p[count++] = {i, colForRow(i)}; 101 | } 102 | } 103 | return p; 104 | } 105 | 106 | const int nrows; 107 | const int ncols; 108 | 109 | private: 110 | std::vector rows; 111 | std::vector cols; 112 | }; 113 | -------------------------------------------------------------------------------- /pose_estimation.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/deepstream_pose_estimation/8b385dd8ba8ae03d7daeb981e96249461498ccda/pose_estimation.onnx -------------------------------------------------------------------------------- /post_process.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright 2020 - NVIDIA Corporation 2 | // SPDX-License-Identifier: MIT 3 | 4 | #include "pair_graph.hpp" 5 | #include "cover_table.hpp" 6 | #include "munkres_algorithm.cpp" 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "gstnvdsmeta.h" 13 | #include "gstnvdsinfer.h" 14 | #include "nvdsgstutils.h" 15 | #include "nvbufsurface.h" 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #define EPS 1e-6 24 | 25 | template 26 | using Vec1D = std::vector; 27 | template 28 | using Vec2D = std::vector>; 29 | template 30 | using Vec3D = std::vector>; 31 | 32 | static const int M = 2; 33 | 34 | static Vec2D topology{ 35 | {0, 1, 15, 13}, 36 | {2, 3, 13, 11}, 37 | {4, 5, 16, 14}, 38 | {6, 7, 14, 12}, 39 | {8, 9, 11, 12}, 40 | {10, 11, 5, 7}, 41 | {12, 13, 6, 8}, 42 | {14, 15, 7, 9}, 43 | {16, 17, 8, 10}, 44 | {18, 19, 1, 2}, 45 | {20, 21, 0, 1}, 46 | {22, 23, 0, 2}, 47 | {24, 25, 1, 3}, 48 | {26, 27, 2, 4}, 49 | {28, 29, 3, 5}, 50 | {30, 31, 4, 6}, 51 | {32, 33, 17, 0}, 52 | {34, 35, 17, 5}, 53 | {36, 37, 17, 6}, 54 | {38, 39, 17, 11}, 55 | {40, 41, 17, 12}}; 56 | 57 | /* Method to find peaks in the output tensor. 'window_size' represents how many pixels we are considering at once to find a maximum value, or a ‘peak’. 58 | Once we find a peak, we mark it using the ‘is_peak’ boolean in the inner loop and assign this maximum value to the center pixel of our window. 59 | This is then repeated until we cover the entire frame. */ 60 | void find_peaks(Vec1D &counts_out, Vec3D &peaks_out, void *cmap_data, 61 | NvDsInferDims &cmap_dims, float threshold, int window_size, int max_count) 62 | { 63 | int w = window_size / 2; 64 | int width = cmap_dims.d[2]; 65 | int height = cmap_dims.d[1]; 66 | 67 | counts_out.assign(cmap_dims.d[0], 0); 68 | peaks_out.assign(cmap_dims.d[0], Vec2D(max_count, Vec1D(M, 69 | 0))); 70 | 71 | for (unsigned int c = 0; c < cmap_dims.d[0]; c++) 72 | { 73 | int count = 0; 74 | float *cmap_data_c = (float *)cmap_data + c * width * height; 75 | 76 | for (int i = 0; i < height && count < max_count; i++) 77 | { 78 | for (int j = 0; j < width && count < max_count; j++) 79 | { 80 | float value = cmap_data_c[i * width + j]; 81 | 82 | if (value < threshold) 83 | continue; 84 | 85 | int ii_min = i - w; 86 | int jj_min = j - w; 87 | int ii_max = i + w + 1; 88 | int jj_max = j + w + 1; 89 | 90 | if (ii_min < 0) 91 | ii_min = 0; 92 | if (ii_max > height) 93 | ii_max = height; 94 | if (jj_min < 0) 95 | jj_min = 0; 96 | if (jj_max > width) 97 | jj_max = width; 98 | 99 | bool is_peak = true; 100 | for (int ii = ii_min; ii < ii_max; ii++) 101 | { 102 | for (int jj = jj_min; jj < jj_max; jj++) 103 | { 104 | if (cmap_data_c[ii * width + jj] > value) 105 | { 106 | is_peak = false; 107 | } 108 | } 109 | } 110 | 111 | if (is_peak) 112 | { 113 | peaks_out[c][count][0] = i; 114 | peaks_out[c][count][1] = j; 115 | count++; 116 | } 117 | } 118 | } 119 | 120 | counts_out[c] = count; 121 | } 122 | } 123 | 124 | /* Normalize the peaks found in 'find_peaks' and apply non-maximal suppression*/ 125 | Vec3D 126 | refine_peaks(Vec1D &counts, 127 | Vec3D &peaks, void *cmap_data, NvDsInferDims &cmap_dims, 128 | int window_size) 129 | { 130 | int w = window_size / 2; 131 | int width = cmap_dims.d[2]; 132 | int height = cmap_dims.d[1]; 133 | 134 | Vec3D refined_peaks(peaks.size(), Vec2D(peaks[0].size(), 135 | Vec1D(peaks[0][0].size(), 0))); 136 | 137 | for (unsigned int c = 0; c < 
cmap_dims.d[0]; c++) 138 | { 139 | int count = counts[c]; 140 | auto &refined_peaks_a_bc = refined_peaks[c]; 141 | auto &peaks_a_bc = peaks[c]; 142 | float *cmap_data_c = (float *)cmap_data + c * width * height; 143 | 144 | for (int p = 0; p < count; p++) 145 | { 146 | auto &refined_peak = refined_peaks_a_bc[p]; 147 | auto &peak = peaks_a_bc[p]; 148 | 149 | int i = peak[0]; 150 | int j = peak[1]; 151 | float weight_sum = 0.0f; 152 | 153 | for (int ii = i - w; ii < i + w + 1; ii++) 154 | { 155 | int ii_idx = ii; 156 | 157 | if (ii < 0) 158 | ii_idx = -ii; 159 | else if (ii >= height) 160 | ii_idx = height - (ii - height) - 2; 161 | 162 | for (int jj = j - w; jj < j + w + 1; jj++) 163 | { 164 | int jj_idx = jj; 165 | 166 | if (jj < 0) 167 | jj_idx = -jj; 168 | else if (jj >= width) 169 | jj_idx = width - (jj - width) - 2; 170 | 171 | float weight = cmap_data_c[ii_idx * width + jj_idx]; 172 | refined_peak[0] += weight * ii; 173 | refined_peak[1] += weight * jj; 174 | weight_sum += weight; 175 | } 176 | } 177 | 178 | refined_peak[0] /= weight_sum; 179 | refined_peak[1] /= weight_sum; 180 | refined_peak[0] += 0.5; 181 | refined_peak[1] += 0.5; 182 | refined_peak[0] /= height; 183 | refined_peak[1] /= width; 184 | } 185 | } 186 | 187 | return refined_peaks; 188 | } 189 | 190 | /* Create a bipartite graph to assign detected body-parts to a unique person in the frame. This method also takes care of finding the line integral to assign scores 191 | to these points */ 192 | Vec3D 193 | paf_score_graph(void *paf_data, NvDsInferDims &paf_dims, 194 | Vec2D &topology, Vec1D &counts, 195 | Vec3D &peaks, int num_integral_samples) 196 | { 197 | int K = topology.size(); 198 | int H = paf_dims.d[1]; 199 | int W = paf_dims.d[2]; 200 | int max_count = peaks[0].size(); 201 | Vec3D score_graph(K, Vec2D(max_count, Vec1D(max_count, 0))); 202 | 203 | for (int k = 0; k < K; k++) 204 | { 205 | auto &score_graph_nk = score_graph[k]; 206 | auto &paf_i_idx = topology[k][0]; 207 | auto &paf_j_idx = topology[k][1]; 208 | auto &cmap_a_idx = topology[k][2]; 209 | auto &cmap_b_idx = topology[k][3]; 210 | float *paf_i = (float *)paf_data + paf_i_idx * H * W; 211 | float *paf_j = (float *)paf_data + paf_j_idx * H * W; 212 | 213 | auto &counts_a = counts[cmap_a_idx]; 214 | auto &counts_b = counts[cmap_b_idx]; 215 | auto &peaks_a = peaks[cmap_a_idx]; 216 | auto &peaks_b = peaks[cmap_b_idx]; 217 | 218 | for (int a = 0; a < counts_a; a++) 219 | { 220 | // Point A 221 | float pa_i = peaks_a[a][0] * H; 222 | float pa_j = peaks_a[a][1] * W; 223 | 224 | for (int b = 0; b < counts_b; b++) 225 | { 226 | // Point B 227 | float pb_i = peaks_b[b][0] * H; 228 | float pb_j = peaks_b[b][1] * W; 229 | 230 | // Vector from Point A to Point B 231 | float pab_i = pb_i - pa_i; 232 | float pab_j = pb_j - pa_j; 233 | 234 | // Normalized Vector from Point A to Point B 235 | float pab_norm = sqrtf(pab_i * pab_i + pab_j * pab_j) + EPS; 236 | float uab_i = pab_i / pab_norm; 237 | float uab_j = pab_j / pab_norm; 238 | 239 | float integral = 0.0; 240 | float increment = 1.0f / num_integral_samples; 241 | 242 | for (int t = 0; t < num_integral_samples; t++) 243 | { 244 | // Integral Point T 245 | float progress = (float)t / (float)num_integral_samples; 246 | float pt_i = pa_i + progress * pab_i; 247 | float pt_j = pa_j + progress * pab_j; 248 | 249 | // Convert to Integer 250 | int pt_i_int = (int)pt_i; 251 | int pt_j_int = (int)pt_j; 252 | 253 | // Edge cases for if the point is out of bounds, just skip them 254 | if (pt_i_int < 0) 255 | continue; 256 | if 
(pt_i_int > H) 257 | continue; 258 | if (pt_j_int < 0) 259 | continue; 260 | if (pt_j_int > W) 261 | continue; 262 | 263 | // Vector at integral point 264 | float pt_paf_i = paf_i[pt_i_int * W + pt_j_int]; 265 | float pt_paf_j = paf_j[pt_i_int * W + pt_j_int]; 266 | 267 | // Dot Product Normalized A->B with PAF Vector 268 | float dot = pt_paf_i * uab_i + pt_paf_j * uab_j; 269 | integral += dot; 270 | 271 | progress += increment; 272 | } 273 | 274 | // Normalize the integral with respect to the number of samples 275 | integral /= num_integral_samples; 276 | score_graph_nk[a][b] = integral; 277 | } 278 | } 279 | } 280 | return score_graph; 281 | } 282 | 283 | /* 284 | This method takes care of solving the graph assignment problem using Munkres algorithm. Munkres algorithm is defind in 'munkres_algorithm.cpp' 285 | */ 286 | 287 | Vec3D 288 | assignment(Vec3D &score_graph, 289 | Vec2D &topology, Vec1D &counts, float score_threshold, int max_count) 290 | { 291 | int K = topology.size(); 292 | Vec3D connections(K, Vec2D(M, Vec1D(max_count, -1))); 293 | 294 | Vec3D cost_graph = score_graph; 295 | for (Vec2D &cg_iter1 : cost_graph) 296 | for (Vec1D &cg_iter2 : cg_iter1) 297 | for (float &cg_iter3 : cg_iter2) 298 | cg_iter3 = -cg_iter3; 299 | auto &cost_graph_out_a = cost_graph; 300 | 301 | for (int k = 0; k < K; k++) 302 | { 303 | int cmap_a_idx = topology[k][2]; 304 | int cmap_b_idx = topology[k][3]; 305 | int nrows = counts[cmap_a_idx]; 306 | int ncols = counts[cmap_b_idx]; 307 | auto star_graph = PairGraph(nrows, ncols); 308 | auto &cost_graph_out_a_nk = cost_graph_out_a[k]; 309 | munkres_algorithm(cost_graph_out_a_nk, star_graph, nrows, ncols); 310 | 311 | auto &connections_a_nk = connections[k]; 312 | auto &score_graph_a_nk = score_graph[k]; 313 | 314 | for (int i = 0; i < nrows; i++) 315 | { 316 | for (int j = 0; j < ncols; j++) 317 | { 318 | if (star_graph.isPair(i, j) && score_graph_a_nk[i][j] > score_threshold) 319 | { 320 | connections_a_nk[0][i] = j; 321 | connections_a_nk[1][j] = i; 322 | } 323 | } 324 | } 325 | } 326 | return connections; 327 | } 328 | 329 | /* This method takes care of connecting all the body parts detected to each other 330 | after finding the relationships between them in the 'assignment' method */ 331 | Vec2D 332 | connect_parts( 333 | Vec3D &connections, Vec2D &topology, Vec1D &counts, 334 | int max_count) 335 | { 336 | int K = topology.size(); 337 | int C = counts.size(); 338 | 339 | Vec2D visited(C, Vec1D(max_count, 0)); 340 | 341 | Vec2D objects(max_count, Vec1D(C, -1)); 342 | 343 | int num_objects = 0; 344 | for (int c = 0; c < C; c++) 345 | { 346 | if (num_objects >= max_count) 347 | { 348 | break; 349 | } 350 | 351 | int count = counts[c]; 352 | 353 | for (int i = 0; i < count; i++) 354 | { 355 | if (num_objects >= max_count) 356 | { 357 | break; 358 | } 359 | 360 | std::queue> q; 361 | bool new_object = false; 362 | q.push({c, i}); 363 | 364 | while (!q.empty()) 365 | { 366 | auto node = q.front(); 367 | q.pop(); 368 | int c_n = node.first; 369 | int i_n = node.second; 370 | 371 | if (visited[c_n][i_n]) 372 | { 373 | continue; 374 | } 375 | 376 | visited[c_n][i_n] = 1; 377 | new_object = true; 378 | objects[num_objects][c_n] = i_n; 379 | 380 | for (int k = 0; k < K; k++) 381 | { 382 | int c_a = topology[k][2]; 383 | int c_b = topology[k][3]; 384 | 385 | if (c_a == c_n) 386 | { 387 | int i_b = connections[k][0][i_n]; 388 | if (i_b >= 0) 389 | { 390 | q.push({c_b, i_b}); 391 | } 392 | } 393 | 394 | if (c_b == c_n) 395 | { 396 | int i_a = 
connections[k][1][i_n]; 397 | if (i_a >= 0) 398 | { 399 | q.push({c_a, i_a}); 400 | } 401 | } 402 | } 403 | } 404 | 405 | if (new_object) 406 | { 407 | num_objects++; 408 | } 409 | } 410 | } 411 | 412 | objects.resize(num_objects); 413 | return objects; 414 | } --------------------------------------------------------------------------------
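The comments in `post_process.cpp` describe how `assignment()` turns the PAF score graph into a per-link matching by negating the scores and running the Munkres algorithm from `munkres_algorithm.cpp`. The following minimal sketch (not part of the original sources) shows that step in isolation on a toy score matrix. It assumes the repository's `pair_graph.hpp`, `cover_table.hpp`, and `munkres_algorithm.cpp` are on the include path, and that `Vec2D<float>` is the `std::vector<std::vector<float>>` alias used throughout the code.

```
// Standalone sketch (not part of the original repository): exercising the
// Munkres-based matching used by assignment() on a tiny, hand-made score matrix.
// Assumes pair_graph.hpp, cover_table.hpp and munkres_algorithm.cpp from this
// repository are available on the include path.
#include "munkres_algorithm.cpp" // also pulls in pair_graph.hpp and cover_table.hpp

#include <cstdio>
#include <vector>

int main()
{
  // PAF scores between 2 peaks of body-part A (rows) and 3 peaks of body-part B
  // (columns), as paf_score_graph() would produce them. Higher means the two
  // peaks are more likely to belong to the same person.
  std::vector<std::vector<float>> score = {
      {0.90f, 0.10f, 0.05f},
      {0.20f, 0.05f, 0.80f}};

  // munkres_algorithm() minimises cost, so negate the scores first
  // (the same trick assignment() uses when it builds cost_graph).
  std::vector<std::vector<float>> cost = score;
  for (auto &row : cost)
    for (auto &v : row)
      v = -v;

  PairGraph star_graph(2, 3);
  munkres_algorithm(cost, star_graph, 2, 3);

  // star_graph now holds the optimal matching; expect A0 -> B0 and A1 -> B2.
  for (const auto &p : star_graph.pairs())
    std::printf("part A peak %d matched to part B peak %d\n", p.first, p.second);

  return 0;
}
```

With these scores the matching pairs peak 0 of part A with peak 0 of part B, and peak 1 with peak 2; `assignment()` would then keep only the pairs whose score exceeds `score_threshold` before `connect_parts()` groups the surviving links into per-person skeletons.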