├── .gitignore
├── CMakeLists.txt
├── HandKeypointDetector.py
├── README.md
├── __init__.py
├── data
    ├── front-back.jpg
    └── hand.jpg
├── getModels.sh
├── hand
    └── pose_deploy.prototxt
├── handPoseImage.py
└── handPoseVideo.py


/.gitignore:
--------------------------------------------------------------------------------
1 | /hand/pose_iter_102000.caffemodel
2 | /out
3 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.8.12)
 2 | 
 3 | PROJECT(handPose)
 4 | 
 5 | find_package( OpenCV REQUIRED )
 6 | 
 7 | include_directories( ${OpenCV_INCLUDE_DIRS})
 8 | 
 9 | MACRO(add_example name)
10 |   ADD_EXECUTABLE(${name} ${name}.cpp)
11 |   TARGET_LINK_LIBRARIES(${name} ${OpenCV_LIBS})
12 | ENDMACRO()
13 | 
14 | 
15 | add_example(handPoseImage)
16 | add_example(handPoseVideo)
17 | 


--------------------------------------------------------------------------------
/HandKeypointDetector.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | 
  3 | import glob
  4 | import sys
  5 | import cv2
  6 | import time
  7 | import numpy as np
  8 | import os
  9 | import shutil
 10 | class HandKeypointDetector():
 11 |     def __init__(self,output_folder,show_debug=False):
 12 |         self.show_debug = show_debug
 13 |         file_dir = os.path.abspath(os.path.dirname(__file__))
 14 |         self.protoFile = file_dir+"\\hand\\pose_deploy.prototxt"
 15 |         self.weightsFile = file_dir+"\\hand\\pose_iter_102000.caffemodel"
 16 |         self.nPoints = 22
 17 | 	
 18 |         self.data_out = output_folder
 19 |         if not os.path.exists(output_folder):
 20 |             os.mkdir(output_folder)
 21 |         else:
 22 |             shutil.rmtree(output_folder)
 23 |             time.sleep(1)
 24 |             os.mkdir(output_folder)
 25 |         self.keypoints = np.zeros((2*(self.nPoints -1),3))
 26 |         self.resize_factor = 0.5
 27 |         self.rearrange_finger_indices = np.array([0, 4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13, 20, 19, 18, 17])
 28 |         self.min_number_of_points = 8
 29 |         self.confidence_for_roi = 0.05
 30 |         self.roi_expansion = 0.09
 31 |         self.POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ]
 32 |         self.net = cv2.dnn.readNetFromCaffe(self.protoFile, self.weightsFile)
 33 |     def storeKeyPoints(self,prefix):
 34 |         cv2.imwrite(self.data_out + '\\' + prefix + self.output_file_name + '.png', self.debug_image)
 35 |         np.savez(self.data_out + '\\{}.npz'.format(self.output_file_name), num_hands=1, kp_coord_uv=self.keypoints[:, 0:2],
 36 |                  kp_visible=self.keypoints[:, 2], )
 37 | 
 38 |     def detectKeyPoints(self,data_folder):
 39 |         self.debug_image = None
 40 |         self.keypoints = np.zeros((2*(self.nPoints -1),3))
 41 |         self.output_file_name = ''
 42 |         single_imag = False
 43 | 
 44 |         try:
 45 |             if os.path.isdir(data_folder):
 46 |                 files = glob.glob(data_folder + '\*.png')
 47 |                 bb=None
 48 |             else:
 49 |                 files = [data_folder]
 50 |                 single_imag = False
 51 |         except:
 52 |             single_imag = True
 53 |             files = [0]
 54 |         for f in range(0,len(files),1):
 55 |             if single_imag:
 56 |                 frame = data_folder.detach().cpu().numpy()
 57 |             else:
 58 |                 frame = cv2.imread(files[f])
 59 |                 import re
 60 |                 self.output_file_name =  re.split('[\\\ .]', files[f])[-2] + '_skeleton'
 61 | 
 62 |             frame = cv2.resize(frame,None,fx=self.resize_factor,fy=self.resize_factor)
 63 |             # Select ROI
 64 | 
 65 |             # frame=frame[int(frame.shape[0] / 2):, :, :]
 66 |             # r = cv2.selectROI(frame)
 67 |             #
 68 |             # # Crop image
 69 |             # frame = frame[int(r[1]):int(r[1] + r[3]), int(r[0]):int(r[0] + r[2])]
 70 | 
 71 |             # frameCopy = np.copy(frame)
 72 |             frameWidth = frame.shape[1]
 73 |             frameHeight = frame.shape[0]
 74 |             aspect_ratio = frameWidth/frameHeight
 75 | 
 76 |             threshold = 0.1
 77 | 
 78 |             t = time.time()
 79 |             # input image dimensions for the network
 80 |             inHeight = 368
 81 |             inWidth = int(((aspect_ratio*inHeight)*8)//8)
 82 |             inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
 83 | 
 84 |             self.net.setInput(inpBlob)
 85 | 
 86 |             output = self.net.forward()
 87 |             if self.show_debug:
 88 |                 print("time taken by network : {:.3f}".format(time.time() - t))
 89 | 
 90 |             # Empty list to store the detected keypoints
 91 |             points = []
 92 |             # points_probs = []
 93 |             for i in range(self.nPoints):
 94 |                 # confidence map of corresponding body's part.
 95 |                 probMap = output[0, i, :, :]
 96 |                 probMap = cv2.resize(probMap, (frameWidth, frameHeight))
 97 | 
 98 |                 # Find global maxima of the probMap.
 99 |                 minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
100 | 
101 |                 if prob > threshold :
102 |                     cv2.circle(frame, (int(point[0]), int(point[1])), 2, (0, 0, int(255*prob)), thickness=-1, lineType=cv2.FILLED)
103 |                     cv2.putText(frame, "{}".format(self.rearrange_finger_indices[i]), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.1, (0, 0, 255), 1, lineType=cv2.LINE_AA)
104 | 
105 |                     # Add the point to the list if the probability is greater than the threshold
106 |                     points.append(np.array([int(point[0]), int(point[1]),prob]))
107 |                     # points_probs.append(prob)
108 |                 else :
109 |                     # points_probs.append(0)
110 |                     points.append(np.array([0, 0,0]))
111 |             points = np.array(points)
112 |             # Draw Skeleton
113 |             for ii,pair in enumerate(self.POSE_PAIRS):
114 |                 partA = pair[0]
115 |                 partB = pair[1]
116 |                 # prob = points_probs[ii]
117 |                 if  np.all(points[partA]) and  np.all(points[partB]):
118 |                     cv2.line(frame, tuple((points[partA][0:2]).astype(int)), tuple((points[partB][0:2]).astype(int)), (0, 255, 255), 2)
119 |                     # cv2.circle(frame, points[partA][0:2], 8, (0, 0, int(255*prob)), thickness=-1, lineType=cv2.FILLED)
120 |                     # cv2.circle(frame, points[partB][0:2], 8, (0, 0, int(255*prob)), thickness=-1, lineType=cv2.FILLED)
121 | 
122 |             if self.show_debug:
123 |                 cv2.imshow('Output-Skeleton', frame)
124 |                 print("Total time taken : {:.3f}".format(time.time() - t))
125 | 
126 |                 cv2.waitKey(0)
127 |             # cv2.imwrite(self.data_out + '\\'+self.output_file_name+'.png', frame)
128 |             if self.min_number_of_points < sum(x is not None for x in points):
129 |                 ordered_points = np.array(points)[self.rearrange_finger_indices]
130 |                 ordered_points[:,0:2] = ordered_points[:,0:2]/self.resize_factor
131 |                 self.keypoints[0:self.nPoints-1,:] = ordered_points[:,0:3]
132 |                 # np.savez(self.data_out + '\\{}.npz'.format(output_file_name), num_hands = 1 , kp_coord_uv=self.keypoints[:,0:2], kp_visible=self.keypoints[:,2], )
133 |                 indices = self.keypoints[:, 2] > self.confidence_for_roi
134 |                 bb = {'minX': int((1 - self.roi_expansion) * min(self.keypoints[indices, 0])),
135 |                       'maxX': int((1 + self.roi_expansion) * max(self.keypoints[indices, 0])),
136 |                       'minY': int((1 - self.roi_expansion) * min(self.keypoints[indices, 1])),
137 |                       'maxY': int((1 + self.roi_expansion) * max(self.keypoints[indices, 1])),
138 |                       }
139 |                 # print(bb)
140 |         self.debug_image = cv2.resize(frame,None,fx=1/self.resize_factor,fy=1/self.resize_factor)
141 |         # import matplotlib.pyplot as plt
142 |         # plt.close('all')
143 |         # plt.imshow(self.debug_image)
144 |         # plt.savefig('a.png')
145 |         return bb
146 | 
if __name__ == '__main__':
    # Script entry point: run the detector once on a single sample image.
    sample_image = r"P:\4Erez\david\test\raw_stream\stream001_00104RGB.png"
    detector = HandKeypointDetector("out/", show_debug=False)
    detector.detectKeyPoints(sample_image)
    print('%%%%%%%%%%% Done %%%%%%%%%%%%%%%')
153 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | This repository is based on
 2 | https://www.learnopencv.com/hand-keypoint-detection-using-deep-learning-and-opencv/
 3 | On top of that, a wrapper class was added for detecting hand keypoints,
 4 | following the paper https://arxiv.org/pdf/1704.07809.pdf
 5 | 
 6 | 
 7 | 
 8 | Please run getModels.sh from the command line to download the model into the correct folder.
 9 | 
10 | ### USAGE
11 | 
12 | #### Python
13 | **To run on a single image:**
14 | python handPoseImage.py
15 | 
16 | **To run on a video:**
17 | python handPoseVideo.py
18 | 
19 | **To run the hand keypoint detector class:**
20 | python HandKeypointDetector.py
21 | 
22 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erezposner/MV_HandKeyPointDetector/afdaa1a9e7701605176d82b03a7436e9981cc467/__init__.py


--------------------------------------------------------------------------------
/data/front-back.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erezposner/MV_HandKeyPointDetector/afdaa1a9e7701605176d82b03a7436e9981cc467/data/front-back.jpg


--------------------------------------------------------------------------------
/data/hand.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erezposner/MV_HandKeyPointDetector/afdaa1a9e7701605176d82b03a7436e9981cc467/data/hand.jpg


--------------------------------------------------------------------------------
/getModels.sh:
--------------------------------------------------------------------------------
 1 | # ------------------------- BODY, FACE AND HAND MODELS -------------------------
 2 | # Downloading body pose (COCO and MPI), face and hand models
 3 | OPENPOSE_URL="http://posefs1.perception.cs.cmu.edu/OpenPose/models/"
 4 | HAND_FOLDER="hand/"
 5 | 
 6 | # "------------------------- HAND MODELS -------------------------"
 7 | # Hand
 8 | HAND_MODEL=$HAND_FOLDER"pose_iter_102000.caffemodel"
 9 | wget -c ${OPENPOSE_URL}${HAND_MODEL} -P ${HAND_FOLDER}
10 | 


--------------------------------------------------------------------------------
/hand/pose_deploy.prototxt:
--------------------------------------------------------------------------------
   1 | input: "image"
   2 | input_dim: 1 # Original: 2
   3 | input_dim: 3 # It crashes if not left to 3
   4 | input_dim: 1 # Original: 368
   5 | input_dim: 1 # Original: 368
   6 | layer {
   7 |   name: "conv1_1"
   8 |   type: "Convolution"
   9 |   bottom: "image"
  10 |   top: "conv1_1"
  11 |   param {
  12 |     lr_mult: 1.0
  13 |     decay_mult: 1
  14 |   }
  15 |   param {
  16 |     lr_mult: 2.0
  17 |     decay_mult: 0
  18 |   }
  19 |   convolution_param {
  20 |     num_output: 64
  21 |     pad: 1
  22 |     kernel_size: 3
  23 |     weight_filler {
  24 |       type: "xavier"
  25 |     }
  26 |     bias_filler {
  27 |       type: "constant"
  28 |     }
  29 |     dilation: 1
  30 |   }
  31 | }
  32 | layer {
  33 |   name: "relu1_1"
  34 |   type: "ReLU"
  35 |   bottom: "conv1_1"
  36 |   top: "conv1_1"
  37 | }
  38 | layer {
  39 |   name: "conv1_2"
  40 |   type: "Convolution"
  41 |   bottom: "conv1_1"
  42 |   top: "conv1_2"
  43 |   param {
  44 |     lr_mult: 1.0
  45 |     decay_mult: 1
  46 |   }
  47 |   param {
  48 |     lr_mult: 2.0
  49 |     decay_mult: 0
  50 |   }
  51 |   convolution_param {
  52 |     num_output: 64
  53 |     pad: 1
  54 |     kernel_size: 3
  55 |     weight_filler {
  56 |       type: "xavier"
  57 |     }
  58 |     bias_filler {
  59 |       type: "constant"
  60 |     }
  61 |     dilation: 1
  62 |   }
  63 | }
  64 | layer {
  65 |   name: "relu1_2"
  66 |   type: "ReLU"
  67 |   bottom: "conv1_2"
  68 |   top: "conv1_2"
  69 | }
  70 | layer {
  71 |   name: "pool1_stage1"
  72 |   type: "Pooling"
  73 |   bottom: "conv1_2"
  74 |   top: "pool1_stage1"
  75 |   pooling_param {
  76 |     pool: MAX
  77 |     kernel_size: 2
  78 |     stride: 2
  79 |   }
  80 | }
  81 | layer {
  82 |   name: "conv2_1"
  83 |   type: "Convolution"
  84 |   bottom: "pool1_stage1"
  85 |   top: "conv2_1"
  86 |   param {
  87 |     lr_mult: 1.0
  88 |     decay_mult: 1
  89 |   }
  90 |   param {
  91 |     lr_mult: 2.0
  92 |     decay_mult: 0
  93 |   }
  94 |   convolution_param {
  95 |     num_output: 128
  96 |     pad: 1
  97 |     kernel_size: 3
  98 |     weight_filler {
  99 |       type: "xavier"
 100 |     }
 101 |     bias_filler {
 102 |       type: "constant"
 103 |     }
 104 |     dilation: 1
 105 |   }
 106 | }
 107 | layer {
 108 |   name: "relu2_1"
 109 |   type: "ReLU"
 110 |   bottom: "conv2_1"
 111 |   top: "conv2_1"
 112 | }
 113 | layer {
 114 |   name: "conv2_2"
 115 |   type: "Convolution"
 116 |   bottom: "conv2_1"
 117 |   top: "conv2_2"
 118 |   param {
 119 |     lr_mult: 1.0
 120 |     decay_mult: 1
 121 |   }
 122 |   param {
 123 |     lr_mult: 2.0
 124 |     decay_mult: 0
 125 |   }
 126 |   convolution_param {
 127 |     num_output: 128
 128 |     pad: 1
 129 |     kernel_size: 3
 130 |     weight_filler {
 131 |       type: "xavier"
 132 |     }
 133 |     bias_filler {
 134 |       type: "constant"
 135 |     }
 136 |     dilation: 1
 137 |   }
 138 | }
 139 | layer {
 140 |   name: "relu2_2"
 141 |   type: "ReLU"
 142 |   bottom: "conv2_2"
 143 |   top: "conv2_2"
 144 | }
 145 | layer {
 146 |   name: "pool2_stage1"
 147 |   type: "Pooling"
 148 |   bottom: "conv2_2"
 149 |   top: "pool2_stage1"
 150 |   pooling_param {
 151 |     pool: MAX
 152 |     kernel_size: 2
 153 |     stride: 2
 154 |   }
 155 | }
 156 | layer {
 157 |   name: "conv3_1"
 158 |   type: "Convolution"
 159 |   bottom: "pool2_stage1"
 160 |   top: "conv3_1"
 161 |   param {
 162 |     lr_mult: 1.0
 163 |     decay_mult: 1
 164 |   }
 165 |   param {
 166 |     lr_mult: 2.0
 167 |     decay_mult: 0
 168 |   }
 169 |   convolution_param {
 170 |     num_output: 256
 171 |     pad: 1
 172 |     kernel_size: 3
 173 |     weight_filler {
 174 |       type: "xavier"
 175 |     }
 176 |     bias_filler {
 177 |       type: "constant"
 178 |     }
 179 |     dilation: 1
 180 |   }
 181 | }
 182 | layer {
 183 |   name: "relu3_1"
 184 |   type: "ReLU"
 185 |   bottom: "conv3_1"
 186 |   top: "conv3_1"
 187 | }
 188 | layer {
 189 |   name: "conv3_2"
 190 |   type: "Convolution"
 191 |   bottom: "conv3_1"
 192 |   top: "conv3_2"
 193 |   param {
 194 |     lr_mult: 1.0
 195 |     decay_mult: 1
 196 |   }
 197 |   param {
 198 |     lr_mult: 2.0
 199 |     decay_mult: 0
 200 |   }
 201 |   convolution_param {
 202 |     num_output: 256
 203 |     pad: 1
 204 |     kernel_size: 3
 205 |     weight_filler {
 206 |       type: "xavier"
 207 |     }
 208 |     bias_filler {
 209 |       type: "constant"
 210 |     }
 211 |     dilation: 1
 212 |   }
 213 | }
 214 | layer {
 215 |   name: "relu3_2"
 216 |   type: "ReLU"
 217 |   bottom: "conv3_2"
 218 |   top: "conv3_2"
 219 | }
 220 | layer {
 221 |   name: "conv3_3"
 222 |   type: "Convolution"
 223 |   bottom: "conv3_2"
 224 |   top: "conv3_3"
 225 |   param {
 226 |     lr_mult: 1.0
 227 |     decay_mult: 1
 228 |   }
 229 |   param {
 230 |     lr_mult: 2.0
 231 |     decay_mult: 0
 232 |   }
 233 |   convolution_param {
 234 |     num_output: 256
 235 |     pad: 1
 236 |     kernel_size: 3
 237 |     weight_filler {
 238 |       type: "xavier"
 239 |     }
 240 |     bias_filler {
 241 |       type: "constant"
 242 |     }
 243 |     dilation: 1
 244 |   }
 245 | }
 246 | layer {
 247 |   name: "relu3_3"
 248 |   type: "ReLU"
 249 |   bottom: "conv3_3"
 250 |   top: "conv3_3"
 251 | }
 252 | layer {
 253 |   name: "conv3_4"
 254 |   type: "Convolution"
 255 |   bottom: "conv3_3"
 256 |   top: "conv3_4"
 257 |   param {
 258 |     lr_mult: 1.0
 259 |     decay_mult: 1
 260 |   }
 261 |   param {
 262 |     lr_mult: 2.0
 263 |     decay_mult: 0
 264 |   }
 265 |   convolution_param {
 266 |     num_output: 256
 267 |     pad: 1
 268 |     kernel_size: 3
 269 |     weight_filler {
 270 |       type: "xavier"
 271 |     }
 272 |     bias_filler {
 273 |       type: "constant"
 274 |     }
 275 |     dilation: 1
 276 |   }
 277 | }
 278 | layer {
 279 |   name: "relu3_4"
 280 |   type: "ReLU"
 281 |   bottom: "conv3_4"
 282 |   top: "conv3_4"
 283 | }
 284 | layer {
 285 |   name: "pool3_stage1"
 286 |   type: "Pooling"
 287 |   bottom: "conv3_4"
 288 |   top: "pool3_stage1"
 289 |   pooling_param {
 290 |     pool: MAX
 291 |     kernel_size: 2
 292 |     stride: 2
 293 |   }
 294 | }
 295 | layer {
 296 |   name: "conv4_1"
 297 |   type: "Convolution"
 298 |   bottom: "pool3_stage1"
 299 |   top: "conv4_1"
 300 |   param {
 301 |     lr_mult: 1.0
 302 |     decay_mult: 1
 303 |   }
 304 |   param {
 305 |     lr_mult: 2.0
 306 |     decay_mult: 0
 307 |   }
 308 |   convolution_param {
 309 |     num_output: 512
 310 |     pad: 1
 311 |     kernel_size: 3
 312 |     weight_filler {
 313 |       type: "xavier"
 314 |     }
 315 |     bias_filler {
 316 |       type: "constant"
 317 |     }
 318 |     dilation: 1
 319 |   }
 320 | }
 321 | layer {
 322 |   name: "relu4_1"
 323 |   type: "ReLU"
 324 |   bottom: "conv4_1"
 325 |   top: "conv4_1"
 326 | }
 327 | layer {
 328 |   name: "conv4_2"
 329 |   type: "Convolution"
 330 |   bottom: "conv4_1"
 331 |   top: "conv4_2"
 332 |   param {
 333 |     lr_mult: 1.0
 334 |     decay_mult: 1
 335 |   }
 336 |   param {
 337 |     lr_mult: 2.0
 338 |     decay_mult: 0
 339 |   }
 340 |   convolution_param {
 341 |     num_output: 512
 342 |     pad: 1
 343 |     kernel_size: 3
 344 |     weight_filler {
 345 |       type: "xavier"
 346 |     }
 347 |     bias_filler {
 348 |       type: "constant"
 349 |     }
 350 |     dilation: 1
 351 |   }
 352 | }
 353 | layer {
 354 |   name: "relu4_2"
 355 |   type: "ReLU"
 356 |   bottom: "conv4_2"
 357 |   top: "conv4_2"
 358 | }
 359 | layer {
 360 |   name: "conv4_3"
 361 |   type: "Convolution"
 362 |   bottom: "conv4_2"
 363 |   top: "conv4_3"
 364 |   param {
 365 |     lr_mult: 1.0
 366 |     decay_mult: 1
 367 |   }
 368 |   param {
 369 |     lr_mult: 2.0
 370 |     decay_mult: 0
 371 |   }
 372 |   convolution_param {
 373 |     num_output: 512
 374 |     pad: 1
 375 |     kernel_size: 3
 376 |     weight_filler {
 377 |       type: "xavier"
 378 |     }
 379 |     bias_filler {
 380 |       type: "constant"
 381 |     }
 382 |     dilation: 1
 383 |   }
 384 | }
 385 | layer {
 386 |   name: "relu4_3"
 387 |   type: "ReLU"
 388 |   bottom: "conv4_3"
 389 |   top: "conv4_3"
 390 | }
 391 | layer {
 392 |   name: "conv4_4"
 393 |   type: "Convolution"
 394 |   bottom: "conv4_3"
 395 |   top: "conv4_4"
 396 |   param {
 397 |     lr_mult: 1.0
 398 |     decay_mult: 1
 399 |   }
 400 |   param {
 401 |     lr_mult: 2.0
 402 |     decay_mult: 0
 403 |   }
 404 |   convolution_param {
 405 |     num_output: 512
 406 |     pad: 1
 407 |     kernel_size: 3
 408 |     weight_filler {
 409 |       type: "xavier"
 410 |     }
 411 |     bias_filler {
 412 |       type: "constant"
 413 |     }
 414 |     dilation: 1
 415 |   }
 416 | }
 417 | layer {
 418 |   name: "relu4_4"
 419 |   type: "ReLU"
 420 |   bottom: "conv4_4"
 421 |   top: "conv4_4"
 422 | }
 423 | layer {
 424 |   name: "conv5_1"
 425 |   type: "Convolution"
 426 |   bottom: "conv4_4"
 427 |   top: "conv5_1"
 428 |   param {
 429 |     lr_mult: 1.0
 430 |     decay_mult: 1
 431 |   }
 432 |   param {
 433 |     lr_mult: 2.0
 434 |     decay_mult: 0
 435 |   }
 436 |   convolution_param {
 437 |     num_output: 512
 438 |     pad: 1
 439 |     kernel_size: 3
 440 |     weight_filler {
 441 |       type: "xavier"
 442 |     }
 443 |     bias_filler {
 444 |       type: "constant"
 445 |     }
 446 |     dilation: 1
 447 |   }
 448 | }
 449 | layer {
 450 |   name: "relu5_1"
 451 |   type: "ReLU"
 452 |   bottom: "conv5_1"
 453 |   top: "conv5_1"
 454 | }
 455 | layer {
 456 |   name: "conv5_2"
 457 |   type: "Convolution"
 458 |   bottom: "conv5_1"
 459 |   top: "conv5_2"
 460 |   param {
 461 |     lr_mult: 1.0
 462 |     decay_mult: 1
 463 |   }
 464 |   param {
 465 |     lr_mult: 2.0
 466 |     decay_mult: 0
 467 |   }
 468 |   convolution_param {
 469 |     num_output: 512
 470 |     pad: 1
 471 |     kernel_size: 3
 472 |     weight_filler {
 473 |       type: "xavier"
 474 |     }
 475 |     bias_filler {
 476 |       type: "constant"
 477 |     }
 478 |     dilation: 1
 479 |   }
 480 | }
 481 | layer {
 482 |   name: "relu5_2"
 483 |   type: "ReLU"
 484 |   bottom: "conv5_2"
 485 |   top: "conv5_2"
 486 | }
 487 | layer {
 488 |   name: "conv5_3_CPM"
 489 |   type: "Convolution"
 490 |   bottom: "conv5_2"
 491 |   top: "conv5_3_CPM"
 492 |   param {
 493 |     lr_mult: 1.0
 494 |     decay_mult: 1
 495 |   }
 496 |   param {
 497 |     lr_mult: 2.0
 498 |     decay_mult: 0
 499 |   }
 500 |   convolution_param {
 501 |     num_output: 128
 502 |     pad: 1
 503 |     kernel_size: 3
 504 |     weight_filler {
 505 |       type: "gaussian"
 506 |       std: 0.01
 507 |     }
 508 |     bias_filler {
 509 |       type: "constant"
 510 |     }
 511 |     dilation: 1
 512 |   }
 513 | }
 514 | layer {
 515 |   name: "relu5_4_stage1_3"
 516 |   type: "ReLU"
 517 |   bottom: "conv5_3_CPM"
 518 |   top: "conv5_3_CPM"
 519 | }
 520 | layer {
 521 |   name: "conv6_1_CPM"
 522 |   type: "Convolution"
 523 |   bottom: "conv5_3_CPM"
 524 |   top: "conv6_1_CPM"
 525 |   param {
 526 |     lr_mult: 1.0
 527 |     decay_mult: 1
 528 |   }
 529 |   param {
 530 |     lr_mult: 2.0
 531 |     decay_mult: 0
 532 |   }
 533 |   convolution_param {
 534 |     num_output: 512
 535 |     pad: 0
 536 |     kernel_size: 1
 537 |     weight_filler {
 538 |       type: "gaussian"
 539 |       std: 0.01
 540 |     }
 541 |     bias_filler {
 542 |       type: "constant"
 543 |     }
 544 |     dilation: 1
 545 |   }
 546 | }
 547 | layer {
 548 |   name: "relu6_4_stage1_1"
 549 |   type: "ReLU"
 550 |   bottom: "conv6_1_CPM"
 551 |   top: "conv6_1_CPM"
 552 | }
 553 | layer {
 554 |   name: "conv6_2_CPM"
 555 |   type: "Convolution"
 556 |   bottom: "conv6_1_CPM"
 557 |   top: "conv6_2_CPM"
 558 |   param {
 559 |     lr_mult: 1.0
 560 |     decay_mult: 1
 561 |   }
 562 |   param {
 563 |     lr_mult: 2.0
 564 |     decay_mult: 0
 565 |   }
 566 |   convolution_param {
 567 |     num_output: 22
 568 |     pad: 0
 569 |     kernel_size: 1
 570 |     weight_filler {
 571 |       type: "gaussian"
 572 |       std: 0.01
 573 |     }
 574 |     bias_filler {
 575 |       type: "constant"
 576 |     }
 577 |     dilation: 1
 578 |   }
 579 | }
 580 | layer {
 581 |   name: "concat_stage2"
 582 |   type: "Concat"
 583 |   bottom: "conv6_2_CPM"
 584 |   bottom: "conv5_3_CPM"
 585 |   top: "concat_stage2"
 586 |   concat_param {
 587 |     axis: 1
 588 |   }
 589 | }
 590 | layer {
 591 |   name: "Mconv1_stage2"
 592 |   type: "Convolution"
 593 |   bottom: "concat_stage2"
 594 |   top: "Mconv1_stage2"
 595 |   param {
 596 |     lr_mult: 4.0
 597 |     decay_mult: 1
 598 |   }
 599 |   param {
 600 |     lr_mult: 8.0
 601 |     decay_mult: 0
 602 |   }
 603 |   convolution_param {
 604 |     num_output: 128
 605 |     pad: 3
 606 |     kernel_size: 7
 607 |     weight_filler {
 608 |       type: "gaussian"
 609 |       std: 0.01
 610 |     }
 611 |     bias_filler {
 612 |       type: "constant"
 613 |     }
 614 |     dilation: 1
 615 |   }
 616 | }
 617 | layer {
 618 |   name: "Mrelu1_2_stage2_1"
 619 |   type: "ReLU"
 620 |   bottom: "Mconv1_stage2"
 621 |   top: "Mconv1_stage2"
 622 | }
 623 | layer {
 624 |   name: "Mconv2_stage2"
 625 |   type: "Convolution"
 626 |   bottom: "Mconv1_stage2"
 627 |   top: "Mconv2_stage2"
 628 |   param {
 629 |     lr_mult: 4.0
 630 |     decay_mult: 1
 631 |   }
 632 |   param {
 633 |     lr_mult: 8.0
 634 |     decay_mult: 0
 635 |   }
 636 |   convolution_param {
 637 |     num_output: 128
 638 |     pad: 3
 639 |     kernel_size: 7
 640 |     weight_filler {
 641 |       type: "gaussian"
 642 |       std: 0.01
 643 |     }
 644 |     bias_filler {
 645 |       type: "constant"
 646 |     }
 647 |     dilation: 1
 648 |   }
 649 | }
 650 | layer {
 651 |   name: "Mrelu1_3_stage2_2"
 652 |   type: "ReLU"
 653 |   bottom: "Mconv2_stage2"
 654 |   top: "Mconv2_stage2"
 655 | }
 656 | layer {
 657 |   name: "Mconv3_stage2"
 658 |   type: "Convolution"
 659 |   bottom: "Mconv2_stage2"
 660 |   top: "Mconv3_stage2"
 661 |   param {
 662 |     lr_mult: 4.0
 663 |     decay_mult: 1
 664 |   }
 665 |   param {
 666 |     lr_mult: 8.0
 667 |     decay_mult: 0
 668 |   }
 669 |   convolution_param {
 670 |     num_output: 128
 671 |     pad: 3
 672 |     kernel_size: 7
 673 |     weight_filler {
 674 |       type: "gaussian"
 675 |       std: 0.01
 676 |     }
 677 |     bias_filler {
 678 |       type: "constant"
 679 |     }
 680 |     dilation: 1
 681 |   }
 682 | }
 683 | layer {
 684 |   name: "Mrelu1_4_stage2_3"
 685 |   type: "ReLU"
 686 |   bottom: "Mconv3_stage2"
 687 |   top: "Mconv3_stage2"
 688 | }
 689 | layer {
 690 |   name: "Mconv4_stage2"
 691 |   type: "Convolution"
 692 |   bottom: "Mconv3_stage2"
 693 |   top: "Mconv4_stage2"
 694 |   param {
 695 |     lr_mult: 4.0
 696 |     decay_mult: 1
 697 |   }
 698 |   param {
 699 |     lr_mult: 8.0
 700 |     decay_mult: 0
 701 |   }
 702 |   convolution_param {
 703 |     num_output: 128
 704 |     pad: 3
 705 |     kernel_size: 7
 706 |     weight_filler {
 707 |       type: "gaussian"
 708 |       std: 0.01
 709 |     }
 710 |     bias_filler {
 711 |       type: "constant"
 712 |     }
 713 |     dilation: 1
 714 |   }
 715 | }
 716 | layer {
 717 |   name: "Mrelu1_5_stage2_4"
 718 |   type: "ReLU"
 719 |   bottom: "Mconv4_stage2"
 720 |   top: "Mconv4_stage2"
 721 | }
 722 | layer {
 723 |   name: "Mconv5_stage2"
 724 |   type: "Convolution"
 725 |   bottom: "Mconv4_stage2"
 726 |   top: "Mconv5_stage2"
 727 |   param {
 728 |     lr_mult: 4.0
 729 |     decay_mult: 1
 730 |   }
 731 |   param {
 732 |     lr_mult: 8.0
 733 |     decay_mult: 0
 734 |   }
 735 |   convolution_param {
 736 |     num_output: 128
 737 |     pad: 3
 738 |     kernel_size: 7
 739 |     weight_filler {
 740 |       type: "gaussian"
 741 |       std: 0.01
 742 |     }
 743 |     bias_filler {
 744 |       type: "constant"
 745 |     }
 746 |     dilation: 1
 747 |   }
 748 | }
 749 | layer {
 750 |   name: "Mrelu1_6_stage2_5"
 751 |   type: "ReLU"
 752 |   bottom: "Mconv5_stage2"
 753 |   top: "Mconv5_stage2"
 754 | }
 755 | layer {
 756 |   name: "Mconv6_stage2"
 757 |   type: "Convolution"
 758 |   bottom: "Mconv5_stage2"
 759 |   top: "Mconv6_stage2"
 760 |   param {
 761 |     lr_mult: 4.0
 762 |     decay_mult: 1
 763 |   }
 764 |   param {
 765 |     lr_mult: 8.0
 766 |     decay_mult: 0
 767 |   }
 768 |   convolution_param {
 769 |     num_output: 128
 770 |     pad: 0
 771 |     kernel_size: 1
 772 |     weight_filler {
 773 |       type: "gaussian"
 774 |       std: 0.01
 775 |     }
 776 |     bias_filler {
 777 |       type: "constant"
 778 |     }
 779 |     dilation: 1
 780 |   }
 781 | }
 782 | layer {
 783 |   name: "Mrelu1_7_stage2_6"
 784 |   type: "ReLU"
 785 |   bottom: "Mconv6_stage2"
 786 |   top: "Mconv6_stage2"
 787 | }
 788 | layer {
 789 |   name: "Mconv7_stage2"
 790 |   type: "Convolution"
 791 |   bottom: "Mconv6_stage2"
 792 |   top: "Mconv7_stage2"
 793 |   param {
 794 |     lr_mult: 4.0
 795 |     decay_mult: 1
 796 |   }
 797 |   param {
 798 |     lr_mult: 8.0
 799 |     decay_mult: 0
 800 |   }
 801 |   convolution_param {
 802 |     num_output: 22
 803 |     pad: 0
 804 |     kernel_size: 1
 805 |     weight_filler {
 806 |       type: "gaussian"
 807 |       std: 0.01
 808 |     }
 809 |     bias_filler {
 810 |       type: "constant"
 811 |     }
 812 |     dilation: 1
 813 |   }
 814 | }
 815 | layer {
 816 |   name: "concat_stage3"
 817 |   type: "Concat"
 818 |   bottom: "Mconv7_stage2"
 819 |   bottom: "conv5_3_CPM"
 820 |   top: "concat_stage3"
 821 |   concat_param {
 822 |     axis: 1
 823 |   }
 824 | }
 825 | layer {
 826 |   name: "Mconv1_stage3"
 827 |   type: "Convolution"
 828 |   bottom: "concat_stage3"
 829 |   top: "Mconv1_stage3"
 830 |   param {
 831 |     lr_mult: 4.0
 832 |     decay_mult: 1
 833 |   }
 834 |   param {
 835 |     lr_mult: 8.0
 836 |     decay_mult: 0
 837 |   }
 838 |   convolution_param {
 839 |     num_output: 128
 840 |     pad: 3
 841 |     kernel_size: 7
 842 |     weight_filler {
 843 |       type: "gaussian"
 844 |       std: 0.01
 845 |     }
 846 |     bias_filler {
 847 |       type: "constant"
 848 |     }
 849 |     dilation: 1
 850 |   }
 851 | }
 852 | layer {
 853 |   name: "Mrelu1_2_stage3_1"
 854 |   type: "ReLU"
 855 |   bottom: "Mconv1_stage3"
 856 |   top: "Mconv1_stage3"
 857 | }
 858 | layer {
 859 |   name: "Mconv2_stage3"
 860 |   type: "Convolution"
 861 |   bottom: "Mconv1_stage3"
 862 |   top: "Mconv2_stage3"
 863 |   param {
 864 |     lr_mult: 4.0
 865 |     decay_mult: 1
 866 |   }
 867 |   param {
 868 |     lr_mult: 8.0
 869 |     decay_mult: 0
 870 |   }
 871 |   convolution_param {
 872 |     num_output: 128
 873 |     pad: 3
 874 |     kernel_size: 7
 875 |     weight_filler {
 876 |       type: "gaussian"
 877 |       std: 0.01
 878 |     }
 879 |     bias_filler {
 880 |       type: "constant"
 881 |     }
 882 |     dilation: 1
 883 |   }
 884 | }
 885 | layer {
 886 |   name: "Mrelu1_3_stage3_2"
 887 |   type: "ReLU"
 888 |   bottom: "Mconv2_stage3"
 889 |   top: "Mconv2_stage3"
 890 | }
 891 | layer {
 892 |   name: "Mconv3_stage3"
 893 |   type: "Convolution"
 894 |   bottom: "Mconv2_stage3"
 895 |   top: "Mconv3_stage3"
 896 |   param {
 897 |     lr_mult: 4.0
 898 |     decay_mult: 1
 899 |   }
 900 |   param {
 901 |     lr_mult: 8.0
 902 |     decay_mult: 0
 903 |   }
 904 |   convolution_param {
 905 |     num_output: 128
 906 |     pad: 3
 907 |     kernel_size: 7
 908 |     weight_filler {
 909 |       type: "gaussian"
 910 |       std: 0.01
 911 |     }
 912 |     bias_filler {
 913 |       type: "constant"
 914 |     }
 915 |     dilation: 1
 916 |   }
 917 | }
 918 | layer {
 919 |   name: "Mrelu1_4_stage3_3"
 920 |   type: "ReLU"
 921 |   bottom: "Mconv3_stage3"
 922 |   top: "Mconv3_stage3"
 923 | }
 924 | layer {
 925 |   name: "Mconv4_stage3"
 926 |   type: "Convolution"
 927 |   bottom: "Mconv3_stage3"
 928 |   top: "Mconv4_stage3"
 929 |   param {
 930 |     lr_mult: 4.0
 931 |     decay_mult: 1
 932 |   }
 933 |   param {
 934 |     lr_mult: 8.0
 935 |     decay_mult: 0
 936 |   }
 937 |   convolution_param {
 938 |     num_output: 128
 939 |     pad: 3
 940 |     kernel_size: 7
 941 |     weight_filler {
 942 |       type: "gaussian"
 943 |       std: 0.01
 944 |     }
 945 |     bias_filler {
 946 |       type: "constant"
 947 |     }
 948 |     dilation: 1
 949 |   }
 950 | }
 951 | layer {
 952 |   name: "Mrelu1_5_stage3_4"
 953 |   type: "ReLU"
 954 |   bottom: "Mconv4_stage3"
 955 |   top: "Mconv4_stage3"
 956 | }
 957 | layer {
 958 |   name: "Mconv5_stage3"
 959 |   type: "Convolution"
 960 |   bottom: "Mconv4_stage3"
 961 |   top: "Mconv5_stage3"
 962 |   param {
 963 |     lr_mult: 4.0
 964 |     decay_mult: 1
 965 |   }
 966 |   param {
 967 |     lr_mult: 8.0
 968 |     decay_mult: 0
 969 |   }
 970 |   convolution_param {
 971 |     num_output: 128
 972 |     pad: 3
 973 |     kernel_size: 7
 974 |     weight_filler {
 975 |       type: "gaussian"
 976 |       std: 0.01
 977 |     }
 978 |     bias_filler {
 979 |       type: "constant"
 980 |     }
 981 |     dilation: 1
 982 |   }
 983 | }
 984 | layer {
 985 |   name: "Mrelu1_6_stage3_5"
 986 |   type: "ReLU"
 987 |   bottom: "Mconv5_stage3"
 988 |   top: "Mconv5_stage3"
 989 | }
 990 | layer {
 991 |   name: "Mconv6_stage3"
 992 |   type: "Convolution"
 993 |   bottom: "Mconv5_stage3"
 994 |   top: "Mconv6_stage3"
 995 |   param {
 996 |     lr_mult: 4.0
 997 |     decay_mult: 1
 998 |   }
 999 |   param {
1000 |     lr_mult: 8.0
1001 |     decay_mult: 0
1002 |   }
1003 |   convolution_param {
1004 |     num_output: 128
1005 |     pad: 0
1006 |     kernel_size: 1
1007 |     weight_filler {
1008 |       type: "gaussian"
1009 |       std: 0.01
1010 |     }
1011 |     bias_filler {
1012 |       type: "constant"
1013 |     }
1014 |     dilation: 1
1015 |   }
1016 | }
1017 | layer {
1018 |   name: "Mrelu1_7_stage3_6"
1019 |   type: "ReLU"
1020 |   bottom: "Mconv6_stage3"
1021 |   top: "Mconv6_stage3"
1022 | }
1023 | layer {
1024 |   name: "Mconv7_stage3"
1025 |   type: "Convolution"
1026 |   bottom: "Mconv6_stage3"
1027 |   top: "Mconv7_stage3"
1028 |   param {
1029 |     lr_mult: 4.0
1030 |     decay_mult: 1
1031 |   }
1032 |   param {
1033 |     lr_mult: 8.0
1034 |     decay_mult: 0
1035 |   }
1036 |   convolution_param {
1037 |     num_output: 22
1038 |     pad: 0
1039 |     kernel_size: 1
1040 |     weight_filler {
1041 |       type: "gaussian"
1042 |       std: 0.01
1043 |     }
1044 |     bias_filler {
1045 |       type: "constant"
1046 |     }
1047 |     dilation: 1
1048 |   }
1049 | }
1050 | layer {
1051 |   name: "concat_stage4"
1052 |   type: "Concat"
1053 |   bottom: "Mconv7_stage3"
1054 |   bottom: "conv5_3_CPM"
1055 |   top: "concat_stage4"
1056 |   concat_param {
1057 |     axis: 1
1058 |   }
1059 | }
1060 | layer {
1061 |   name: "Mconv1_stage4"
1062 |   type: "Convolution"
1063 |   bottom: "concat_stage4"
1064 |   top: "Mconv1_stage4"
1065 |   param {
1066 |     lr_mult: 4.0
1067 |     decay_mult: 1
1068 |   }
1069 |   param {
1070 |     lr_mult: 8.0
1071 |     decay_mult: 0
1072 |   }
1073 |   convolution_param {
1074 |     num_output: 128
1075 |     pad: 3
1076 |     kernel_size: 7
1077 |     weight_filler {
1078 |       type: "gaussian"
1079 |       std: 0.01
1080 |     }
1081 |     bias_filler {
1082 |       type: "constant"
1083 |     }
1084 |     dilation: 1
1085 |   }
1086 | }
1087 | layer {
1088 |   name: "Mrelu1_2_stage4_1"
1089 |   type: "ReLU"
1090 |   bottom: "Mconv1_stage4"
1091 |   top: "Mconv1_stage4"
1092 | }
1093 | layer {
1094 |   name: "Mconv2_stage4"
1095 |   type: "Convolution"
1096 |   bottom: "Mconv1_stage4"
1097 |   top: "Mconv2_stage4"
1098 |   param {
1099 |     lr_mult: 4.0
1100 |     decay_mult: 1
1101 |   }
1102 |   param {
1103 |     lr_mult: 8.0
1104 |     decay_mult: 0
1105 |   }
1106 |   convolution_param {
1107 |     num_output: 128
1108 |     pad: 3
1109 |     kernel_size: 7
1110 |     weight_filler {
1111 |       type: "gaussian"
1112 |       std: 0.01
1113 |     }
1114 |     bias_filler {
1115 |       type: "constant"
1116 |     }
1117 |     dilation: 1
1118 |   }
1119 | }
1120 | layer {
1121 |   name: "Mrelu1_3_stage4_2"
1122 |   type: "ReLU"
1123 |   bottom: "Mconv2_stage4"
1124 |   top: "Mconv2_stage4"
1125 | }
1126 | layer {
1127 |   name: "Mconv3_stage4"
1128 |   type: "Convolution"
1129 |   bottom: "Mconv2_stage4"
1130 |   top: "Mconv3_stage4"
1131 |   param {
1132 |     lr_mult: 4.0
1133 |     decay_mult: 1
1134 |   }
1135 |   param {
1136 |     lr_mult: 8.0
1137 |     decay_mult: 0
1138 |   }
1139 |   convolution_param {
1140 |     num_output: 128
1141 |     pad: 3
1142 |     kernel_size: 7
1143 |     weight_filler {
1144 |       type: "gaussian"
1145 |       std: 0.01
1146 |     }
1147 |     bias_filler {
1148 |       type: "constant"
1149 |     }
1150 |     dilation: 1
1151 |   }
1152 | }
1153 | layer {
1154 |   name: "Mrelu1_4_stage4_3"
1155 |   type: "ReLU"
1156 |   bottom: "Mconv3_stage4"
1157 |   top: "Mconv3_stage4"
1158 | }
1159 | layer {
1160 |   name: "Mconv4_stage4"
1161 |   type: "Convolution"
1162 |   bottom: "Mconv3_stage4"
1163 |   top: "Mconv4_stage4"
1164 |   param {
1165 |     lr_mult: 4.0
1166 |     decay_mult: 1
1167 |   }
1168 |   param {
1169 |     lr_mult: 8.0
1170 |     decay_mult: 0
1171 |   }
1172 |   convolution_param {
1173 |     num_output: 128
1174 |     pad: 3
1175 |     kernel_size: 7
1176 |     weight_filler {
1177 |       type: "gaussian"
1178 |       std: 0.01
1179 |     }
1180 |     bias_filler {
1181 |       type: "constant"
1182 |     }
1183 |     dilation: 1
1184 |   }
1185 | }
1186 | layer {
1187 |   name: "Mrelu1_5_stage4_4"
1188 |   type: "ReLU"
1189 |   bottom: "Mconv4_stage4"
1190 |   top: "Mconv4_stage4"
1191 | }
1192 | layer {
1193 |   name: "Mconv5_stage4"
1194 |   type: "Convolution"
1195 |   bottom: "Mconv4_stage4"
1196 |   top: "Mconv5_stage4"
1197 |   param {
1198 |     lr_mult: 4.0
1199 |     decay_mult: 1
1200 |   }
1201 |   param {
1202 |     lr_mult: 8.0
1203 |     decay_mult: 0
1204 |   }
1205 |   convolution_param {
1206 |     num_output: 128
1207 |     pad: 3
1208 |     kernel_size: 7
1209 |     weight_filler {
1210 |       type: "gaussian"
1211 |       std: 0.01
1212 |     }
1213 |     bias_filler {
1214 |       type: "constant"
1215 |     }
1216 |     dilation: 1
1217 |   }
1218 | }
1219 | layer {
1220 |   name: "Mrelu1_6_stage4_5"
1221 |   type: "ReLU"
1222 |   bottom: "Mconv5_stage4"
1223 |   top: "Mconv5_stage4"
1224 | }
1225 | layer {
1226 |   name: "Mconv6_stage4"
1227 |   type: "Convolution"
1228 |   bottom: "Mconv5_stage4"
1229 |   top: "Mconv6_stage4"
1230 |   param {
1231 |     lr_mult: 4.0
1232 |     decay_mult: 1
1233 |   }
1234 |   param {
1235 |     lr_mult: 8.0
1236 |     decay_mult: 0
1237 |   }
1238 |   convolution_param {
1239 |     num_output: 128
1240 |     pad: 0
1241 |     kernel_size: 1
1242 |     weight_filler {
1243 |       type: "gaussian"
1244 |       std: 0.01
1245 |     }
1246 |     bias_filler {
1247 |       type: "constant"
1248 |     }
1249 |     dilation: 1
1250 |   }
1251 | }
1252 | layer {
1253 |   name: "Mrelu1_7_stage4_6"
1254 |   type: "ReLU"
1255 |   bottom: "Mconv6_stage4"
1256 |   top: "Mconv6_stage4"
1257 | }
1258 | layer {
1259 |   name: "Mconv7_stage4"
1260 |   type: "Convolution"
1261 |   bottom: "Mconv6_stage4"
1262 |   top: "Mconv7_stage4"
1263 |   param {
1264 |     lr_mult: 4.0
1265 |     decay_mult: 1
1266 |   }
1267 |   param {
1268 |     lr_mult: 8.0
1269 |     decay_mult: 0
1270 |   }
1271 |   convolution_param {
1272 |     num_output: 22
1273 |     pad: 0
1274 |     kernel_size: 1
1275 |     weight_filler {
1276 |       type: "gaussian"
1277 |       std: 0.01
1278 |     }
1279 |     bias_filler {
1280 |       type: "constant"
1281 |     }
1282 |     dilation: 1
1283 |   }
1284 | }
1285 | layer {
1286 |   name: "concat_stage5"
1287 |   type: "Concat"
1288 |   bottom: "Mconv7_stage4"
1289 |   bottom: "conv5_3_CPM"
1290 |   top: "concat_stage5"
1291 |   concat_param {
1292 |     axis: 1
1293 |   }
1294 | }
1295 | layer {
1296 |   name: "Mconv1_stage5"
1297 |   type: "Convolution"
1298 |   bottom: "concat_stage5"
1299 |   top: "Mconv1_stage5"
1300 |   param {
1301 |     lr_mult: 4.0
1302 |     decay_mult: 1
1303 |   }
1304 |   param {
1305 |     lr_mult: 8.0
1306 |     decay_mult: 0
1307 |   }
1308 |   convolution_param {
1309 |     num_output: 128
1310 |     pad: 3
1311 |     kernel_size: 7
1312 |     weight_filler {
1313 |       type: "gaussian"
1314 |       std: 0.01
1315 |     }
1316 |     bias_filler {
1317 |       type: "constant"
1318 |     }
1319 |     dilation: 1
1320 |   }
1321 | }
1322 | layer {
1323 |   name: "Mrelu1_2_stage5_1"
1324 |   type: "ReLU"
1325 |   bottom: "Mconv1_stage5"
1326 |   top: "Mconv1_stage5"
1327 | }
1328 | layer {
1329 |   name: "Mconv2_stage5"
1330 |   type: "Convolution"
1331 |   bottom: "Mconv1_stage5"
1332 |   top: "Mconv2_stage5"
1333 |   param {
1334 |     lr_mult: 4.0
1335 |     decay_mult: 1
1336 |   }
1337 |   param {
1338 |     lr_mult: 8.0
1339 |     decay_mult: 0
1340 |   }
1341 |   convolution_param {
1342 |     num_output: 128
1343 |     pad: 3
1344 |     kernel_size: 7
1345 |     weight_filler {
1346 |       type: "gaussian"
1347 |       std: 0.01
1348 |     }
1349 |     bias_filler {
1350 |       type: "constant"
1351 |     }
1352 |     dilation: 1
1353 |   }
1354 | }
1355 | layer {
1356 |   name: "Mrelu1_3_stage5_2"
1357 |   type: "ReLU"
1358 |   bottom: "Mconv2_stage5"
1359 |   top: "Mconv2_stage5"
1360 | }
1361 | layer {
1362 |   name: "Mconv3_stage5"
1363 |   type: "Convolution"
1364 |   bottom: "Mconv2_stage5"
1365 |   top: "Mconv3_stage5"
1366 |   param {
1367 |     lr_mult: 4.0
1368 |     decay_mult: 1
1369 |   }
1370 |   param {
1371 |     lr_mult: 8.0
1372 |     decay_mult: 0
1373 |   }
1374 |   convolution_param {
1375 |     num_output: 128
1376 |     pad: 3
1377 |     kernel_size: 7
1378 |     weight_filler {
1379 |       type: "gaussian"
1380 |       std: 0.01
1381 |     }
1382 |     bias_filler {
1383 |       type: "constant"
1384 |     }
1385 |     dilation: 1
1386 |   }
1387 | }
1388 | layer {
1389 |   name: "Mrelu1_4_stage5_3"
1390 |   type: "ReLU"
1391 |   bottom: "Mconv3_stage5"
1392 |   top: "Mconv3_stage5"
1393 | }
1394 | layer {
1395 |   name: "Mconv4_stage5"
1396 |   type: "Convolution"
1397 |   bottom: "Mconv3_stage5"
1398 |   top: "Mconv4_stage5"
1399 |   param {
1400 |     lr_mult: 4.0
1401 |     decay_mult: 1
1402 |   }
1403 |   param {
1404 |     lr_mult: 8.0
1405 |     decay_mult: 0
1406 |   }
1407 |   convolution_param {
1408 |     num_output: 128
1409 |     pad: 3
1410 |     kernel_size: 7
1411 |     weight_filler {
1412 |       type: "gaussian"
1413 |       std: 0.01
1414 |     }
1415 |     bias_filler {
1416 |       type: "constant"
1417 |     }
1418 |     dilation: 1
1419 |   }
1420 | }
1421 | layer {
1422 |   name: "Mrelu1_5_stage5_4"
1423 |   type: "ReLU"
1424 |   bottom: "Mconv4_stage5"
1425 |   top: "Mconv4_stage5"
1426 | }
1427 | layer {
1428 |   name: "Mconv5_stage5"
1429 |   type: "Convolution"
1430 |   bottom: "Mconv4_stage5"
1431 |   top: "Mconv5_stage5"
1432 |   param {
1433 |     lr_mult: 4.0
1434 |     decay_mult: 1
1435 |   }
1436 |   param {
1437 |     lr_mult: 8.0
1438 |     decay_mult: 0
1439 |   }
1440 |   convolution_param {
1441 |     num_output: 128
1442 |     pad: 3
1443 |     kernel_size: 7
1444 |     weight_filler {
1445 |       type: "gaussian"
1446 |       std: 0.01
1447 |     }
1448 |     bias_filler {
1449 |       type: "constant"
1450 |     }
1451 |     dilation: 1
1452 |   }
1453 | }
1454 | layer {
1455 |   name: "Mrelu1_6_stage5_5"
1456 |   type: "ReLU"
1457 |   bottom: "Mconv5_stage5"
1458 |   top: "Mconv5_stage5"
1459 | }
1460 | layer {
1461 |   name: "Mconv6_stage5"
1462 |   type: "Convolution"
1463 |   bottom: "Mconv5_stage5"
1464 |   top: "Mconv6_stage5"
1465 |   param {
1466 |     lr_mult: 4.0
1467 |     decay_mult: 1
1468 |   }
1469 |   param {
1470 |     lr_mult: 8.0
1471 |     decay_mult: 0
1472 |   }
1473 |   convolution_param {
1474 |     num_output: 128
1475 |     pad: 0
1476 |     kernel_size: 1
1477 |     weight_filler {
1478 |       type: "gaussian"
1479 |       std: 0.01
1480 |     }
1481 |     bias_filler {
1482 |       type: "constant"
1483 |     }
1484 |     dilation: 1
1485 |   }
1486 | }
1487 | layer {
1488 |   name: "Mrelu1_7_stage5_6"
1489 |   type: "ReLU"
1490 |   bottom: "Mconv6_stage5"
1491 |   top: "Mconv6_stage5"
1492 | }
1493 | layer {
1494 |   name: "Mconv7_stage5"
1495 |   type: "Convolution"
1496 |   bottom: "Mconv6_stage5"
1497 |   top: "Mconv7_stage5"
1498 |   param {
1499 |     lr_mult: 4.0
1500 |     decay_mult: 1
1501 |   }
1502 |   param {
1503 |     lr_mult: 8.0
1504 |     decay_mult: 0
1505 |   }
1506 |   convolution_param {
1507 |     num_output: 22
1508 |     pad: 0
1509 |     kernel_size: 1
1510 |     weight_filler {
1511 |       type: "gaussian"
1512 |       std: 0.01
1513 |     }
1514 |     bias_filler {
1515 |       type: "constant"
1516 |     }
1517 |     dilation: 1
1518 |   }
1519 | }
1520 | layer {
1521 |   name: "concat_stage6"
1522 |   type: "Concat"
1523 |   bottom: "Mconv7_stage5"
1524 |   bottom: "conv5_3_CPM"
1525 |   top: "concat_stage6"
1526 |   concat_param {
1527 |     axis: 1
1528 |   }
1529 | }
1530 | layer {
1531 |   name: "Mconv1_stage6"
1532 |   type: "Convolution"
1533 |   bottom: "concat_stage6"
1534 |   top: "Mconv1_stage6"
1535 |   param {
1536 |     lr_mult: 4.0
1537 |     decay_mult: 1
1538 |   }
1539 |   param {
1540 |     lr_mult: 8.0
1541 |     decay_mult: 0
1542 |   }
1543 |   convolution_param {
1544 |     num_output: 128
1545 |     pad: 3
1546 |     kernel_size: 7
1547 |     weight_filler {
1548 |       type: "gaussian"
1549 |       std: 0.01
1550 |     }
1551 |     bias_filler {
1552 |       type: "constant"
1553 |     }
1554 |     dilation: 1
1555 |   }
1556 | }
1557 | layer {
1558 |   name: "Mrelu1_2_stage6_1"
1559 |   type: "ReLU"
1560 |   bottom: "Mconv1_stage6"
1561 |   top: "Mconv1_stage6"
1562 | }
1563 | layer {
1564 |   name: "Mconv2_stage6"
1565 |   type: "Convolution"
1566 |   bottom: "Mconv1_stage6"
1567 |   top: "Mconv2_stage6"
1568 |   param {
1569 |     lr_mult: 4.0
1570 |     decay_mult: 1
1571 |   }
1572 |   param {
1573 |     lr_mult: 8.0
1574 |     decay_mult: 0
1575 |   }
1576 |   convolution_param {
1577 |     num_output: 128
1578 |     pad: 3
1579 |     kernel_size: 7
1580 |     weight_filler {
1581 |       type: "gaussian"
1582 |       std: 0.01
1583 |     }
1584 |     bias_filler {
1585 |       type: "constant"
1586 |     }
1587 |     dilation: 1
1588 |   }
1589 | }
1590 | layer {
1591 |   name: "Mrelu1_3_stage6_2"
1592 |   type: "ReLU"
1593 |   bottom: "Mconv2_stage6"
1594 |   top: "Mconv2_stage6"
1595 | }
1596 | layer {
1597 |   name: "Mconv3_stage6"
1598 |   type: "Convolution"
1599 |   bottom: "Mconv2_stage6"
1600 |   top: "Mconv3_stage6"
1601 |   param {
1602 |     lr_mult: 4.0
1603 |     decay_mult: 1
1604 |   }
1605 |   param {
1606 |     lr_mult: 8.0
1607 |     decay_mult: 0
1608 |   }
1609 |   convolution_param {
1610 |     num_output: 128
1611 |     pad: 3
1612 |     kernel_size: 7
1613 |     weight_filler {
1614 |       type: "gaussian"
1615 |       std: 0.01
1616 |     }
1617 |     bias_filler {
1618 |       type: "constant"
1619 |     }
1620 |     dilation: 1
1621 |   }
1622 | }
1623 | layer {
1624 |   name: "Mrelu1_4_stage6_3"
1625 |   type: "ReLU"
1626 |   bottom: "Mconv3_stage6"
1627 |   top: "Mconv3_stage6"
1628 | }
1629 | layer {
1630 |   name: "Mconv4_stage6"
1631 |   type: "Convolution"
1632 |   bottom: "Mconv3_stage6"
1633 |   top: "Mconv4_stage6"
1634 |   param {
1635 |     lr_mult: 4.0
1636 |     decay_mult: 1
1637 |   }
1638 |   param {
1639 |     lr_mult: 8.0
1640 |     decay_mult: 0
1641 |   }
1642 |   convolution_param {
1643 |     num_output: 128
1644 |     pad: 3
1645 |     kernel_size: 7
1646 |     weight_filler {
1647 |       type: "gaussian"
1648 |       std: 0.01
1649 |     }
1650 |     bias_filler {
1651 |       type: "constant"
1652 |     }
1653 |     dilation: 1
1654 |   }
1655 | }
1656 | layer {
1657 |   name: "Mrelu1_5_stage6_4"
1658 |   type: "ReLU"
1659 |   bottom: "Mconv4_stage6"
1660 |   top: "Mconv4_stage6"
1661 | }
1662 | layer {
1663 |   name: "Mconv5_stage6"
1664 |   type: "Convolution"
1665 |   bottom: "Mconv4_stage6"
1666 |   top: "Mconv5_stage6"
1667 |   param {
1668 |     lr_mult: 4.0
1669 |     decay_mult: 1
1670 |   }
1671 |   param {
1672 |     lr_mult: 8.0
1673 |     decay_mult: 0
1674 |   }
1675 |   convolution_param {
1676 |     num_output: 128
1677 |     pad: 3
1678 |     kernel_size: 7
1679 |     weight_filler {
1680 |       type: "gaussian"
1681 |       std: 0.01
1682 |     }
1683 |     bias_filler {
1684 |       type: "constant"
1685 |     }
1686 |     dilation: 1
1687 |   }
1688 | }
1689 | layer {
1690 |   name: "Mrelu1_6_stage6_5"
1691 |   type: "ReLU"
1692 |   bottom: "Mconv5_stage6"
1693 |   top: "Mconv5_stage6"
1694 | }
1695 | layer {
1696 |   name: "Mconv6_stage6"
1697 |   type: "Convolution"
1698 |   bottom: "Mconv5_stage6"
1699 |   top: "Mconv6_stage6"
1700 |   param {
1701 |     lr_mult: 4.0
1702 |     decay_mult: 1
1703 |   }
1704 |   param {
1705 |     lr_mult: 8.0
1706 |     decay_mult: 0
1707 |   }
1708 |   convolution_param {
1709 |     num_output: 128
1710 |     pad: 0
1711 |     kernel_size: 1
1712 |     weight_filler {
1713 |       type: "gaussian"
1714 |       std: 0.01
1715 |     }
1716 |     bias_filler {
1717 |       type: "constant"
1718 |     }
1719 |     dilation: 1
1720 |   }
1721 | }
1722 | layer {
1723 |   name: "Mrelu1_7_stage6_6"
1724 |   type: "ReLU"
1725 |   bottom: "Mconv6_stage6"
1726 |   top: "Mconv6_stage6"
1727 | }
1728 | layer {
1729 |   name: "Mconv7_stage6"
1730 |   type: "Convolution"
1731 |   bottom: "Mconv6_stage6"
1732 | #   top: "Mconv7_stage6"   (disabled: this layer's output blob is named "net_output" instead)
1733 |   top: "net_output"
1734 |   param {
1735 |     lr_mult: 4.0
1736 |     decay_mult: 1
1737 |   }
1738 |   param {
1739 |     lr_mult: 8.0
1740 |     decay_mult: 0
1741 |   }
1742 |   convolution_param {
1743 |     num_output: 22
1744 |     pad: 0
1745 |     kernel_size: 1
1746 |     weight_filler {
1747 |       type: "gaussian"
1748 |       std: 0.01
1749 |     }
1750 |     bias_filler {
1751 |       type: "constant"
1752 |     }
1753 |     dilation: 1
1754 |   }
1755 | }
1756 | 
1757 | 


--------------------------------------------------------------------------------
/handPoseImage.py:
--------------------------------------------------------------------------------
 1 | from __future__ import division
 2 | import cv2
 3 | import time
 4 | import numpy as np
 5 | 
 6 | protoFile = "hand/pose_deploy.prototxt"
 7 | weightsFile = "hand/pose_iter_102000.caffemodel"
 8 | nPoints = 22
 9 | POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ]
10 | net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
11 | 
12 | frame = cv2.imread("right-frontal.jpg")
13 | frameCopy = np.copy(frame)
14 | frameWidth = frame.shape[1]
15 | frameHeight = frame.shape[0]
16 | aspect_ratio = frameWidth/frameHeight
17 | 
18 | threshold = 0.1
19 | 
20 | t = time.time()
21 | # input image dimensions for the network
22 | inHeight = 368
23 | inWidth = int(((aspect_ratio*inHeight)*8)//8)
24 | inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
25 | 
26 | net.setInput(inpBlob)
27 | 
28 | output = net.forward()
29 | print("time taken by network : {:.3f}".format(time.time() - t))
30 | 
31 | # Empty list to store the detected keypoints
32 | points = []
33 | 
34 | for i in range(nPoints):
35 |     # confidence map of corresponding body's part.
36 |     probMap = output[0, i, :, :]
37 |     probMap = cv2.resize(probMap, (frameWidth, frameHeight))
38 | 
39 |     # Find global maxima of the probMap.
40 |     minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
41 | 
42 |     if prob > threshold :
43 |         cv2.circle(frameCopy, (int(point[0]), int(point[1])), 8, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
44 |         cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, lineType=cv2.LINE_AA)
45 | 
46 |         # Add the point to the list if the probability is greater than the threshold
47 |         points.append((int(point[0]), int(point[1])))
48 |     else :
49 |         points.append(None)
50 | 
51 | # Draw Skeleton
52 | for pair in POSE_PAIRS:
53 |     partA = pair[0]
54 |     partB = pair[1]
55 | 
56 |     if points[partA] and points[partB]:
57 |         cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2)
58 |         cv2.circle(frame, points[partA], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
59 |         cv2.circle(frame, points[partB], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
60 | 
61 | 
62 | cv2.imshow('Output-Keypoints', frameCopy)
63 | cv2.imshow('Output-Skeleton', frame)
64 | 
65 | 
66 | cv2.imwrite('Output-Keypoints.jpg', frameCopy)
67 | cv2.imwrite('Output-Skeleton.jpg', frame)
68 | 
69 | print("Total time taken : {:.3f}".format(time.time() - t))
70 | 
71 | cv2.waitKey(0)
72 | 


--------------------------------------------------------------------------------
/handPoseVideo.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import time
 3 | import numpy as np
 4 | 
 5 | 
 6 | protoFile = "hand/pose_deploy.prototxt"
 7 | weightsFile = "hand/pose_iter_102000.caffemodel"
 8 | nPoints = 22
 9 | POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ]
10 | 
11 | threshold = 0.2
12 | 
13 | 
14 | input_source = "asl.mp4"
15 | cap = cv2.VideoCapture(input_source)
16 | hasFrame, frame = cap.read()
17 | 
18 | frameWidth = frame.shape[1]
19 | frameHeight = frame.shape[0]
20 | 
21 | aspect_ratio = frameWidth/frameHeight
22 | 
23 | inHeight = 368
24 | inWidth = int(((aspect_ratio*inHeight)*8)//8)
25 | 
26 | vid_writer = cv2.VideoWriter('output.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 15, (frame.shape[1],frame.shape[0]))
27 | 
28 | net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
29 | k = 0
30 | while 1:
31 |     k+=1
32 |     t = time.time()
33 |     hasFrame, frame = cap.read()
34 |     frameCopy = np.copy(frame)
35 |     if not hasFrame:
36 |         cv2.waitKey()
37 |         break
38 | 
39 |     inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
40 |                               (0, 0, 0), swapRB=False, crop=False)
41 | 
42 |     net.setInput(inpBlob)
43 | 
44 |     output = net.forward()
45 | 
46 |     print("forward = {}".format(time.time() - t))
47 | 
48 |     # Empty list to store the detected keypoints
49 |     points = []
50 | 
51 |     for i in range(nPoints):
52 |         # confidence map of corresponding body's part.
53 |         probMap = output[0, i, :, :]
54 |         probMap = cv2.resize(probMap, (frameWidth, frameHeight))
55 | 
56 |         # Find global maxima of the probMap.
57 |         minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
58 | 
59 |         if prob > threshold :
60 |             cv2.circle(frameCopy, (int(point[0]), int(point[1])), 6, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
61 |             cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, .8, (0, 0, 255), 2, lineType=cv2.LINE_AA)
62 | 
63 |             # Add the point to the list if the probability is greater than the threshold
64 |             points.append((int(point[0]), int(point[1])))
65 |         else :
66 |             points.append(None)
67 | 
68 |     # Draw Skeleton
69 |     for pair in POSE_PAIRS:
70 |         partA = pair[0]
71 |         partB = pair[1]
72 | 
73 |         if points[partA] and points[partB]:
74 |             cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2, lineType=cv2.LINE_AA)
75 |             cv2.circle(frame, points[partA], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
76 |             cv2.circle(frame, points[partB], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
77 | 
78 |     print("Time Taken for frame = {}".format(time.time() - t))
79 | 
80 |     # cv2.putText(frame, "time taken = {:.2f} sec".format(time.time() - t), (50, 50), cv2.FONT_HERSHEY_COMPLEX, .8, (255, 50, 0), 2, lineType=cv2.LINE_AA)
81 |     # cv2.putText(frame, "Hand Pose using OpenCV", (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 50, 0), 2, lineType=cv2.LINE_AA)
82 |     cv2.imshow('Output-Skeleton', frame)
83 |     # cv2.imwrite("video_output/{:03d}.jpg".format(k), frame)
84 |     key = cv2.waitKey(1)
85 |     if key == 27:
86 |         break
87 | 
88 |     print("total = {}".format(time.time() - t))
89 | 
90 |     vid_writer.write(frame)
91 | 
92 | vid_writer.release()
93 | 


--------------------------------------------------------------------------------