├── .gitignore ├── CMakeLists.txt ├── HandKeypointDetector.py ├── README.md ├── __init__.py ├── data ├── front-back.jpg └── hand.jpg ├── getModels.sh ├── hand └── pose_deploy.prototxt ├── handPoseImage.py └── handPoseVideo.py /.gitignore: -------------------------------------------------------------------------------- 1 | /hand/pose_iter_102000.caffemodel 2 | /out 3 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.12) 2 | 3 | PROJECT(handPose) 4 | 5 | find_package( OpenCV REQUIRED ) 6 | 7 | include_directories( ${OpenCV_INCLUDE_DIRS}) 8 | 9 | MACRO(add_example name) 10 | ADD_EXECUTABLE(${name} ${name}.cpp) 11 | TARGET_LINK_LIBRARIES(${name} ${OpenCV_LIBS}) 12 | ENDMACRO() 13 | 14 | 15 | add_example(handPoseImage) 16 | add_example(handPoseVideo) 17 | -------------------------------------------------------------------------------- /HandKeypointDetector.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import glob 4 | import sys 5 | import cv2 6 | import time 7 | import numpy as np 8 | import os 9 | import shutil 10 | class HandKeypointDetector(): 11 | def __init__(self,output_folder,show_debug=False): 12 | self.show_debug = show_debug 13 | file_dir = os.path.abspath(os.path.dirname(__file__)) 14 | self.protoFile = file_dir+"\\hand\\pose_deploy.prototxt" 15 | self.weightsFile = file_dir+"\\hand\\pose_iter_102000.caffemodel" 16 | self.nPoints = 22 17 | 18 | self.data_out = output_folder 19 | if not os.path.exists(output_folder): 20 | os.mkdir(output_folder) 21 | else: 22 | shutil.rmtree(output_folder) 23 | time.sleep(1) 24 | os.mkdir(output_folder) 25 | self.keypoints = np.zeros((2*(self.nPoints -1),3)) 26 | self.resize_factor = 0.5 27 | self.rearrange_finger_indices = np.array([0, 4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13, 20, 19, 18, 17]) 28 | self.min_number_of_points = 8 29 | self.confidence_for_roi = 0.05 30 | self.roi_expansion = 0.09 31 | self.POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ] 32 | self.net = cv2.dnn.readNetFromCaffe(self.protoFile, self.weightsFile) 33 | def storeKeyPoints(self,prefix): 34 | cv2.imwrite(self.data_out + '\\' + prefix + self.output_file_name + '.png', self.debug_image) 35 | np.savez(self.data_out + '\\{}.npz'.format(self.output_file_name), num_hands=1, kp_coord_uv=self.keypoints[:, 0:2], 36 | kp_visible=self.keypoints[:, 2], ) 37 | 38 | def detectKeyPoints(self,data_folder): 39 | self.debug_image = None 40 | self.keypoints = np.zeros((2*(self.nPoints -1),3)) 41 | self.output_file_name = '' 42 | single_imag = False 43 | 44 | try: 45 | if os.path.isdir(data_folder): 46 | files = glob.glob(data_folder + '\*.png') 47 | bb=None 48 | else: 49 | files = [data_folder] 50 | single_imag = False 51 | except: 52 | single_imag = True 53 | files = [0] 54 | for f in range(0,len(files),1): 55 | if single_imag: 56 | frame = data_folder.detach().cpu().numpy() 57 | else: 58 | frame = cv2.imread(files[f]) 59 | import re 60 | self.output_file_name = re.split('[\\\ .]', files[f])[-2] + '_skeleton' 61 | 62 | frame = cv2.resize(frame,None,fx=self.resize_factor,fy=self.resize_factor) 63 | # Select ROI 64 | 65 | # frame=frame[int(frame.shape[0] / 2):, :, :] 66 | # r = cv2.selectROI(frame) 67 | # 68 | # # Crop image 69 | # frame = frame[int(r[1]):int(r[1] + r[3]), int(r[0]):int(r[0] + r[2])] 70 | 71 | # frameCopy = np.copy(frame) 72 | frameWidth = frame.shape[1] 73 | frameHeight = frame.shape[0] 74 | aspect_ratio = frameWidth/frameHeight 75 | 76 | threshold = 0.1 77 | 78 | t = time.time() 79 | # input image dimensions for the network 80 | inHeight = 368 81 | inWidth = int(((aspect_ratio*inHeight)*8)//8) 82 | inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False) 83 | 84 | self.net.setInput(inpBlob) 85 | 86 | output = self.net.forward() 87 | if self.show_debug: 88 | print("time taken by network : {:.3f}".format(time.time() - t)) 89 | 90 | # Empty list to store the detected keypoints 91 | points = [] 92 | # points_probs = [] 93 | for i in range(self.nPoints): 94 | # confidence map of corresponding body's part. 95 | probMap = output[0, i, :, :] 96 | probMap = cv2.resize(probMap, (frameWidth, frameHeight)) 97 | 98 | # Find global maxima of the probMap. 99 | minVal, prob, minLoc, point = cv2.minMaxLoc(probMap) 100 | 101 | if prob > threshold : 102 | cv2.circle(frame, (int(point[0]), int(point[1])), 2, (0, 0, int(255*prob)), thickness=-1, lineType=cv2.FILLED) 103 | cv2.putText(frame, "{}".format(self.rearrange_finger_indices[i]), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.1, (0, 0, 255), 1, lineType=cv2.LINE_AA) 104 | 105 | # Add the point to the list if the probability is greater than the threshold 106 | points.append(np.array([int(point[0]), int(point[1]),prob])) 107 | # points_probs.append(prob) 108 | else : 109 | # points_probs.append(0) 110 | points.append(np.array([0, 0,0])) 111 | points = np.array(points) 112 | # Draw Skeleton 113 | for ii,pair in enumerate(self.POSE_PAIRS): 114 | partA = pair[0] 115 | partB = pair[1] 116 | # prob = points_probs[ii] 117 | if np.all(points[partA]) and np.all(points[partB]): 118 | cv2.line(frame, tuple((points[partA][0:2]).astype(int)), tuple((points[partB][0:2]).astype(int)), (0, 255, 255), 2) 119 | # cv2.circle(frame, points[partA][0:2], 8, (0, 0, int(255*prob)), thickness=-1, lineType=cv2.FILLED) 120 | # cv2.circle(frame, points[partB][0:2], 8, (0, 0, int(255*prob)), thickness=-1, lineType=cv2.FILLED) 121 | 122 | if self.show_debug: 123 | cv2.imshow('Output-Skeleton', frame) 124 | print("Total time taken : {:.3f}".format(time.time() - t)) 125 | 126 | cv2.waitKey(0) 127 | # cv2.imwrite(self.data_out + '\\'+self.output_file_name+'.png', frame) 128 | if self.min_number_of_points < sum(x is not None for x in points): 129 | ordered_points = np.array(points)[self.rearrange_finger_indices] 130 | ordered_points[:,0:2] = ordered_points[:,0:2]/self.resize_factor 131 | self.keypoints[0:self.nPoints-1,:] = ordered_points[:,0:3] 132 | # np.savez(self.data_out + '\\{}.npz'.format(output_file_name), num_hands = 1 , kp_coord_uv=self.keypoints[:,0:2], kp_visible=self.keypoints[:,2], ) 133 | indices = self.keypoints[:, 2] > self.confidence_for_roi 134 | bb = {'minX': int((1 - self.roi_expansion) * min(self.keypoints[indices, 0])), 135 | 'maxX': int((1 + self.roi_expansion) * max(self.keypoints[indices, 0])), 136 | 'minY': int((1 - self.roi_expansion) * min(self.keypoints[indices, 1])), 137 | 'maxY': int((1 + self.roi_expansion) * max(self.keypoints[indices, 1])), 138 | } 139 | # print(bb) 140 | self.debug_image = cv2.resize(frame,None,fx=1/self.resize_factor,fy=1/self.resize_factor) 141 | # import matplotlib.pyplot as plt 142 | # plt.close('all') 143 | # plt.imshow(self.debug_image) 144 | # plt.savefig('a.png') 145 | return bb 146 | 147 | if __name__=='__main__': 148 | data_folder = r"P:\4Erez\david\test\raw_stream\stream001_00104RGB.png" 149 | show_debug = False 150 | hd = HandKeypointDetector("out/",show_debug) 151 | hd.detectKeyPoints(data_folder) 152 | print('%%%%%%%%%%% Done %%%%%%%%%%%%%%%') 153 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Repository is based on 2 | https://www.learnopencv.com/hand-keypoint-detection-using-deep-learning-and-opencv/ 3 | additional development was made as a wrapping class to detect hand keypoints 4 | based on the article - https://arxiv.org/pdf/1704.07809.pdf 5 | 6 | 7 | 8 | Please run getModels.sh from the command line to download the model in the correct folder. 9 | 10 | ### USAGE 11 | 12 | #### Python 13 | **For using it on single image :** 14 | python handPoseImage.py 15 | 16 | **For using on video :** 17 | python handPoseVideo.py 18 | 19 | **To create a class for hand keypoints detector** 20 | python HandKeypointDetector.py 21 | 22 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erezposner/MV_HandKeyPointDetector/afdaa1a9e7701605176d82b03a7436e9981cc467/__init__.py -------------------------------------------------------------------------------- /data/front-back.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erezposner/MV_HandKeyPointDetector/afdaa1a9e7701605176d82b03a7436e9981cc467/data/front-back.jpg -------------------------------------------------------------------------------- /data/hand.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erezposner/MV_HandKeyPointDetector/afdaa1a9e7701605176d82b03a7436e9981cc467/data/hand.jpg -------------------------------------------------------------------------------- /getModels.sh: -------------------------------------------------------------------------------- 1 | # ------------------------- BODY, FACE AND HAND MODELS ------------------------- 2 | # Downloading body pose (COCO and MPI), face and hand models 3 | OPENPOSE_URL="http://posefs1.perception.cs.cmu.edu/OpenPose/models/" 4 | HAND_FOLDER="hand/" 5 | 6 | # "------------------------- HAND MODELS -------------------------" 7 | # Hand 8 | HAND_MODEL=$HAND_FOLDER"pose_iter_102000.caffemodel" 9 | wget -c ${OPENPOSE_URL}${HAND_MODEL} -P ${HAND_FOLDER} 10 | -------------------------------------------------------------------------------- /hand/pose_deploy.prototxt: -------------------------------------------------------------------------------- 1 | input: "image" 2 | input_dim: 1 # Original: 2 3 | input_dim: 3 # It crashes if not left to 3 4 | input_dim: 1 # Original: 368 5 | input_dim: 1 # Original: 368 6 | layer { 7 | name: "conv1_1" 8 | type: "Convolution" 9 | bottom: "image" 10 | top: "conv1_1" 11 | param { 12 | lr_mult: 1.0 13 | decay_mult: 1 14 | } 15 | param { 16 | lr_mult: 2.0 17 | decay_mult: 0 18 | } 19 | convolution_param { 20 | num_output: 64 21 | pad: 1 22 | kernel_size: 3 23 | weight_filler { 24 | type: "xavier" 25 | } 26 | bias_filler { 27 | type: "constant" 28 | } 29 | dilation: 1 30 | } 31 | } 32 | layer { 33 | name: "relu1_1" 34 | type: "ReLU" 35 | bottom: "conv1_1" 36 | top: "conv1_1" 37 | } 38 | layer { 39 | name: "conv1_2" 40 | type: "Convolution" 41 | bottom: "conv1_1" 42 | top: "conv1_2" 43 | param { 44 | lr_mult: 1.0 45 | decay_mult: 1 46 | } 47 | param { 48 | lr_mult: 2.0 49 | decay_mult: 0 50 | } 51 | convolution_param { 52 | num_output: 64 53 | pad: 1 54 | kernel_size: 3 55 | weight_filler { 56 | type: "xavier" 57 | } 58 | bias_filler { 59 | type: "constant" 60 | } 61 | dilation: 1 62 | } 63 | } 64 | layer { 65 | name: "relu1_2" 66 | type: "ReLU" 67 | bottom: "conv1_2" 68 | top: "conv1_2" 69 | } 70 | layer { 71 | name: "pool1_stage1" 72 | type: "Pooling" 73 | bottom: "conv1_2" 74 | top: "pool1_stage1" 75 | pooling_param { 76 | pool: MAX 77 | kernel_size: 2 78 | stride: 2 79 | } 80 | } 81 | layer { 82 | name: "conv2_1" 83 | type: "Convolution" 84 | bottom: "pool1_stage1" 85 | top: "conv2_1" 86 | param { 87 | lr_mult: 1.0 88 | decay_mult: 1 89 | } 90 | param { 91 | lr_mult: 2.0 92 | decay_mult: 0 93 | } 94 | convolution_param { 95 | num_output: 128 96 | pad: 1 97 | kernel_size: 3 98 | weight_filler { 99 | type: "xavier" 100 | } 101 | bias_filler { 102 | type: "constant" 103 | } 104 | dilation: 1 105 | } 106 | } 107 | layer { 108 | name: "relu2_1" 109 | type: "ReLU" 110 | bottom: "conv2_1" 111 | top: "conv2_1" 112 | } 113 | layer { 114 | name: "conv2_2" 115 | type: "Convolution" 116 | bottom: "conv2_1" 117 | top: "conv2_2" 118 | param { 119 | lr_mult: 1.0 120 | decay_mult: 1 121 | } 122 | param { 123 | lr_mult: 2.0 124 | decay_mult: 0 125 | } 126 | convolution_param { 127 | num_output: 128 128 | pad: 1 129 | kernel_size: 3 130 | weight_filler { 131 | type: "xavier" 132 | } 133 | bias_filler { 134 | type: "constant" 135 | } 136 | dilation: 1 137 | } 138 | } 139 | layer { 140 | name: "relu2_2" 141 | type: "ReLU" 142 | bottom: "conv2_2" 143 | top: "conv2_2" 144 | } 145 | layer { 146 | name: "pool2_stage1" 147 | type: "Pooling" 148 | bottom: "conv2_2" 149 | top: "pool2_stage1" 150 | pooling_param { 151 | pool: MAX 152 | kernel_size: 2 153 | stride: 2 154 | } 155 | } 156 | layer { 157 | name: "conv3_1" 158 | type: "Convolution" 159 | bottom: "pool2_stage1" 160 | top: "conv3_1" 161 | param { 162 | lr_mult: 1.0 163 | decay_mult: 1 164 | } 165 | param { 166 | lr_mult: 2.0 167 | decay_mult: 0 168 | } 169 | convolution_param { 170 | num_output: 256 171 | pad: 1 172 | kernel_size: 3 173 | weight_filler { 174 | type: "xavier" 175 | } 176 | bias_filler { 177 | type: "constant" 178 | } 179 | dilation: 1 180 | } 181 | } 182 | layer { 183 | name: "relu3_1" 184 | type: "ReLU" 185 | bottom: "conv3_1" 186 | top: "conv3_1" 187 | } 188 | layer { 189 | name: "conv3_2" 190 | type: "Convolution" 191 | bottom: "conv3_1" 192 | top: "conv3_2" 193 | param { 194 | lr_mult: 1.0 195 | decay_mult: 1 196 | } 197 | param { 198 | lr_mult: 2.0 199 | decay_mult: 0 200 | } 201 | convolution_param { 202 | num_output: 256 203 | pad: 1 204 | kernel_size: 3 205 | weight_filler { 206 | type: "xavier" 207 | } 208 | bias_filler { 209 | type: "constant" 210 | } 211 | dilation: 1 212 | } 213 | } 214 | layer { 215 | name: "relu3_2" 216 | type: "ReLU" 217 | bottom: "conv3_2" 218 | top: "conv3_2" 219 | } 220 | layer { 221 | name: "conv3_3" 222 | type: "Convolution" 223 | bottom: "conv3_2" 224 | top: "conv3_3" 225 | param { 226 | lr_mult: 1.0 227 | decay_mult: 1 228 | } 229 | param { 230 | lr_mult: 2.0 231 | decay_mult: 0 232 | } 233 | convolution_param { 234 | num_output: 256 235 | pad: 1 236 | kernel_size: 3 237 | weight_filler { 238 | type: "xavier" 239 | } 240 | bias_filler { 241 | type: "constant" 242 | } 243 | dilation: 1 244 | } 245 | } 246 | layer { 247 | name: "relu3_3" 248 | type: "ReLU" 249 | bottom: "conv3_3" 250 | top: "conv3_3" 251 | } 252 | layer { 253 | name: "conv3_4" 254 | type: "Convolution" 255 | bottom: "conv3_3" 256 | top: "conv3_4" 257 | param { 258 | lr_mult: 1.0 259 | decay_mult: 1 260 | } 261 | param { 262 | lr_mult: 2.0 263 | decay_mult: 0 264 | } 265 | convolution_param { 266 | num_output: 256 267 | pad: 1 268 | kernel_size: 3 269 | weight_filler { 270 | type: "xavier" 271 | } 272 | bias_filler { 273 | type: "constant" 274 | } 275 | dilation: 1 276 | } 277 | } 278 | layer { 279 | name: "relu3_4" 280 | type: "ReLU" 281 | bottom: "conv3_4" 282 | top: "conv3_4" 283 | } 284 | layer { 285 | name: "pool3_stage1" 286 | type: "Pooling" 287 | bottom: "conv3_4" 288 | top: "pool3_stage1" 289 | pooling_param { 290 | pool: MAX 291 | kernel_size: 2 292 | stride: 2 293 | } 294 | } 295 | layer { 296 | name: "conv4_1" 297 | type: "Convolution" 298 | bottom: "pool3_stage1" 299 | top: "conv4_1" 300 | param { 301 | lr_mult: 1.0 302 | decay_mult: 1 303 | } 304 | param { 305 | lr_mult: 2.0 306 | decay_mult: 0 307 | } 308 | convolution_param { 309 | num_output: 512 310 | pad: 1 311 | kernel_size: 3 312 | weight_filler { 313 | type: "xavier" 314 | } 315 | bias_filler { 316 | type: "constant" 317 | } 318 | dilation: 1 319 | } 320 | } 321 | layer { 322 | name: "relu4_1" 323 | type: "ReLU" 324 | bottom: "conv4_1" 325 | top: "conv4_1" 326 | } 327 | layer { 328 | name: "conv4_2" 329 | type: "Convolution" 330 | bottom: "conv4_1" 331 | top: "conv4_2" 332 | param { 333 | lr_mult: 1.0 334 | decay_mult: 1 335 | } 336 | param { 337 | lr_mult: 2.0 338 | decay_mult: 0 339 | } 340 | convolution_param { 341 | num_output: 512 342 | pad: 1 343 | kernel_size: 3 344 | weight_filler { 345 | type: "xavier" 346 | } 347 | bias_filler { 348 | type: "constant" 349 | } 350 | dilation: 1 351 | } 352 | } 353 | layer { 354 | name: "relu4_2" 355 | type: "ReLU" 356 | bottom: "conv4_2" 357 | top: "conv4_2" 358 | } 359 | layer { 360 | name: "conv4_3" 361 | type: "Convolution" 362 | bottom: "conv4_2" 363 | top: "conv4_3" 364 | param { 365 | lr_mult: 1.0 366 | decay_mult: 1 367 | } 368 | param { 369 | lr_mult: 2.0 370 | decay_mult: 0 371 | } 372 | convolution_param { 373 | num_output: 512 374 | pad: 1 375 | kernel_size: 3 376 | weight_filler { 377 | type: "xavier" 378 | } 379 | bias_filler { 380 | type: "constant" 381 | } 382 | dilation: 1 383 | } 384 | } 385 | layer { 386 | name: "relu4_3" 387 | type: "ReLU" 388 | bottom: "conv4_3" 389 | top: "conv4_3" 390 | } 391 | layer { 392 | name: "conv4_4" 393 | type: "Convolution" 394 | bottom: "conv4_3" 395 | top: "conv4_4" 396 | param { 397 | lr_mult: 1.0 398 | decay_mult: 1 399 | } 400 | param { 401 | lr_mult: 2.0 402 | decay_mult: 0 403 | } 404 | convolution_param { 405 | num_output: 512 406 | pad: 1 407 | kernel_size: 3 408 | weight_filler { 409 | type: "xavier" 410 | } 411 | bias_filler { 412 | type: "constant" 413 | } 414 | dilation: 1 415 | } 416 | } 417 | layer { 418 | name: "relu4_4" 419 | type: "ReLU" 420 | bottom: "conv4_4" 421 | top: "conv4_4" 422 | } 423 | layer { 424 | name: "conv5_1" 425 | type: "Convolution" 426 | bottom: "conv4_4" 427 | top: "conv5_1" 428 | param { 429 | lr_mult: 1.0 430 | decay_mult: 1 431 | } 432 | param { 433 | lr_mult: 2.0 434 | decay_mult: 0 435 | } 436 | convolution_param { 437 | num_output: 512 438 | pad: 1 439 | kernel_size: 3 440 | weight_filler { 441 | type: "xavier" 442 | } 443 | bias_filler { 444 | type: "constant" 445 | } 446 | dilation: 1 447 | } 448 | } 449 | layer { 450 | name: "relu5_1" 451 | type: "ReLU" 452 | bottom: "conv5_1" 453 | top: "conv5_1" 454 | } 455 | layer { 456 | name: "conv5_2" 457 | type: "Convolution" 458 | bottom: "conv5_1" 459 | top: "conv5_2" 460 | param { 461 | lr_mult: 1.0 462 | decay_mult: 1 463 | } 464 | param { 465 | lr_mult: 2.0 466 | decay_mult: 0 467 | } 468 | convolution_param { 469 | num_output: 512 470 | pad: 1 471 | kernel_size: 3 472 | weight_filler { 473 | type: "xavier" 474 | } 475 | bias_filler { 476 | type: "constant" 477 | } 478 | dilation: 1 479 | } 480 | } 481 | layer { 482 | name: "relu5_2" 483 | type: "ReLU" 484 | bottom: "conv5_2" 485 | top: "conv5_2" 486 | } 487 | layer { 488 | name: "conv5_3_CPM" 489 | type: "Convolution" 490 | bottom: "conv5_2" 491 | top: "conv5_3_CPM" 492 | param { 493 | lr_mult: 1.0 494 | decay_mult: 1 495 | } 496 | param { 497 | lr_mult: 2.0 498 | decay_mult: 0 499 | } 500 | convolution_param { 501 | num_output: 128 502 | pad: 1 503 | kernel_size: 3 504 | weight_filler { 505 | type: "gaussian" 506 | std: 0.01 507 | } 508 | bias_filler { 509 | type: "constant" 510 | } 511 | dilation: 1 512 | } 513 | } 514 | layer { 515 | name: "relu5_4_stage1_3" 516 | type: "ReLU" 517 | bottom: "conv5_3_CPM" 518 | top: "conv5_3_CPM" 519 | } 520 | layer { 521 | name: "conv6_1_CPM" 522 | type: "Convolution" 523 | bottom: "conv5_3_CPM" 524 | top: "conv6_1_CPM" 525 | param { 526 | lr_mult: 1.0 527 | decay_mult: 1 528 | } 529 | param { 530 | lr_mult: 2.0 531 | decay_mult: 0 532 | } 533 | convolution_param { 534 | num_output: 512 535 | pad: 0 536 | kernel_size: 1 537 | weight_filler { 538 | type: "gaussian" 539 | std: 0.01 540 | } 541 | bias_filler { 542 | type: "constant" 543 | } 544 | dilation: 1 545 | } 546 | } 547 | layer { 548 | name: "relu6_4_stage1_1" 549 | type: "ReLU" 550 | bottom: "conv6_1_CPM" 551 | top: "conv6_1_CPM" 552 | } 553 | layer { 554 | name: "conv6_2_CPM" 555 | type: "Convolution" 556 | bottom: "conv6_1_CPM" 557 | top: "conv6_2_CPM" 558 | param { 559 | lr_mult: 1.0 560 | decay_mult: 1 561 | } 562 | param { 563 | lr_mult: 2.0 564 | decay_mult: 0 565 | } 566 | convolution_param { 567 | num_output: 22 568 | pad: 0 569 | kernel_size: 1 570 | weight_filler { 571 | type: "gaussian" 572 | std: 0.01 573 | } 574 | bias_filler { 575 | type: "constant" 576 | } 577 | dilation: 1 578 | } 579 | } 580 | layer { 581 | name: "concat_stage2" 582 | type: "Concat" 583 | bottom: "conv6_2_CPM" 584 | bottom: "conv5_3_CPM" 585 | top: "concat_stage2" 586 | concat_param { 587 | axis: 1 588 | } 589 | } 590 | layer { 591 | name: "Mconv1_stage2" 592 | type: "Convolution" 593 | bottom: "concat_stage2" 594 | top: "Mconv1_stage2" 595 | param { 596 | lr_mult: 4.0 597 | decay_mult: 1 598 | } 599 | param { 600 | lr_mult: 8.0 601 | decay_mult: 0 602 | } 603 | convolution_param { 604 | num_output: 128 605 | pad: 3 606 | kernel_size: 7 607 | weight_filler { 608 | type: "gaussian" 609 | std: 0.01 610 | } 611 | bias_filler { 612 | type: "constant" 613 | } 614 | dilation: 1 615 | } 616 | } 617 | layer { 618 | name: "Mrelu1_2_stage2_1" 619 | type: "ReLU" 620 | bottom: "Mconv1_stage2" 621 | top: "Mconv1_stage2" 622 | } 623 | layer { 624 | name: "Mconv2_stage2" 625 | type: "Convolution" 626 | bottom: "Mconv1_stage2" 627 | top: "Mconv2_stage2" 628 | param { 629 | lr_mult: 4.0 630 | decay_mult: 1 631 | } 632 | param { 633 | lr_mult: 8.0 634 | decay_mult: 0 635 | } 636 | convolution_param { 637 | num_output: 128 638 | pad: 3 639 | kernel_size: 7 640 | weight_filler { 641 | type: "gaussian" 642 | std: 0.01 643 | } 644 | bias_filler { 645 | type: "constant" 646 | } 647 | dilation: 1 648 | } 649 | } 650 | layer { 651 | name: "Mrelu1_3_stage2_2" 652 | type: "ReLU" 653 | bottom: "Mconv2_stage2" 654 | top: "Mconv2_stage2" 655 | } 656 | layer { 657 | name: "Mconv3_stage2" 658 | type: "Convolution" 659 | bottom: "Mconv2_stage2" 660 | top: "Mconv3_stage2" 661 | param { 662 | lr_mult: 4.0 663 | decay_mult: 1 664 | } 665 | param { 666 | lr_mult: 8.0 667 | decay_mult: 0 668 | } 669 | convolution_param { 670 | num_output: 128 671 | pad: 3 672 | kernel_size: 7 673 | weight_filler { 674 | type: "gaussian" 675 | std: 0.01 676 | } 677 | bias_filler { 678 | type: "constant" 679 | } 680 | dilation: 1 681 | } 682 | } 683 | layer { 684 | name: "Mrelu1_4_stage2_3" 685 | type: "ReLU" 686 | bottom: "Mconv3_stage2" 687 | top: "Mconv3_stage2" 688 | } 689 | layer { 690 | name: "Mconv4_stage2" 691 | type: "Convolution" 692 | bottom: "Mconv3_stage2" 693 | top: "Mconv4_stage2" 694 | param { 695 | lr_mult: 4.0 696 | decay_mult: 1 697 | } 698 | param { 699 | lr_mult: 8.0 700 | decay_mult: 0 701 | } 702 | convolution_param { 703 | num_output: 128 704 | pad: 3 705 | kernel_size: 7 706 | weight_filler { 707 | type: "gaussian" 708 | std: 0.01 709 | } 710 | bias_filler { 711 | type: "constant" 712 | } 713 | dilation: 1 714 | } 715 | } 716 | layer { 717 | name: "Mrelu1_5_stage2_4" 718 | type: "ReLU" 719 | bottom: "Mconv4_stage2" 720 | top: "Mconv4_stage2" 721 | } 722 | layer { 723 | name: "Mconv5_stage2" 724 | type: "Convolution" 725 | bottom: "Mconv4_stage2" 726 | top: "Mconv5_stage2" 727 | param { 728 | lr_mult: 4.0 729 | decay_mult: 1 730 | } 731 | param { 732 | lr_mult: 8.0 733 | decay_mult: 0 734 | } 735 | convolution_param { 736 | num_output: 128 737 | pad: 3 738 | kernel_size: 7 739 | weight_filler { 740 | type: "gaussian" 741 | std: 0.01 742 | } 743 | bias_filler { 744 | type: "constant" 745 | } 746 | dilation: 1 747 | } 748 | } 749 | layer { 750 | name: "Mrelu1_6_stage2_5" 751 | type: "ReLU" 752 | bottom: "Mconv5_stage2" 753 | top: "Mconv5_stage2" 754 | } 755 | layer { 756 | name: "Mconv6_stage2" 757 | type: "Convolution" 758 | bottom: "Mconv5_stage2" 759 | top: "Mconv6_stage2" 760 | param { 761 | lr_mult: 4.0 762 | decay_mult: 1 763 | } 764 | param { 765 | lr_mult: 8.0 766 | decay_mult: 0 767 | } 768 | convolution_param { 769 | num_output: 128 770 | pad: 0 771 | kernel_size: 1 772 | weight_filler { 773 | type: "gaussian" 774 | std: 0.01 775 | } 776 | bias_filler { 777 | type: "constant" 778 | } 779 | dilation: 1 780 | } 781 | } 782 | layer { 783 | name: "Mrelu1_7_stage2_6" 784 | type: "ReLU" 785 | bottom: "Mconv6_stage2" 786 | top: "Mconv6_stage2" 787 | } 788 | layer { 789 | name: "Mconv7_stage2" 790 | type: "Convolution" 791 | bottom: "Mconv6_stage2" 792 | top: "Mconv7_stage2" 793 | param { 794 | lr_mult: 4.0 795 | decay_mult: 1 796 | } 797 | param { 798 | lr_mult: 8.0 799 | decay_mult: 0 800 | } 801 | convolution_param { 802 | num_output: 22 803 | pad: 0 804 | kernel_size: 1 805 | weight_filler { 806 | type: "gaussian" 807 | std: 0.01 808 | } 809 | bias_filler { 810 | type: "constant" 811 | } 812 | dilation: 1 813 | } 814 | } 815 | layer { 816 | name: "concat_stage3" 817 | type: "Concat" 818 | bottom: "Mconv7_stage2" 819 | bottom: "conv5_3_CPM" 820 | top: "concat_stage3" 821 | concat_param { 822 | axis: 1 823 | } 824 | } 825 | layer { 826 | name: "Mconv1_stage3" 827 | type: "Convolution" 828 | bottom: "concat_stage3" 829 | top: "Mconv1_stage3" 830 | param { 831 | lr_mult: 4.0 832 | decay_mult: 1 833 | } 834 | param { 835 | lr_mult: 8.0 836 | decay_mult: 0 837 | } 838 | convolution_param { 839 | num_output: 128 840 | pad: 3 841 | kernel_size: 7 842 | weight_filler { 843 | type: "gaussian" 844 | std: 0.01 845 | } 846 | bias_filler { 847 | type: "constant" 848 | } 849 | dilation: 1 850 | } 851 | } 852 | layer { 853 | name: "Mrelu1_2_stage3_1" 854 | type: "ReLU" 855 | bottom: "Mconv1_stage3" 856 | top: "Mconv1_stage3" 857 | } 858 | layer { 859 | name: "Mconv2_stage3" 860 | type: "Convolution" 861 | bottom: "Mconv1_stage3" 862 | top: "Mconv2_stage3" 863 | param { 864 | lr_mult: 4.0 865 | decay_mult: 1 866 | } 867 | param { 868 | lr_mult: 8.0 869 | decay_mult: 0 870 | } 871 | convolution_param { 872 | num_output: 128 873 | pad: 3 874 | kernel_size: 7 875 | weight_filler { 876 | type: "gaussian" 877 | std: 0.01 878 | } 879 | bias_filler { 880 | type: "constant" 881 | } 882 | dilation: 1 883 | } 884 | } 885 | layer { 886 | name: "Mrelu1_3_stage3_2" 887 | type: "ReLU" 888 | bottom: "Mconv2_stage3" 889 | top: "Mconv2_stage3" 890 | } 891 | layer { 892 | name: "Mconv3_stage3" 893 | type: "Convolution" 894 | bottom: "Mconv2_stage3" 895 | top: "Mconv3_stage3" 896 | param { 897 | lr_mult: 4.0 898 | decay_mult: 1 899 | } 900 | param { 901 | lr_mult: 8.0 902 | decay_mult: 0 903 | } 904 | convolution_param { 905 | num_output: 128 906 | pad: 3 907 | kernel_size: 7 908 | weight_filler { 909 | type: "gaussian" 910 | std: 0.01 911 | } 912 | bias_filler { 913 | type: "constant" 914 | } 915 | dilation: 1 916 | } 917 | } 918 | layer { 919 | name: "Mrelu1_4_stage3_3" 920 | type: "ReLU" 921 | bottom: "Mconv3_stage3" 922 | top: "Mconv3_stage3" 923 | } 924 | layer { 925 | name: "Mconv4_stage3" 926 | type: "Convolution" 927 | bottom: "Mconv3_stage3" 928 | top: "Mconv4_stage3" 929 | param { 930 | lr_mult: 4.0 931 | decay_mult: 1 932 | } 933 | param { 934 | lr_mult: 8.0 935 | decay_mult: 0 936 | } 937 | convolution_param { 938 | num_output: 128 939 | pad: 3 940 | kernel_size: 7 941 | weight_filler { 942 | type: "gaussian" 943 | std: 0.01 944 | } 945 | bias_filler { 946 | type: "constant" 947 | } 948 | dilation: 1 949 | } 950 | } 951 | layer { 952 | name: "Mrelu1_5_stage3_4" 953 | type: "ReLU" 954 | bottom: "Mconv4_stage3" 955 | top: "Mconv4_stage3" 956 | } 957 | layer { 958 | name: "Mconv5_stage3" 959 | type: "Convolution" 960 | bottom: "Mconv4_stage3" 961 | top: "Mconv5_stage3" 962 | param { 963 | lr_mult: 4.0 964 | decay_mult: 1 965 | } 966 | param { 967 | lr_mult: 8.0 968 | decay_mult: 0 969 | } 970 | convolution_param { 971 | num_output: 128 972 | pad: 3 973 | kernel_size: 7 974 | weight_filler { 975 | type: "gaussian" 976 | std: 0.01 977 | } 978 | bias_filler { 979 | type: "constant" 980 | } 981 | dilation: 1 982 | } 983 | } 984 | layer { 985 | name: "Mrelu1_6_stage3_5" 986 | type: "ReLU" 987 | bottom: "Mconv5_stage3" 988 | top: "Mconv5_stage3" 989 | } 990 | layer { 991 | name: "Mconv6_stage3" 992 | type: "Convolution" 993 | bottom: "Mconv5_stage3" 994 | top: "Mconv6_stage3" 995 | param { 996 | lr_mult: 4.0 997 | decay_mult: 1 998 | } 999 | param { 1000 | lr_mult: 8.0 1001 | decay_mult: 0 1002 | } 1003 | convolution_param { 1004 | num_output: 128 1005 | pad: 0 1006 | kernel_size: 1 1007 | weight_filler { 1008 | type: "gaussian" 1009 | std: 0.01 1010 | } 1011 | bias_filler { 1012 | type: "constant" 1013 | } 1014 | dilation: 1 1015 | } 1016 | } 1017 | layer { 1018 | name: "Mrelu1_7_stage3_6" 1019 | type: "ReLU" 1020 | bottom: "Mconv6_stage3" 1021 | top: "Mconv6_stage3" 1022 | } 1023 | layer { 1024 | name: "Mconv7_stage3" 1025 | type: "Convolution" 1026 | bottom: "Mconv6_stage3" 1027 | top: "Mconv7_stage3" 1028 | param { 1029 | lr_mult: 4.0 1030 | decay_mult: 1 1031 | } 1032 | param { 1033 | lr_mult: 8.0 1034 | decay_mult: 0 1035 | } 1036 | convolution_param { 1037 | num_output: 22 1038 | pad: 0 1039 | kernel_size: 1 1040 | weight_filler { 1041 | type: "gaussian" 1042 | std: 0.01 1043 | } 1044 | bias_filler { 1045 | type: "constant" 1046 | } 1047 | dilation: 1 1048 | } 1049 | } 1050 | layer { 1051 | name: "concat_stage4" 1052 | type: "Concat" 1053 | bottom: "Mconv7_stage3" 1054 | bottom: "conv5_3_CPM" 1055 | top: "concat_stage4" 1056 | concat_param { 1057 | axis: 1 1058 | } 1059 | } 1060 | layer { 1061 | name: "Mconv1_stage4" 1062 | type: "Convolution" 1063 | bottom: "concat_stage4" 1064 | top: "Mconv1_stage4" 1065 | param { 1066 | lr_mult: 4.0 1067 | decay_mult: 1 1068 | } 1069 | param { 1070 | lr_mult: 8.0 1071 | decay_mult: 0 1072 | } 1073 | convolution_param { 1074 | num_output: 128 1075 | pad: 3 1076 | kernel_size: 7 1077 | weight_filler { 1078 | type: "gaussian" 1079 | std: 0.01 1080 | } 1081 | bias_filler { 1082 | type: "constant" 1083 | } 1084 | dilation: 1 1085 | } 1086 | } 1087 | layer { 1088 | name: "Mrelu1_2_stage4_1" 1089 | type: "ReLU" 1090 | bottom: "Mconv1_stage4" 1091 | top: "Mconv1_stage4" 1092 | } 1093 | layer { 1094 | name: "Mconv2_stage4" 1095 | type: "Convolution" 1096 | bottom: "Mconv1_stage4" 1097 | top: "Mconv2_stage4" 1098 | param { 1099 | lr_mult: 4.0 1100 | decay_mult: 1 1101 | } 1102 | param { 1103 | lr_mult: 8.0 1104 | decay_mult: 0 1105 | } 1106 | convolution_param { 1107 | num_output: 128 1108 | pad: 3 1109 | kernel_size: 7 1110 | weight_filler { 1111 | type: "gaussian" 1112 | std: 0.01 1113 | } 1114 | bias_filler { 1115 | type: "constant" 1116 | } 1117 | dilation: 1 1118 | } 1119 | } 1120 | layer { 1121 | name: "Mrelu1_3_stage4_2" 1122 | type: "ReLU" 1123 | bottom: "Mconv2_stage4" 1124 | top: "Mconv2_stage4" 1125 | } 1126 | layer { 1127 | name: "Mconv3_stage4" 1128 | type: "Convolution" 1129 | bottom: "Mconv2_stage4" 1130 | top: "Mconv3_stage4" 1131 | param { 1132 | lr_mult: 4.0 1133 | decay_mult: 1 1134 | } 1135 | param { 1136 | lr_mult: 8.0 1137 | decay_mult: 0 1138 | } 1139 | convolution_param { 1140 | num_output: 128 1141 | pad: 3 1142 | kernel_size: 7 1143 | weight_filler { 1144 | type: "gaussian" 1145 | std: 0.01 1146 | } 1147 | bias_filler { 1148 | type: "constant" 1149 | } 1150 | dilation: 1 1151 | } 1152 | } 1153 | layer { 1154 | name: "Mrelu1_4_stage4_3" 1155 | type: "ReLU" 1156 | bottom: "Mconv3_stage4" 1157 | top: "Mconv3_stage4" 1158 | } 1159 | layer { 1160 | name: "Mconv4_stage4" 1161 | type: "Convolution" 1162 | bottom: "Mconv3_stage4" 1163 | top: "Mconv4_stage4" 1164 | param { 1165 | lr_mult: 4.0 1166 | decay_mult: 1 1167 | } 1168 | param { 1169 | lr_mult: 8.0 1170 | decay_mult: 0 1171 | } 1172 | convolution_param { 1173 | num_output: 128 1174 | pad: 3 1175 | kernel_size: 7 1176 | weight_filler { 1177 | type: "gaussian" 1178 | std: 0.01 1179 | } 1180 | bias_filler { 1181 | type: "constant" 1182 | } 1183 | dilation: 1 1184 | } 1185 | } 1186 | layer { 1187 | name: "Mrelu1_5_stage4_4" 1188 | type: "ReLU" 1189 | bottom: "Mconv4_stage4" 1190 | top: "Mconv4_stage4" 1191 | } 1192 | layer { 1193 | name: "Mconv5_stage4" 1194 | type: "Convolution" 1195 | bottom: "Mconv4_stage4" 1196 | top: "Mconv5_stage4" 1197 | param { 1198 | lr_mult: 4.0 1199 | decay_mult: 1 1200 | } 1201 | param { 1202 | lr_mult: 8.0 1203 | decay_mult: 0 1204 | } 1205 | convolution_param { 1206 | num_output: 128 1207 | pad: 3 1208 | kernel_size: 7 1209 | weight_filler { 1210 | type: "gaussian" 1211 | std: 0.01 1212 | } 1213 | bias_filler { 1214 | type: "constant" 1215 | } 1216 | dilation: 1 1217 | } 1218 | } 1219 | layer { 1220 | name: "Mrelu1_6_stage4_5" 1221 | type: "ReLU" 1222 | bottom: "Mconv5_stage4" 1223 | top: "Mconv5_stage4" 1224 | } 1225 | layer { 1226 | name: "Mconv6_stage4" 1227 | type: "Convolution" 1228 | bottom: "Mconv5_stage4" 1229 | top: "Mconv6_stage4" 1230 | param { 1231 | lr_mult: 4.0 1232 | decay_mult: 1 1233 | } 1234 | param { 1235 | lr_mult: 8.0 1236 | decay_mult: 0 1237 | } 1238 | convolution_param { 1239 | num_output: 128 1240 | pad: 0 1241 | kernel_size: 1 1242 | weight_filler { 1243 | type: "gaussian" 1244 | std: 0.01 1245 | } 1246 | bias_filler { 1247 | type: "constant" 1248 | } 1249 | dilation: 1 1250 | } 1251 | } 1252 | layer { 1253 | name: "Mrelu1_7_stage4_6" 1254 | type: "ReLU" 1255 | bottom: "Mconv6_stage4" 1256 | top: "Mconv6_stage4" 1257 | } 1258 | layer { 1259 | name: "Mconv7_stage4" 1260 | type: "Convolution" 1261 | bottom: "Mconv6_stage4" 1262 | top: "Mconv7_stage4" 1263 | param { 1264 | lr_mult: 4.0 1265 | decay_mult: 1 1266 | } 1267 | param { 1268 | lr_mult: 8.0 1269 | decay_mult: 0 1270 | } 1271 | convolution_param { 1272 | num_output: 22 1273 | pad: 0 1274 | kernel_size: 1 1275 | weight_filler { 1276 | type: "gaussian" 1277 | std: 0.01 1278 | } 1279 | bias_filler { 1280 | type: "constant" 1281 | } 1282 | dilation: 1 1283 | } 1284 | } 1285 | layer { 1286 | name: "concat_stage5" 1287 | type: "Concat" 1288 | bottom: "Mconv7_stage4" 1289 | bottom: "conv5_3_CPM" 1290 | top: "concat_stage5" 1291 | concat_param { 1292 | axis: 1 1293 | } 1294 | } 1295 | layer { 1296 | name: "Mconv1_stage5" 1297 | type: "Convolution" 1298 | bottom: "concat_stage5" 1299 | top: "Mconv1_stage5" 1300 | param { 1301 | lr_mult: 4.0 1302 | decay_mult: 1 1303 | } 1304 | param { 1305 | lr_mult: 8.0 1306 | decay_mult: 0 1307 | } 1308 | convolution_param { 1309 | num_output: 128 1310 | pad: 3 1311 | kernel_size: 7 1312 | weight_filler { 1313 | type: "gaussian" 1314 | std: 0.01 1315 | } 1316 | bias_filler { 1317 | type: "constant" 1318 | } 1319 | dilation: 1 1320 | } 1321 | } 1322 | layer { 1323 | name: "Mrelu1_2_stage5_1" 1324 | type: "ReLU" 1325 | bottom: "Mconv1_stage5" 1326 | top: "Mconv1_stage5" 1327 | } 1328 | layer { 1329 | name: "Mconv2_stage5" 1330 | type: "Convolution" 1331 | bottom: "Mconv1_stage5" 1332 | top: "Mconv2_stage5" 1333 | param { 1334 | lr_mult: 4.0 1335 | decay_mult: 1 1336 | } 1337 | param { 1338 | lr_mult: 8.0 1339 | decay_mult: 0 1340 | } 1341 | convolution_param { 1342 | num_output: 128 1343 | pad: 3 1344 | kernel_size: 7 1345 | weight_filler { 1346 | type: "gaussian" 1347 | std: 0.01 1348 | } 1349 | bias_filler { 1350 | type: "constant" 1351 | } 1352 | dilation: 1 1353 | } 1354 | } 1355 | layer { 1356 | name: "Mrelu1_3_stage5_2" 1357 | type: "ReLU" 1358 | bottom: "Mconv2_stage5" 1359 | top: "Mconv2_stage5" 1360 | } 1361 | layer { 1362 | name: "Mconv3_stage5" 1363 | type: "Convolution" 1364 | bottom: "Mconv2_stage5" 1365 | top: "Mconv3_stage5" 1366 | param { 1367 | lr_mult: 4.0 1368 | decay_mult: 1 1369 | } 1370 | param { 1371 | lr_mult: 8.0 1372 | decay_mult: 0 1373 | } 1374 | convolution_param { 1375 | num_output: 128 1376 | pad: 3 1377 | kernel_size: 7 1378 | weight_filler { 1379 | type: "gaussian" 1380 | std: 0.01 1381 | } 1382 | bias_filler { 1383 | type: "constant" 1384 | } 1385 | dilation: 1 1386 | } 1387 | } 1388 | layer { 1389 | name: "Mrelu1_4_stage5_3" 1390 | type: "ReLU" 1391 | bottom: "Mconv3_stage5" 1392 | top: "Mconv3_stage5" 1393 | } 1394 | layer { 1395 | name: "Mconv4_stage5" 1396 | type: "Convolution" 1397 | bottom: "Mconv3_stage5" 1398 | top: "Mconv4_stage5" 1399 | param { 1400 | lr_mult: 4.0 1401 | decay_mult: 1 1402 | } 1403 | param { 1404 | lr_mult: 8.0 1405 | decay_mult: 0 1406 | } 1407 | convolution_param { 1408 | num_output: 128 1409 | pad: 3 1410 | kernel_size: 7 1411 | weight_filler { 1412 | type: "gaussian" 1413 | std: 0.01 1414 | } 1415 | bias_filler { 1416 | type: "constant" 1417 | } 1418 | dilation: 1 1419 | } 1420 | } 1421 | layer { 1422 | name: "Mrelu1_5_stage5_4" 1423 | type: "ReLU" 1424 | bottom: "Mconv4_stage5" 1425 | top: "Mconv4_stage5" 1426 | } 1427 | layer { 1428 | name: "Mconv5_stage5" 1429 | type: "Convolution" 1430 | bottom: "Mconv4_stage5" 1431 | top: "Mconv5_stage5" 1432 | param { 1433 | lr_mult: 4.0 1434 | decay_mult: 1 1435 | } 1436 | param { 1437 | lr_mult: 8.0 1438 | decay_mult: 0 1439 | } 1440 | convolution_param { 1441 | num_output: 128 1442 | pad: 3 1443 | kernel_size: 7 1444 | weight_filler { 1445 | type: "gaussian" 1446 | std: 0.01 1447 | } 1448 | bias_filler { 1449 | type: "constant" 1450 | } 1451 | dilation: 1 1452 | } 1453 | } 1454 | layer { 1455 | name: "Mrelu1_6_stage5_5" 1456 | type: "ReLU" 1457 | bottom: "Mconv5_stage5" 1458 | top: "Mconv5_stage5" 1459 | } 1460 | layer { 1461 | name: "Mconv6_stage5" 1462 | type: "Convolution" 1463 | bottom: "Mconv5_stage5" 1464 | top: "Mconv6_stage5" 1465 | param { 1466 | lr_mult: 4.0 1467 | decay_mult: 1 1468 | } 1469 | param { 1470 | lr_mult: 8.0 1471 | decay_mult: 0 1472 | } 1473 | convolution_param { 1474 | num_output: 128 1475 | pad: 0 1476 | kernel_size: 1 1477 | weight_filler { 1478 | type: "gaussian" 1479 | std: 0.01 1480 | } 1481 | bias_filler { 1482 | type: "constant" 1483 | } 1484 | dilation: 1 1485 | } 1486 | } 1487 | layer { 1488 | name: "Mrelu1_7_stage5_6" 1489 | type: "ReLU" 1490 | bottom: "Mconv6_stage5" 1491 | top: "Mconv6_stage5" 1492 | } 1493 | layer { 1494 | name: "Mconv7_stage5" 1495 | type: "Convolution" 1496 | bottom: "Mconv6_stage5" 1497 | top: "Mconv7_stage5" 1498 | param { 1499 | lr_mult: 4.0 1500 | decay_mult: 1 1501 | } 1502 | param { 1503 | lr_mult: 8.0 1504 | decay_mult: 0 1505 | } 1506 | convolution_param { 1507 | num_output: 22 1508 | pad: 0 1509 | kernel_size: 1 1510 | weight_filler { 1511 | type: "gaussian" 1512 | std: 0.01 1513 | } 1514 | bias_filler { 1515 | type: "constant" 1516 | } 1517 | dilation: 1 1518 | } 1519 | } 1520 | layer { 1521 | name: "concat_stage6" 1522 | type: "Concat" 1523 | bottom: "Mconv7_stage5" 1524 | bottom: "conv5_3_CPM" 1525 | top: "concat_stage6" 1526 | concat_param { 1527 | axis: 1 1528 | } 1529 | } 1530 | layer { 1531 | name: "Mconv1_stage6" 1532 | type: "Convolution" 1533 | bottom: "concat_stage6" 1534 | top: "Mconv1_stage6" 1535 | param { 1536 | lr_mult: 4.0 1537 | decay_mult: 1 1538 | } 1539 | param { 1540 | lr_mult: 8.0 1541 | decay_mult: 0 1542 | } 1543 | convolution_param { 1544 | num_output: 128 1545 | pad: 3 1546 | kernel_size: 7 1547 | weight_filler { 1548 | type: "gaussian" 1549 | std: 0.01 1550 | } 1551 | bias_filler { 1552 | type: "constant" 1553 | } 1554 | dilation: 1 1555 | } 1556 | } 1557 | layer { 1558 | name: "Mrelu1_2_stage6_1" 1559 | type: "ReLU" 1560 | bottom: "Mconv1_stage6" 1561 | top: "Mconv1_stage6" 1562 | } 1563 | layer { 1564 | name: "Mconv2_stage6" 1565 | type: "Convolution" 1566 | bottom: "Mconv1_stage6" 1567 | top: "Mconv2_stage6" 1568 | param { 1569 | lr_mult: 4.0 1570 | decay_mult: 1 1571 | } 1572 | param { 1573 | lr_mult: 8.0 1574 | decay_mult: 0 1575 | } 1576 | convolution_param { 1577 | num_output: 128 1578 | pad: 3 1579 | kernel_size: 7 1580 | weight_filler { 1581 | type: "gaussian" 1582 | std: 0.01 1583 | } 1584 | bias_filler { 1585 | type: "constant" 1586 | } 1587 | dilation: 1 1588 | } 1589 | } 1590 | layer { 1591 | name: "Mrelu1_3_stage6_2" 1592 | type: "ReLU" 1593 | bottom: "Mconv2_stage6" 1594 | top: "Mconv2_stage6" 1595 | } 1596 | layer { 1597 | name: "Mconv3_stage6" 1598 | type: "Convolution" 1599 | bottom: "Mconv2_stage6" 1600 | top: "Mconv3_stage6" 1601 | param { 1602 | lr_mult: 4.0 1603 | decay_mult: 1 1604 | } 1605 | param { 1606 | lr_mult: 8.0 1607 | decay_mult: 0 1608 | } 1609 | convolution_param { 1610 | num_output: 128 1611 | pad: 3 1612 | kernel_size: 7 1613 | weight_filler { 1614 | type: "gaussian" 1615 | std: 0.01 1616 | } 1617 | bias_filler { 1618 | type: "constant" 1619 | } 1620 | dilation: 1 1621 | } 1622 | } 1623 | layer { 1624 | name: "Mrelu1_4_stage6_3" 1625 | type: "ReLU" 1626 | bottom: "Mconv3_stage6" 1627 | top: "Mconv3_stage6" 1628 | } 1629 | layer { 1630 | name: "Mconv4_stage6" 1631 | type: "Convolution" 1632 | bottom: "Mconv3_stage6" 1633 | top: "Mconv4_stage6" 1634 | param { 1635 | lr_mult: 4.0 1636 | decay_mult: 1 1637 | } 1638 | param { 1639 | lr_mult: 8.0 1640 | decay_mult: 0 1641 | } 1642 | convolution_param { 1643 | num_output: 128 1644 | pad: 3 1645 | kernel_size: 7 1646 | weight_filler { 1647 | type: "gaussian" 1648 | std: 0.01 1649 | } 1650 | bias_filler { 1651 | type: "constant" 1652 | } 1653 | dilation: 1 1654 | } 1655 | } 1656 | layer { 1657 | name: "Mrelu1_5_stage6_4" 1658 | type: "ReLU" 1659 | bottom: "Mconv4_stage6" 1660 | top: "Mconv4_stage6" 1661 | } 1662 | layer { 1663 | name: "Mconv5_stage6" 1664 | type: "Convolution" 1665 | bottom: "Mconv4_stage6" 1666 | top: "Mconv5_stage6" 1667 | param { 1668 | lr_mult: 4.0 1669 | decay_mult: 1 1670 | } 1671 | param { 1672 | lr_mult: 8.0 1673 | decay_mult: 0 1674 | } 1675 | convolution_param { 1676 | num_output: 128 1677 | pad: 3 1678 | kernel_size: 7 1679 | weight_filler { 1680 | type: "gaussian" 1681 | std: 0.01 1682 | } 1683 | bias_filler { 1684 | type: "constant" 1685 | } 1686 | dilation: 1 1687 | } 1688 | } 1689 | layer { 1690 | name: "Mrelu1_6_stage6_5" 1691 | type: "ReLU" 1692 | bottom: "Mconv5_stage6" 1693 | top: "Mconv5_stage6" 1694 | } 1695 | layer { 1696 | name: "Mconv6_stage6" 1697 | type: "Convolution" 1698 | bottom: "Mconv5_stage6" 1699 | top: "Mconv6_stage6" 1700 | param { 1701 | lr_mult: 4.0 1702 | decay_mult: 1 1703 | } 1704 | param { 1705 | lr_mult: 8.0 1706 | decay_mult: 0 1707 | } 1708 | convolution_param { 1709 | num_output: 128 1710 | pad: 0 1711 | kernel_size: 1 1712 | weight_filler { 1713 | type: "gaussian" 1714 | std: 0.01 1715 | } 1716 | bias_filler { 1717 | type: "constant" 1718 | } 1719 | dilation: 1 1720 | } 1721 | } 1722 | layer { 1723 | name: "Mrelu1_7_stage6_6" 1724 | type: "ReLU" 1725 | bottom: "Mconv6_stage6" 1726 | top: "Mconv6_stage6" 1727 | } 1728 | layer { 1729 | name: "Mconv7_stage6" 1730 | type: "Convolution" 1731 | bottom: "Mconv6_stage6" 1732 | # top: "Mconv7_stage6" 1733 | top: "net_output" 1734 | param { 1735 | lr_mult: 4.0 1736 | decay_mult: 1 1737 | } 1738 | param { 1739 | lr_mult: 8.0 1740 | decay_mult: 0 1741 | } 1742 | convolution_param { 1743 | num_output: 22 1744 | pad: 0 1745 | kernel_size: 1 1746 | weight_filler { 1747 | type: "gaussian" 1748 | std: 0.01 1749 | } 1750 | bias_filler { 1751 | type: "constant" 1752 | } 1753 | dilation: 1 1754 | } 1755 | } 1756 | 1757 | -------------------------------------------------------------------------------- /handPoseImage.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import cv2 3 | import time 4 | import numpy as np 5 | 6 | protoFile = "hand/pose_deploy.prototxt" 7 | weightsFile = "hand/pose_iter_102000.caffemodel" 8 | nPoints = 22 9 | POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ] 10 | net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile) 11 | 12 | frame = cv2.imread("right-frontal.jpg") 13 | frameCopy = np.copy(frame) 14 | frameWidth = frame.shape[1] 15 | frameHeight = frame.shape[0] 16 | aspect_ratio = frameWidth/frameHeight 17 | 18 | threshold = 0.1 19 | 20 | t = time.time() 21 | # input image dimensions for the network 22 | inHeight = 368 23 | inWidth = int(((aspect_ratio*inHeight)*8)//8) 24 | inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False) 25 | 26 | net.setInput(inpBlob) 27 | 28 | output = net.forward() 29 | print("time taken by network : {:.3f}".format(time.time() - t)) 30 | 31 | # Empty list to store the detected keypoints 32 | points = [] 33 | 34 | for i in range(nPoints): 35 | # confidence map of corresponding body's part. 36 | probMap = output[0, i, :, :] 37 | probMap = cv2.resize(probMap, (frameWidth, frameHeight)) 38 | 39 | # Find global maxima of the probMap. 40 | minVal, prob, minLoc, point = cv2.minMaxLoc(probMap) 41 | 42 | if prob > threshold : 43 | cv2.circle(frameCopy, (int(point[0]), int(point[1])), 8, (0, 255, 255), thickness=-1, lineType=cv2.FILLED) 44 | cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, lineType=cv2.LINE_AA) 45 | 46 | # Add the point to the list if the probability is greater than the threshold 47 | points.append((int(point[0]), int(point[1]))) 48 | else : 49 | points.append(None) 50 | 51 | # Draw Skeleton 52 | for pair in POSE_PAIRS: 53 | partA = pair[0] 54 | partB = pair[1] 55 | 56 | if points[partA] and points[partB]: 57 | cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2) 58 | cv2.circle(frame, points[partA], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED) 59 | cv2.circle(frame, points[partB], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED) 60 | 61 | 62 | cv2.imshow('Output-Keypoints', frameCopy) 63 | cv2.imshow('Output-Skeleton', frame) 64 | 65 | 66 | cv2.imwrite('Output-Keypoints.jpg', frameCopy) 67 | cv2.imwrite('Output-Skeleton.jpg', frame) 68 | 69 | print("Total time taken : {:.3f}".format(time.time() - t)) 70 | 71 | cv2.waitKey(0) 72 | -------------------------------------------------------------------------------- /handPoseVideo.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import time 3 | import numpy as np 4 | 5 | 6 | protoFile = "hand/pose_deploy.prototxt" 7 | weightsFile = "hand/pose_iter_102000.caffemodel" 8 | nPoints = 22 9 | POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ] 10 | 11 | threshold = 0.2 12 | 13 | 14 | input_source = "asl.mp4" 15 | cap = cv2.VideoCapture(input_source) 16 | hasFrame, frame = cap.read() 17 | 18 | frameWidth = frame.shape[1] 19 | frameHeight = frame.shape[0] 20 | 21 | aspect_ratio = frameWidth/frameHeight 22 | 23 | inHeight = 368 24 | inWidth = int(((aspect_ratio*inHeight)*8)//8) 25 | 26 | vid_writer = cv2.VideoWriter('output.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 15, (frame.shape[1],frame.shape[0])) 27 | 28 | net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile) 29 | k = 0 30 | while 1: 31 | k+=1 32 | t = time.time() 33 | hasFrame, frame = cap.read() 34 | frameCopy = np.copy(frame) 35 | if not hasFrame: 36 | cv2.waitKey() 37 | break 38 | 39 | inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), 40 | (0, 0, 0), swapRB=False, crop=False) 41 | 42 | net.setInput(inpBlob) 43 | 44 | output = net.forward() 45 | 46 | print("forward = {}".format(time.time() - t)) 47 | 48 | # Empty list to store the detected keypoints 49 | points = [] 50 | 51 | for i in range(nPoints): 52 | # confidence map of corresponding body's part. 53 | probMap = output[0, i, :, :] 54 | probMap = cv2.resize(probMap, (frameWidth, frameHeight)) 55 | 56 | # Find global maxima of the probMap. 57 | minVal, prob, minLoc, point = cv2.minMaxLoc(probMap) 58 | 59 | if prob > threshold : 60 | cv2.circle(frameCopy, (int(point[0]), int(point[1])), 6, (0, 255, 255), thickness=-1, lineType=cv2.FILLED) 61 | cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, .8, (0, 0, 255), 2, lineType=cv2.LINE_AA) 62 | 63 | # Add the point to the list if the probability is greater than the threshold 64 | points.append((int(point[0]), int(point[1]))) 65 | else : 66 | points.append(None) 67 | 68 | # Draw Skeleton 69 | for pair in POSE_PAIRS: 70 | partA = pair[0] 71 | partB = pair[1] 72 | 73 | if points[partA] and points[partB]: 74 | cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2, lineType=cv2.LINE_AA) 75 | cv2.circle(frame, points[partA], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED) 76 | cv2.circle(frame, points[partB], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED) 77 | 78 | print("Time Taken for frame = {}".format(time.time() - t)) 79 | 80 | # cv2.putText(frame, "time taken = {:.2f} sec".format(time.time() - t), (50, 50), cv2.FONT_HERSHEY_COMPLEX, .8, (255, 50, 0), 2, lineType=cv2.LINE_AA) 81 | # cv2.putText(frame, "Hand Pose using OpenCV", (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 50, 0), 2, lineType=cv2.LINE_AA) 82 | cv2.imshow('Output-Skeleton', frame) 83 | # cv2.imwrite("video_output/{:03d}.jpg".format(k), frame) 84 | key = cv2.waitKey(1) 85 | if key == 27: 86 | break 87 | 88 | print("total = {}".format(time.time() - t)) 89 | 90 | vid_writer.write(frame) 91 | 92 | vid_writer.release() 93 | --------------------------------------------------------------------------------