├── README.md
├── Wonderseen_Handpose_cnn_depth
│   ├── Wonderseen_HandPose_cnn_depth.py
│   ├── result.png
│   └── utils
│       └── general.py
└── result
    ├── test.png
    ├── test1.png
    ├── test2.png
    ├── test3.png
    ├── test4.png
    ├── test5.png
    └── test6.png
/README.md:
--------------------------------------------------------------------------------
1 | Although I have recently had little time to maintain this repository, I wrote [a blog post (in Chinese)](https://blog.csdn.net/wonderseen/article/details/78341932) about the hand-pose investigation (both traditional and deep-learning approaches) carried out during this project, and I intermittently answer related questions in the comments section of that blog, so you may find more details of the project there.
2 | #
3 | 
4 | 
5 | # Handpose-WonderSeen-Net
6 | 
7 | 1. DATABASE: RHD_published_v2.
8 | 2. DATABASE INFO: RGBD, four channels, pixel-level labels.
9 | 3. CODE: To be updated within 2 months.
10 | 4. ADDITION: This script is not the final version of my gesture-recognition work. Because code management requires considerable effort, the script 'Wonderseen_HandPose_cnn_depth.py' is provided merely as a reference to show the main ideas of the pipeline.
11 | 
12 | 
13 | # Network-Result
14 | 
15 | ![image](result/test.png)
16 | 
17 | ![image](result/test1.png)
18 | 
19 | ![image](result/test2.png)
20 | 
21 | # Reference
22 | [1] Hand Gesture Recognition Based on Shape Parameters.
23 | 
24 | [2] Densely Connected Convolutional Networks.
25 | 
26 | [3] U-Net: Convolutional Networks for Biomedical Image Segmentation.
27 | 
28 | [4] ImageNet Classification with Deep Convolutional Neural Networks.
29 | 
--------------------------------------------------------------------------------
/Wonderseen_Handpose_cnn_depth/Wonderseen_HandPose_cnn_depth.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This script is not the final version of my gesture-recognition work.
3 | # Because code management requires considerable effort, the script is provided merely as a reference to show the main process.
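#
# --------------------------------------------------------------------------------------------
# Illustrative aside (not part of the original script): the RHD_published_v2 depth PNGs used
# below store depth split across two uint8 channels, which this script decodes with
# ReadData.depth_two_uint8_to_float (the ReadData module itself is not included in this
# repository). A minimal sketch of such a decoder, assuming the usual high-byte/low-byte
# encoding normalized to [0, 1], could look like this:
#
#   import numpy as np
#
#   def depth_two_uint8_to_float(top_bits, bottom_bits):
#       # Combine the two 8-bit channels into one 16-bit value and normalize it.
#       depth_map = top_bits.astype('float32') * 2 ** 8 + bottom_bits.astype('float32')
#       return depth_map / float(2 ** 16 - 1)
# --------------------------------------------------------------------------------------------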
4 | # 5 | # Dataset: RHD_published_v2 [Alternative: NYU handpose dataset/BigHand2.2M] 6 | # 7 | # Segmentation: 8 | # Input:160×160×1 deep_img 9 | # output:80×80×1 mask_img 10 | # 11 | # Pose: 12 | # Input: image cropped on proposal region 13 | # Output: 21 scoremap of 2D key poses 14 | # 15 | # Classifier: 16 | # Input: key part of image cropped on proposal region or 21 key points' location 17 | # Output:type of gesture 18 | 19 | ############################################################################################## 20 | ############################################################################################## 21 | ## ## 22 | ## ## ## ##### ### # ##### ###### ###### ##### ##### ##### ### # ## 23 | ## ## # ## ####### # ## # # ## # ## ## ## # # # ## # ## 24 | ## ## ### ## ### ### # ## # # ## # ## ## ## # # # ## # ## 25 | ## ## ## ## ## ## ## # ## # # ## ###### ###### ##### ##### ##### # ## # ## 26 | ## ## ## ## ## ### ### # ## # # ## # ## ## ## # # # ## # ## 27 | ## #### #### ####### # ### # ## # ## ## ## # # # ### ## 28 | ## ## ## ##### # ## ##### ###### ## ## ##### ##### ##### # ## ## 29 | ## ## 30 | ############################################################################################## 31 | ############################################################################################## 32 | 33 | import tensorflow as tf 34 | import pickle 35 | import os 36 | import numpy as np 37 | import scipy.misc 38 | import matplotlib.pyplot as plt 39 | import random 40 | from mpl_toolkits.mplot3d import Axes3D 41 | import sys 42 | sys.path.append("~/Wonderseen_net/nets") 43 | sys.path.append("~/Wonderseen_Net/utils") 44 | sys.path.append("~/Wonderseen_Net/wonderseen_handpose_fcn/tools") 45 | 46 | import cv2 47 | from playsound import playsound 48 | import general 49 | import ReadData 50 | import PostTreatment 51 | 52 | # mode 53 | mode = 'predict' # train or predict 54 | 55 | # get data 56 | set = 'training'# 'training' 'evaluation' 57 | fatherdic = 'RHD_published_v2/' + set 58 | 59 | # Train Para 60 | channel = 1 61 | IMAGE_HEIGHT = 320 62 | IMAGE_WIDTH = 320 63 | trainstep = 50 64 | savestep = 400 65 | start_step = 101200 66 | start_lr = 1e-3 67 | net = general.NetworkOps 68 | saver_restore_addr = '/root/pose-model/handposetemp-model.ckpt-101200' 69 | X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT*IMAGE_WIDTH*channel/4], name='INPUT_IMAGE_HEIGHT_MULTI_WIDTH') 70 | realMask = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT*IMAGE_WIDTH*channel/16], name='realMask') 71 | keep_prob = tf.placeholder(tf.float32) 72 | 73 | # Classifier Para 74 | CL_graph = tf.Graph() 75 | CLASSIFIER_IMAGE_HEIGHT = 50 76 | CLASSIFIER_IMAGE_WIDTH = 50 77 | HAND_NUM = 1 78 | GESTURE_CLASSES = 17 79 | saver_restore_addr_classifier = '/root/clasiffier-model/handposetemp-model.ckpt-4250' 80 | 81 | 82 | # write data into memory 83 | if mode == 'train': 84 | depth_pred = [] 85 | hand_mask_pred = [] 86 | for x in range(0,40000): 87 | sample_id = random.randint(0,40000) 88 | # read mask / deep 89 | mask = scipy.misc.imread(os.path.join(fatherdic, 'mask', '%.5d.png' % sample_id)).astype('float32') 90 | depth = scipy.misc.imread(os.path.join(fatherdic, 'depth', '%.5d.png' % sample_id)) 91 | depth = ReadData.depth_two_uint8_to_float(depth[:, :, 0], depth[:, :, 1]) 92 | depth = cv2.resize(depth,(IMAGE_WIDTH/2,IMAGE_HEIGHT/2)) 93 | 94 | print 'load_data',sample_id, x 95 | mask = cv2.resize(mask,(IMAGE_WIDTH/4,IMAGE_HEIGHT/4)).astype('float32') 96 | for i in range(0, len(mask)): 97 | for j in range(0, len(mask[0])): 98 
| if mask[i][j] <= 1: 99 | mask[i][j] = 0 100 | else: 101 | mask[i][j] = 1 102 | all = sum(sum(mask)) + 1e-4 103 | mask /= all 104 | depth = depth.reshape(IMAGE_WIDTH // 2 * IMAGE_HEIGHT // 2 * channel) 105 | depth_pred.append(depth) 106 | hand_mask_pred.append(mask.reshape(IMAGE_WIDTH//4*IMAGE_HEIGHT//4*channel)) 107 | 108 | if mode == 'predict': 109 | pass 110 | 111 | # train 112 | def train_handpose_depth_cnn(continueflag): 113 | global_step = tf.Variable(0, trainable=False) 114 | add_global = global_step.assign_add(1) 115 | return_global = global_step.assign(start_step) 116 | learning_rate = tf.train.exponential_decay(learning_rate = start_lr, global_step=global_step,decay_steps = 10000, decay_rate = 0.97)#,staircase=True) 117 | 118 | # Start TF 119 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8) 120 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 121 | tf.train.start_queue_runners(sess=sess) 122 | 123 | # Net-Output 124 | hand_scoremap = depth_handpose_fcn() 125 | 126 | # Loss 127 | loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hand_scoremap, labels=realMask)) 128 | optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss) 129 | 130 | # Predict 131 | predict = tf.reshape(hand_scoremap, [-1, IMAGE_HEIGHT, IMAGE_WIDTH]) 132 | max_idx_p = tf.argmax(predict, 2) 133 | max_idx_l = tf.argmax(tf.reshape(realMask, [-1, IMAGE_HEIGHT, IMAGE_WIDTH]), 2) 134 | correct_pred = tf.equal(max_idx_p, max_idx_l) 135 | accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) 136 | 137 | saver = tf.train.Saver() 138 | if continueflag == True: 139 | saver.restore(sess, saver_restore_addr) 140 | sess.run([return_global]) 141 | else: 142 | sess.run(tf.initialize_all_variables()) 143 | 144 | # training loop 145 | lossy = [[],[]] 146 | plt.figure(figsize=(7,4)) 147 | accuracy = [] 148 | while True: 149 | step, lr = sess.run([add_global, learning_rate]) 150 | batch_x , batch_y = get_next_data(batch_size=32) 151 | _, train_loss = sess.run([optimizer, loss], feed_dict={X: batch_x, realMask: batch_y, keep_prob: 0.5}) 152 | if step % trainstep == 0: 153 | batch_x, batch_y = get_next_data(batch_size=1) 154 | hand_scoremap1 = sess.run([hand_scoremap], feed_dict={X: batch_x, keep_prob: 1}) 155 | hand_scoremap1 = np.array(hand_scoremap1).reshape(1, 80, 80) 156 | [batch_x, batch_y] = [np.array(batch_x).reshape(1,160,160), np.array(batch_y).reshape(1,80,80,1)] 157 | 158 | for i in range(0, hand_scoremap1.shape[0]): 159 | fig = plt.figure(1) 160 | ax1 = fig.add_subplot('211') 161 | ax2 = fig.add_subplot('212') 162 | ax1.imshow(batch_x[i]) 163 | ax2.imshow(hand_scoremap1[i]) 164 | plt.pause(3) 165 | 166 | if step % savestep == 0: 167 | saver.save(sess, "./mycnnmodel/handposetemp-model.ckpt", global_step=step) 168 | tf.train.write_graph(sess.graph_def, "./mycnnmodel/","nn_model.pbtxt", False)#as_text=True) 169 | 170 | # simple evaluation on the accuracy of pixel-prediction result 171 | accuracy.append(cacul_accuracy(hand_scoremap1[0], batch_y[0])) 172 | print 'accuracy = ', accuracy[-1] 173 | print 'step,mean-accuracy = ', step, np.mean(accuracy) 174 | 175 | lossy[0].append(step) 176 | lossy[1].append(train_loss) 177 | print 'step= ',step, 'train_loss= ',train_loss 178 | plt.clf() 179 | plt.plot(lossy[0], lossy[1], color='blue') 180 | plt.xlabel('/Step', fontsize=15) 181 | plt.ylabel('/LOSS', fontsize=15) 182 | plt.title('FCN Training Loss Iteration', fontsize=18) 183 | plt.ylim(0, 1.0) 184 | plt.grid(True, linestyle="-.", color="black", linewidth="1") 
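# --------------------------------------------------------------------------------------------
# Illustrative aside (not part of the original training loop): the train_loss plotted here is
# tf.nn.softmax_cross_entropy_with_logits applied to the flattened 80x80 output, with the
# ground-truth mask normalized to sum to 1 and used as the label distribution. A minimal NumPy
# sketch of that per-sample loss, assuming `logits` and `labels` are flat vectors of length
# 80*80 = 6400:
#
#   import numpy as np
#
#   def softmax_cross_entropy(logits, labels):
#       # Numerically stable log-softmax, then the cross entropy against the label distribution.
#       z = logits - np.max(logits)
#       log_softmax = z - np.log(np.sum(np.exp(z)))
#       return -np.sum(labels * log_softmax)
# --------------------------------------------------------------------------------------------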
185 | plt.pause(0.01) 186 | 187 | # FCN 188 | def depth_handpose_fcn(w_alpha=0.01, b_alpha=0.1): 189 | x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT/2, IMAGE_WIDTH/2, channel]) 190 | w_c1 = tf.Variable(w_alpha * tf.random_normal([3, 3, 1, 64])) 191 | b_c1 = tf.Variable(b_alpha * tf.random_normal([64])) 192 | conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w_c1, strides=[1, 1, 1, 1], padding='SAME'), b_c1)) 193 | 194 | w_c2 = tf.Variable(w_alpha * tf.random_normal([7, 7, 64, 128])) 195 | b_c2 = tf.Variable(b_alpha * tf.random_normal([128])) 196 | conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, w_c2,strides=[1, 1, 1, 1], padding='SAME'), b_c2)) 197 | 198 | w_c2 = tf.Variable(w_alpha * tf.random_normal([7, 7, 128, 256])) 199 | b_c2 = tf.Variable(b_alpha * tf.random_normal([256])) 200 | conv2_1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, w_c2,strides=[1, 1, 1, 1], padding='SAME'), b_c2)) 201 | maxpool2 = tf.nn.max_pool(conv2_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 202 | 203 | w_c3 = tf.Variable(w_alpha * tf.random_normal([3, 3, 256, 128])) 204 | b_c3 = tf.Variable(b_alpha * tf.random_normal([128])) 205 | conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(maxpool2, w_c3, 206 | strides=[1, 1, 1, 1], padding='SAME'), b_c3)) 207 | 208 | w_c3_1 = tf.Variable(w_alpha * tf.random_normal([3, 3, 128, 128])) 209 | b_c3_1 = tf.Variable(b_alpha * tf.random_normal([128])) 210 | conv3_1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv3, w_c3_1, 211 | strides=[1, 1, 1, 1], padding='SAME'), b_c3_1)) 212 | dropout3 = tf.nn.dropout(conv3_1, keep_prob) 213 | 214 | w_c3_2 = tf.Variable(w_alpha * tf.random_normal([3, 3, 128, 32])) 215 | b_c3_2 = tf.Variable(b_alpha * tf.random_normal([32])) 216 | conv3_2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(dropout3, w_c3_2, 217 | strides=[1, 1, 1, 1], padding='SAME'), b_c3_2)) 218 | 219 | w_c4 = tf.Variable(w_alpha * tf.random_normal([3, 3, 32, 16])) 220 | b_c4 = tf.Variable(b_alpha * tf.random_normal([16])) 221 | conv4 = tf.nn.leaky_relu(tf.nn.bias_add(tf.nn.conv2d(conv3_2, w_c4, strides=[1, 1, 1, 1], padding='SAME'), b_c4)) 222 | maxpool4 = tf.nn.max_pool(conv4, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 223 | 224 | w_f = tf.Variable(w_alpha * tf.random_normal([40*40*16, 80*80*1])) 225 | b_f = tf.Variable(b_alpha * tf.random_normal([80*80*1])) 226 | dense = tf.reshape(maxpool4, [-1, w_f.get_shape().as_list()[0]]) 227 | conv_f = tf.nn.leaky_relu(tf.add(tf.matmul(dense, w_f), b_f)) 228 | hand_scoremap = net.fully_connected_relu(conv_f, 'hand_scoremap', 80*80*1) 229 | return hand_scoremap 230 | 231 | # classifier 232 | def gesture_classifier_cnn(w_alpha=0.01, b_alpha=0.1): 233 | with CL_graph.as_default(): 234 | x = tf.reshape(XX, shape=[-1, CLASSIFIER_IMAGE_HEIGHT, CLASSIFIER_IMAGE_WIDTH, 1]) 235 | w_c1 = tf.Variable(w_alpha * tf.random_normal([5, 5, 1, 64])) 236 | b_c1 = tf.Variable(b_alpha * tf.random_normal([64])) 237 | conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w_c1, strides=[1, 1, 1, 1], padding='SAME'), b_c1)) 238 | 239 | maxpool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 240 | dropout1 = tf.nn.dropout(maxpool1, kkeep_prob) 241 | 242 | w_c2 = tf.Variable(w_alpha * tf.random_normal([3, 3, 64, 64])) 243 | b_c2 = tf.Variable(b_alpha * tf.random_normal([64])) 244 | conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(dropout1, w_c2, 245 | strides=[1, 1, 1, 1], padding='SAME'), b_c2)) 246 | 247 | ww_c3 = tf.Variable(w_alpha * tf.random_normal([3, 3, 64, 128])) 248 | bb_c3 = tf.Variable(b_alpha * 
tf.random_normal([128])) 249 | conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, ww_c3, 250 | strides=[1, 1, 1, 1], padding='SAME'), bb_c3)) 251 | 252 | w_f1 = tf.Variable(w_alpha * tf.random_normal([25 * 25 * 128, 1024])) 253 | b_f1 = tf.Variable(b_alpha * tf.random_normal([1024])) 254 | h_f1 = tf.reshape(conv3,[-1,25*25*128]) 255 | h_fc1 = tf.nn.relu(tf.matmul(h_f1,w_f1)+b_f1) 256 | h_f_drop1 = tf.nn.dropout(h_fc1, kkeep_prob) 257 | 258 | # Fully connected layer 259 | w_f2 = tf.Variable(w_alpha * tf.random_normal([1024, 170])) 260 | b_f2 = tf.Variable(b_alpha * tf.random_normal([170])) 261 | dense = tf.reshape(h_f_drop1, [-1, w_f2.get_shape().as_list()[0]]) 262 | 263 | dense = tf.nn.relu(tf.add(tf.matmul(dense, w_f2), b_f2)) 264 | w_out = tf.Variable(w_alpha * tf.random_normal([170, HAND_NUM * GESTURE_CLASSES])) 265 | b_out = tf.Variable(b_alpha * tf.random_normal([HAND_NUM * GESTURE_CLASSES])) 266 | out = tf.add(tf.matmul(dense, w_out), b_out) 267 | return out 268 | 269 | with CL_graph.as_default(): 270 | XX = tf.placeholder(tf.float32, [None, CLASSIFIER_IMAGE_HEIGHT * CLASSIFIER_IMAGE_WIDTH], name='INPUT_IMAGE_HEIGHT_MULTI_WIDTH') 271 | YY = tf.placeholder(tf.float32, [None, CLASSIFIER_IMAGE_HEIGHT * CLASSIFIER_IMAGE_WIDTH], name='OUTPUT_ONE_HOTS') 272 | kkeep_prob = tf.placeholder(tf.float32) 273 | sess1 = tf.Session() 274 | classifier = gesture_classifier_cnn() 275 | saver1 = tf.train.Saver() 276 | saver1.restore(sess1, saver_restore_addr_classifier) 277 | 278 | def get_next_data(batch_size = 60): 279 | depth_pred_batch = [] 280 | hand_mask_pred_batch = [] 281 | for i in range(0,batch_size): 282 | sample_id = random.randint(0,len(depth_pred)-1) 283 | depth_pred_batch.append(depth_pred[sample_id]) 284 | hand_mask_pred_batch.append(hand_mask_pred[sample_id]) 285 | return depth_pred_batch, hand_mask_pred_batch 286 | 287 | 288 | def predict_handscoremap(): 289 | global_step = tf.Variable(0, trainable=False) 290 | add_global = global_step.assign_add(1) 291 | return_global = global_step.assign(start_step) 292 | start_lr = 1e-3 293 | learning_rate = tf.train.exponential_decay(learning_rate=start_lr, global_step=global_step, decay_steps=10000, 294 | decay_rate=0.97) # ,staircase=True) 295 | # Start TF 296 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8) 297 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 298 | tf.train.start_queue_runners(sess=sess) 299 | hand_scoremap = depth_handpose_fcn() 300 | 301 | # Loss 302 | saver = tf.train.Saver() 303 | saver.restore(sess, saver_restore_addr) 304 | while True: 305 | depth_pre,_ = ReadData.get_one_sample_form_RHD(depth=True,fatherdic=fatherdic) 306 | 307 | # Test 308 | hand_scoremap1 = sess.run([hand_scoremap], feed_dict={X: depth_pre, keep_prob: 0.5}) 309 | 310 | hand_scoremap1 = np.array(hand_scoremap1).reshape(1, 80, 80) 311 | [depth_pre, hand_scoremap1] = [np.array(depth_pre).reshape(1, 160, 160), 312 | np.array(hand_scoremap1).reshape(1, 80, 80)] 313 | # upsample 314 | hand_scoremap1 = cv2.resize(hand_scoremap1[0], (160,160)) 315 | hand_scoremap_cp, hand_scoremap1_show = PostTreatment.eliminate_bkground_from_handscoremap(hand_scoremap1, 316 | depth_pre, 317 | threshold=0.25, 318 | block_half_size=3) 319 | hand_depth_crop, box = PostTreatment.crop_mask(hand_scoremap_cp, uv_cood_noise = 5, dominate=True) 320 | 321 | hand_depth_crop = cv2.resize(hand_depth_crop,(CLASSIFIER_IMAGE_HEIGHT, CLASSIFIER_IMAGE_WIDTH)) 322 | crop = [] 323 | scale = 1000. 
324 | crop.append(PostTreatment.PreTreatment(hand_depth_crop*scale)) 325 | result = predict_classifier(np.array(crop)) 326 | 327 | # Visualization 328 | plt.close() 329 | fig = plt.figure(dpi=100,figsize=(10,10)) 330 | ax1 = fig.add_subplot('221') 331 | import matplotlib.patches as mpatches 332 | rect = mpatches.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1], fill=False, edgecolor='red', linewidth=2) 333 | ax1.add_patch(rect) 334 | ax2 = fig.add_subplot('222') 335 | ax3 = fig.add_subplot('223') 336 | ax4 = fig.add_subplot('224') 337 | ax1.imshow(depth_pre[0]+hand_scoremap_cp*10.) 338 | ax2.imshow(hand_scoremap1_show) 339 | ax3.imshow(hand_scoremap_cp) 340 | ax4.imshow(crop[0]) 341 | 342 | plt.show() 343 | 344 | def predict_classifier(hand_score_crop): 345 | hand_score_crop = hand_score_crop.reshape(1,2500) 346 | with CL_graph.as_default(): 347 | predict = tf.reshape(classifier, [-1, HAND_NUM, GESTURE_CLASSES]) 348 | max_idx_p = tf.argmax(predict, axis=2) 349 | gesture_classifier_result, score = sess1.run([max_idx_p, predict], feed_dict={XX: hand_score_crop, kkeep_prob: 1.}) 350 | print 'predict result:', gesture_classifier_result[0][0], 'score=', score[0,0,int(gesture_classifier_result[0][0])] 351 | return gesture_classifier_result[0][0] 352 | 353 | def cacul_accuracy(hand_scoremap, mask_raw): 354 | # mask 355 | max = np.max(hand_scoremap) 356 | hand_scoremap /= max 357 | for j in range(0,len(hand_scoremap)): 358 | for k in range(0,len(hand_scoremap[0])): 359 | if hand_scoremap[j][k] < 0.8: 360 | hand_scoremap[j][k] = 0 361 | else: 362 | hand_scoremap[j][k] = 1 363 | 364 | # calculate 365 | accuracy = 0. 366 | handscore_pre = hand_scoremap.reshape(6400) 367 | mask_raw = mask_raw.reshape(6400) 368 | for i in range(0, handscore_pre.shape[0]): 369 | if handscore_pre[i] == 0. and mask_raw[i] == 0.: 370 | accuracy += 1. 371 | if handscore_pre[i] != 0. and mask_raw[i] != 0.: 372 | accuracy += 1. 373 | return accuracy/(80*80) 374 | 375 | 376 | if __name__ == '__main__': 377 | if mode == 'train': 378 | train_handpose_depth_cnn(continueflag= True) 379 | if mode == 'predict': 380 | predict_handscoremap() 381 | -------------------------------------------------------------------------------- /Wonderseen_Handpose_cnn_depth/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wonderseen/Handpose-WonderSeen-Net/6a50925ea80b4c5a0ec05cf6ef22796ce42f17c6/Wonderseen_Handpose_cnn_depth/result.png -------------------------------------------------------------------------------- /Wonderseen_Handpose_cnn_depth/utils/general.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # ColorHandPose3DNetwork - Network for estimating 3D Hand Pose from a single RGB Image 3 | # Copyright (C) 2017 Christian Zimmermann 4 | # This program is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 
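# --------------------------------------------------------------------------------------------
# Illustrative aside (not part of this file): the cacul_accuracy routine in the script above
# compares the thresholded scoremap with the ground-truth mask pixel by pixel using Python
# loops. A minimal vectorized sketch of the same per-pixel agreement score, assuming both
# inputs are 80x80 NumPy arrays, could look like this:
#
#   import numpy as np
#
#   def pixel_agreement(scoremap, mask, threshold=0.8):
#       # Normalize the scoremap, binarize both maps, and measure how often they agree.
#       pred_fg = (scoremap / (np.max(scoremap) + 1e-8)) >= threshold
#       gt_fg = mask != 0.
#       return np.mean(pred_fg == gt_fg)
# --------------------------------------------------------------------------------------------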
16 | 17 | from __future__ import print_function, unicode_literals 18 | import tensorflow as tf 19 | from tensorflow.python import pywrap_tensorflow 20 | import numpy as np 21 | import math 22 | 23 | class NetworkOps(object): 24 | neg_slope_of_relu = 0.01 25 | 26 | @classmethod 27 | def leaky_relu(cls, tensor, name='relu'): 28 | out_tensor = tf.maximum(tensor, cls.neg_slope_of_relu*tensor, name=name) 29 | return out_tensor 30 | 31 | @classmethod 32 | def conv(cls, in_tensor, layer_name, kernel_size, stride, out_chan, trainable=True): 33 | with tf.variable_scope(layer_name): 34 | in_size = in_tensor.get_shape().as_list() 35 | 36 | strides = [1, stride, stride, 1] 37 | kernel_shape = [kernel_size, kernel_size, in_size[3], out_chan] # 38 | 39 | # conv 40 | kernel = tf.get_variable('weights', kernel_shape, tf.float32, 41 | tf.contrib.layers.xavier_initializer_conv2d(), trainable=trainable, collections=['wd', 'variables', 'filters']) 42 | tmp_result = tf.nn.conv2d(in_tensor, kernel, strides, padding='SAME') 43 | 44 | # bias 45 | biases = tf.get_variable('biases', [kernel_shape[3]], tf.float32, 46 | tf.constant_initializer(0.0001), trainable=trainable, collections=['wd', 'variables', 'biases']) 47 | out_tensor = tf.nn.bias_add(tmp_result, biases, name='out') 48 | 49 | return out_tensor 50 | 51 | @classmethod 52 | def conv_relu(cls, in_tensor, layer_name, kernel_size, stride, out_chan, trainable=True): 53 | tensor = cls.conv(in_tensor, layer_name, kernel_size, stride, out_chan, trainable) 54 | out_tensor = cls.leaky_relu(tensor, name='out') 55 | return out_tensor 56 | 57 | @classmethod 58 | def max_pool(cls, bottom, name='pool'): 59 | pooled = tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], 60 | padding='VALID', name=name) 61 | return pooled 62 | 63 | @classmethod 64 | def upconv(cls, in_tensor, layer_name, output_shape, kernel_size, stride, trainable=True): 65 | with tf.variable_scope(layer_name): 66 | in_size = in_tensor.get_shape().as_list() 67 | 68 | kernel_shape = [kernel_size, kernel_size, in_size[3], in_size[3]] 69 | strides = [1, stride, stride, 1] 70 | 71 | # conv 72 | kernel = cls.get_deconv_filter(kernel_shape, trainable) 73 | tmp_result = tf.nn.conv2d_transpose(value=in_tensor, filter=kernel, output_shape=output_shape, 74 | strides=strides, padding='SAME') 75 | 76 | # bias 77 | biases = tf.get_variable('biases', [kernel_shape[2]], tf.float32, 78 | tf.constant_initializer(0.0), trainable=trainable, collections=['wd', 'variables', 'biases']) 79 | out_tensor = tf.nn.bias_add(tmp_result, biases) 80 | return out_tensor 81 | 82 | @classmethod 83 | def upconv_relu(cls, in_tensor, layer_name, output_shape, kernel_size, stride, trainable=True): 84 | tensor = cls.upconv(in_tensor, layer_name, output_shape, kernel_size, stride, trainable) 85 | out_tensor = cls.leaky_relu(tensor, name='out') 86 | return out_tensor 87 | 88 | @staticmethod 89 | def get_deconv_filter(f_shape, trainable): 90 | width = f_shape[0] 91 | height = f_shape[1] 92 | f = math.ceil(width/2.0) 93 | c = (2 * f - 1 - f % 2) / (2.0 * f) 94 | bilinear = np.zeros([f_shape[0], f_shape[1]]) 95 | for x in range(width): 96 | for y in range(height): 97 | value = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) 98 | bilinear[x, y] = value 99 | weights = np.zeros(f_shape) 100 | for i in range(f_shape[2]): 101 | weights[:, :, i, i] = bilinear 102 | 103 | init = tf.constant_initializer(value=weights, 104 | dtype=tf.float32) 105 | return tf.get_variable(name="weights", initializer=init, 106 | shape=weights.shape, 
trainable=trainable, collections=['wd', 'variables', 'filters']) 107 | 108 | @staticmethod 109 | def fully_connected(in_tensor, layer_name, out_chan, trainable=True): 110 | with tf.variable_scope(layer_name): 111 | in_size = in_tensor.get_shape().as_list() 112 | assert len(in_size) == 2, 'Input to a fully connected layer must be a vector.' 113 | weights_shape = [in_size[1], out_chan] 114 | 115 | # weight matrix 116 | weights = tf.get_variable('weights', weights_shape, tf.float32, 117 | tf.contrib.layers.xavier_initializer(), trainable=trainable) 118 | weights = tf.check_numerics(weights, 'weights: %s' % layer_name) 119 | 120 | # bias 121 | biases = tf.get_variable('biases', [out_chan], tf.float32, 122 | tf.constant_initializer(0.0001), trainable=trainable) 123 | biases = tf.check_numerics(biases, 'biases: %s' % layer_name) 124 | 125 | out_tensor = tf.matmul(in_tensor, weights) + biases 126 | return out_tensor 127 | 128 | @classmethod 129 | def fully_connected_relu(cls, in_tensor, layer_name, out_chan, trainable=True): 130 | tensor = cls.fully_connected(in_tensor, layer_name, out_chan, trainable) 131 | out_tensor = tf.maximum(tensor, cls.neg_slope_of_relu*tensor, name='out') 132 | return out_tensor 133 | 134 | @staticmethod 135 | def dropout(in_tensor, keep_prob, evaluation): 136 | """ Dropout: Each neuron is dropped independently. """ 137 | with tf.variable_scope('dropout'): 138 | tensor_shape = in_tensor.get_shape().as_list() 139 | out_tensor = tf.cond(evaluation, 140 | lambda: tf.nn.dropout(in_tensor, 1.0, 141 | noise_shape=tensor_shape), 142 | lambda: tf.nn.dropout(in_tensor, keep_prob, 143 | noise_shape=tensor_shape)) 144 | return out_tensor 145 | 146 | @staticmethod 147 | def spatial_dropout(in_tensor, keep_prob, evaluation): 148 | """ Spatial dropout: Not each neuron is dropped independently, but feature map wise. """ 149 | with tf.variable_scope('spatial_dropout'): 150 | tensor_shape = in_tensor.get_shape().as_list() 151 | out_tensor = tf.cond(evaluation, 152 | lambda: tf.nn.dropout(in_tensor, 1.0, 153 | noise_shape=tensor_shape), 154 | lambda: tf.nn.dropout(in_tensor, keep_prob, 155 | noise_shape=[tensor_shape[0], 1, 1, tensor_shape[3]])) 156 | return out_tensor 157 | 158 | 159 | def crop_image_from_xy(image, crop_location, crop_size, scale=1.0): 160 | """ 161 | Crops an image. When factor is not given does an central crop. 
162 | 163 | Inputs: 164 | image: 4D tensor, [batch, height, width, channels] which will be cropped in height and width dimension 165 | crop_location: tensor, [batch, 2] which represent the height and width location of the crop 166 | crop_size: int, describes the extension of the crop 167 | Outputs: 168 | image_crop: 4D tensor, [batch, crop_size, crop_size, channels] 169 | """ 170 | with tf.name_scope('crop_image_from_xy'): 171 | s = image.get_shape().as_list() 172 | assert len(s) == 4, "Image needs to be of shape [batch, width, height, channel]" 173 | scale = tf.reshape(scale, [-1]) 174 | crop_location = tf.cast(crop_location, tf.float32) 175 | crop_location = tf.reshape(crop_location, [s[0], 2]) 176 | crop_size = tf.cast(crop_size, tf.float32) 177 | 178 | crop_size_scaled = crop_size / scale 179 | y1 = crop_location[:, 0] - crop_size_scaled//2 180 | y2 = y1 + crop_size_scaled 181 | x1 = crop_location[:, 1] - crop_size_scaled//2 182 | x2 = x1 + crop_size_scaled 183 | y1 /= s[1] 184 | y2 /= s[1] 185 | x1 /= s[2] 186 | x2 /= s[2] 187 | boxes = tf.stack([y1, x1, y2, x2], -1) 188 | 189 | crop_size = tf.cast(tf.stack([crop_size, crop_size]), tf.int32) 190 | box_ind = tf.range(s[0]) 191 | image_c = tf.image.crop_and_resize(tf.cast(image, tf.float32), boxes, box_ind, crop_size, name='crop') 192 | return image_c 193 | 194 | 195 | def find_max_location(scoremap): 196 | """ Returns the coordinates of the given scoremap with maximum value. """ 197 | with tf.variable_scope('find_max_location'): 198 | s = scoremap.get_shape().as_list() 199 | if len(s) == 4: 200 | scoremap = tf.squeeze(scoremap, [3]) 201 | if len(s) == 2: 202 | scoremap = tf.expand_dims(scoremap, 0) 203 | 204 | s = scoremap.get_shape().as_list() 205 | assert len(s) == 3, "Scoremap must be 3D." 206 | assert (s[0] < s[1]) and (s[0] < s[2]), "Scoremap must be [Batch, Width, Height]" 207 | 208 | # my meshgrid 209 | x_range = tf.expand_dims(tf.range(s[1]), 1) 210 | y_range = tf.expand_dims(tf.range(s[2]), 0) 211 | X = tf.tile(x_range, [1, s[2]]) 212 | Y = tf.tile(y_range, [s[1], 1]) 213 | 214 | x_vec = tf.reshape(X, [-1]) 215 | y_vec = tf.reshape(Y, [-1]) 216 | scoremap_vec = tf.reshape(scoremap, [s[0], -1]) 217 | max_ind_vec = tf.cast(tf.argmax(scoremap_vec, dimension=1), tf.int32) 218 | 219 | xy_loc = list() 220 | for i in range(s[0]): 221 | x_loc = tf.reshape(x_vec[max_ind_vec[i]], [1]) 222 | y_loc = tf.reshape(y_vec[max_ind_vec[i]], [1]) 223 | xy_loc.append(tf.concat([x_loc, y_loc], 0)) 224 | 225 | xy_loc = tf.stack(xy_loc, 0) 226 | return xy_loc 227 | 228 | 229 | def single_obj_scoremap(scoremap): 230 | """ Applies my algorithm to figure out the most likely object from a given segmentation scoremap. """ 231 | with tf.variable_scope('single_obj_scoremap'): 232 | filter_size = 21 233 | s = scoremap.get_shape().as_list() 234 | assert len(s) == 4, "Scoremap must be 4D." 
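# --------------------------------------------------------------------------------------------
# Illustrative aside (not part of this file): find_max_location above builds a meshgrid and
# gathers the argmax per batch entry inside the TensorFlow graph. Outside the graph, the same
# 2D location of a scoremap maximum can be recovered with plain NumPy (the same pattern that
# detect_keypoints further below uses), e.g. for a single [height, width] scoremap:
#
#   import numpy as np
#
#   def max_location_np(scoremap):
#       # Flat index of the largest value, converted back to (row, col) coordinates.
#       v, u = np.unravel_index(np.argmax(scoremap), scoremap.shape)
#       return v, u
# --------------------------------------------------------------------------------------------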
235 | 236 | scoremap_softmax = tf.nn.softmax(scoremap) #B, H, W, C --> normalizes across last dimension 237 | scoremap_fg = tf.reduce_max(scoremap_softmax[:, :, :, 1:], 3) # B, H, W 238 | detmap_fg = tf.round(scoremap_fg) # B, H, W 239 | 240 | # find maximum in the fg scoremap 241 | max_loc = find_max_location(scoremap_fg) 242 | 243 | # use maximum to start "growing" our objectmap 244 | objectmap_list = list() 245 | kernel_dil = tf.ones((filter_size, filter_size, 1)) / float(filter_size*filter_size) 246 | for i in range(s[0]): 247 | # create initial objectmap (put a one at the maximum) 248 | sparse_ind = tf.reshape(max_loc[i, :], [1, 2]) # reshape that its one point with 2dim) 249 | objectmap = tf.sparse_to_dense(sparse_ind, [s[1], s[2]], 1.0) 250 | 251 | # grow the map by dilation and pixelwise and 252 | num_passes = max(s[1], s[2]) // (filter_size//2) # number of passes needes to make sure the map can spread over the whole image 253 | for j in range(num_passes): 254 | objectmap = tf.reshape(objectmap, [1, s[1], s[2], 1]) 255 | objectmap_dil = tf.nn.dilation2d(objectmap, kernel_dil, [1, 1, 1, 1], [1, 1, 1, 1], 'SAME') 256 | objectmap_dil = tf.reshape(objectmap_dil, [s[1], s[2]]) 257 | objectmap = tf.round(tf.multiply(detmap_fg[i, :, :], objectmap_dil)) 258 | 259 | objectmap = tf.reshape(objectmap, [s[1], s[2], 1]) 260 | objectmap_list.append(objectmap) 261 | 262 | objectmap = tf.stack(objectmap_list) 263 | 264 | return objectmap 265 | 266 | 267 | def calc_center_bb(binary_class_mask): 268 | """ Returns the center of mass coordinates for the given binary_class_mask. """ 269 | with tf.variable_scope('calc_center_bb'): 270 | binary_class_mask = tf.cast(binary_class_mask, tf.int32) 271 | binary_class_mask = tf.equal(binary_class_mask, 1) 272 | s = binary_class_mask.get_shape().as_list() 273 | if len(s) == 4: 274 | binary_class_mask = tf.squeeze(binary_class_mask, [3]) 275 | 276 | s = binary_class_mask.get_shape().as_list() 277 | assert len(s) == 3, "binary_class_mask must be 3D." 
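# --------------------------------------------------------------------------------------------
# Illustrative aside (not part of this file): single_obj_scoremap above keeps only the
# foreground blob containing the global maximum by repeatedly dilating a seed map and masking
# it with the rounded foreground map. Outside the graph, a comparable result for a single 2D
# foreground scoremap can be obtained with connected-component labelling; a sketch, assuming
# SciPy is available:
#
#   import numpy as np
#   from scipy.ndimage import label
#
#   def keep_blob_with_max(scoremap_fg):
#       # Binarize, label connected components, and keep the component containing the maximum.
#       binary = np.round(scoremap_fg)
#       labels, _ = label(binary)
#       v, u = np.unravel_index(np.argmax(scoremap_fg), scoremap_fg.shape)
#       if labels[v, u] == 0:  # the maximum fell on a background pixel
#           return np.zeros_like(binary)
#       return (labels == labels[v, u]).astype(np.float32)
# --------------------------------------------------------------------------------------------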
278 | assert (s[0] < s[1]) and (s[0] < s[2]), "binary_class_mask must be [Batch, Width, Height]" 279 | 280 | # my meshgrid 281 | x_range = tf.expand_dims(tf.range(s[1]), 1) 282 | y_range = tf.expand_dims(tf.range(s[2]), 0) 283 | X = tf.tile(x_range, [1, s[2]]) 284 | Y = tf.tile(y_range, [s[1], 1]) 285 | 286 | bb_list = list() 287 | center_list = list() 288 | crop_size_list = list() 289 | for i in range(s[0]): 290 | X_masked = tf.cast(tf.boolean_mask(X, binary_class_mask[i, :, :]), tf.float32) 291 | Y_masked = tf.cast(tf.boolean_mask(Y, binary_class_mask[i, :, :]), tf.float32) 292 | 293 | x_min = tf.reduce_min(X_masked) 294 | x_max = tf.reduce_max(X_masked) 295 | y_min = tf.reduce_min(Y_masked) 296 | y_max = tf.reduce_max(Y_masked) 297 | 298 | start = tf.stack([x_min, y_min]) 299 | end = tf.stack([x_max, y_max]) 300 | bb = tf.stack([start, end], 1) 301 | bb_list.append(bb) 302 | 303 | center_x = 0.5*(x_max + x_min) 304 | center_y = 0.5*(y_max + y_min) 305 | center = tf.stack([center_x, center_y], 0) 306 | 307 | center = tf.cond(tf.reduce_all(tf.is_finite(center)), lambda: center, 308 | lambda: tf.constant([160.0, 160.0])) 309 | center.set_shape([2]) 310 | center_list.append(center) 311 | 312 | crop_size_x = x_max - x_min 313 | crop_size_y = y_max - y_min 314 | crop_size = tf.expand_dims(tf.maximum(crop_size_x, crop_size_y), 0) 315 | crop_size = tf.cond(tf.reduce_all(tf.is_finite(crop_size)), lambda: crop_size, 316 | lambda: tf.constant([100.0])) 317 | crop_size.set_shape([1]) 318 | crop_size_list.append(crop_size) 319 | 320 | bb = tf.stack(bb_list) 321 | center = tf.stack(center_list) 322 | crop_size = tf.stack(crop_size_list) 323 | 324 | return center, bb, crop_size 325 | 326 | 327 | def detect_keypoints(scoremaps): 328 | """ Performs detection per scoremap for the hands keypoints. """ 329 | if len(scoremaps.shape) == 4: 330 | scoremaps = np.squeeze(scoremaps) 331 | s = scoremaps.shape 332 | assert len(s) == 3, "This function was only designed for 3D Scoremaps." 333 | assert (s[2] < s[1]) and (s[2] < s[0]), "Probably the input is not correct, because [H, W, C] is expected." 334 | 335 | keypoint_coords = np.zeros((s[2], 2)) 336 | for i in range(s[2]): 337 | v, u = np.unravel_index(np.argmax(scoremaps[:, :, i]), (s[0], s[1])) 338 | keypoint_coords[i, 0] = v 339 | keypoint_coords[i, 1] = u 340 | return keypoint_coords 341 | 342 | 343 | def trafo_coords(keypoints_crop_coords, centers, scale, crop_size): 344 | """ Transforms coords into global image coordinates. """ 345 | keypoints_coords = np.copy(keypoints_crop_coords) 346 | 347 | keypoints_coords -= crop_size // 2 348 | 349 | keypoints_coords /= scale 350 | 351 | keypoints_coords += centers 352 | 353 | return keypoints_coords 354 | 355 | 356 | def plot_hand(coords_hw, axis, color_fixed=None, linewidth='1'): 357 | """ Plots a hand stick figure into a matplotlib figure. 
""" 358 | colors = np.array([[0., 0., 0.5], 359 | [0., 0., 0.73172906], 360 | [0., 0., 0.96345811], 361 | [0., 0.12745098, 1.], 362 | [0., 0.33137255, 1.], 363 | [0., 0.55098039, 1.], 364 | [0., 0.75490196, 1.], 365 | [0.06008855, 0.9745098, 0.90765338], 366 | [0.22454143, 1., 0.74320051], 367 | [0.40164453, 1., 0.56609741], 368 | [0.56609741, 1., 0.40164453], 369 | [0.74320051, 1., 0.22454143], 370 | [0.90765338, 1., 0.06008855], 371 | [1., 0.82861293, 0.], 372 | [1., 0.63979666, 0.], 373 | [1., 0.43645606, 0.], 374 | [1., 0.2476398, 0.], 375 | [0.96345811, 0.0442992, 0.], 376 | [0.73172906, 0., 0.], 377 | [0.5, 0., 0.]]) 378 | 379 | # define connections and colors of the bones 380 | bones = [((0, 4), colors[0, :]), 381 | ((4, 3), colors[1, :]), 382 | ((3, 2), colors[2, :]), 383 | ((2, 1), colors[3, :]), 384 | 385 | ((0, 8), colors[4, :]), 386 | ((8, 7), colors[5, :]), 387 | ((7, 6), colors[6, :]), 388 | ((6, 5), colors[7, :]), 389 | 390 | ((0, 12), colors[8, :]), 391 | ((12, 11), colors[9, :]), 392 | ((11, 10), colors[10, :]), 393 | ((10, 9), colors[11, :]), 394 | 395 | ((0, 16), colors[12, :]), 396 | ((16, 15), colors[13, :]), 397 | ((15, 14), colors[14, :]), 398 | ((14, 13), colors[15, :]), 399 | 400 | ((0, 20), colors[16, :]), 401 | ((20, 19), colors[17, :]), 402 | ((19, 18), colors[18, :]), 403 | ((18, 17), colors[19, :])] 404 | 405 | for connection, color in bones: 406 | coord1 = coords_hw[connection[0], :] 407 | coord2 = coords_hw[connection[1], :] 408 | coords = np.stack([coord1, coord2]) 409 | if color_fixed is None: 410 | axis.plot(coords[:, 1], coords[:, 0], color=color, linewidth=linewidth) 411 | else: 412 | axis.plot(coords[:, 1], coords[:, 0], color_fixed, linewidth=linewidth) 413 | 414 | 415 | def plot_hand_3d(coords_xyz, axis, color_fixed=None, linewidth='1'): 416 | """ Plots a hand stick figure into a matplotlib figure. 
""" 417 | colors = np.array([[0., 0., 0.5], 418 | [0., 0., 0.73172906], 419 | [0., 0., 0.96345811], 420 | [0., 0.12745098, 1.], 421 | [0., 0.33137255, 1.], 422 | [0., 0.55098039, 1.], 423 | [0., 0.75490196, 1.], 424 | [0.06008855, 0.9745098, 0.90765338], 425 | [0.22454143, 1., 0.74320051], 426 | [0.40164453, 1., 0.56609741], 427 | [0.56609741, 1., 0.40164453], 428 | [0.74320051, 1., 0.22454143], 429 | [0.90765338, 1., 0.06008855], 430 | [1., 0.82861293, 0.], 431 | [1., 0.63979666, 0.], 432 | [1., 0.43645606, 0.], 433 | [1., 0.2476398, 0.], 434 | [0.96345811, 0.0442992, 0.], 435 | [0.73172906, 0., 0.], 436 | [0.5, 0., 0.]]) 437 | 438 | # define connections and colors of the bones 439 | bones = [((0, 4), colors[0, :]), 440 | ((4, 3), colors[1, :]), 441 | ((3, 2), colors[2, :]), 442 | ((2, 1), colors[3, :]), 443 | 444 | ((0, 8), colors[4, :]), 445 | ((8, 7), colors[5, :]), 446 | ((7, 6), colors[6, :]), 447 | ((6, 5), colors[7, :]), 448 | 449 | ((0, 12), colors[8, :]), 450 | ((12, 11), colors[9, :]), 451 | ((11, 10), colors[10, :]), 452 | ((10, 9), colors[11, :]), 453 | 454 | ((0, 16), colors[12, :]), 455 | ((16, 15), colors[13, :]), 456 | ((15, 14), colors[14, :]), 457 | ((14, 13), colors[15, :]), 458 | 459 | ((0, 20), colors[16, :]), 460 | ((20, 19), colors[17, :]), 461 | ((19, 18), colors[18, :]), 462 | ((18, 17), colors[19, :])] 463 | 464 | for connection, color in bones: 465 | coord1 = coords_xyz[connection[0], :] 466 | coord2 = coords_xyz[connection[1], :] 467 | coords = np.stack([coord1, coord2]) 468 | if color_fixed is None: 469 | axis.plot(coords[:, 0], coords[:, 1], coords[:, 2], color=color, linewidth=linewidth) 470 | else: 471 | axis.plot(coords[:, 0], coords[:, 1], coords[:, 2], color_fixed, linewidth=linewidth) 472 | 473 | axis.view_init(azim=-90., elev=90.) 474 | 475 | 476 | class LearningRateScheduler: 477 | """ 478 | Provides scalar tensors at certain iteration as is needed for a multistep learning rate schedule. 479 | 根据用户定制在不同的step,对学习率进行调整 480 | """ 481 | def __init__(self, steps, values): 482 | self.steps = steps 483 | self.values = values 484 | 485 | assert len(steps)+1 == len(values), "There must be one more element in value as step." 486 | 487 | def get_lr(self, global_step): 488 | with tf.name_scope('lr_scheduler'): 489 | 490 | if len(self.values) == 1: #1 value -> no step 491 | learning_rate = tf.constant(self.values[0]) 492 | elif len(self.values) == 2: #2 values -> one step 493 | cond = tf.greater(global_step, self.steps[0]) 494 | learning_rate = tf.where(cond, self.values[1], self.values[0]) 495 | else: # n values -> n-1 steps 496 | cond_first = tf.less(global_step, self.steps[0]) 497 | 498 | cond_between = list() 499 | for ind, step in enumerate(range(0, len(self.steps)-1)): 500 | cond_between.append(tf.logical_and(tf.less(global_step, self.steps[ind+1]), 501 | tf.greater_equal(global_step, self.steps[ind]))) 502 | 503 | cond_last = tf.greater_equal(global_step, self.steps[-1]) 504 | 505 | cond_full = [cond_first] 506 | cond_full.extend(cond_between) 507 | cond_full.append(cond_last) 508 | 509 | cond_vec = tf.stack(cond_full) 510 | lr_vec = tf.stack(self.values) 511 | 512 | learning_rate = tf.where(cond_vec, lr_vec, tf.zeros_like(lr_vec)) 513 | 514 | learning_rate = tf.reduce_sum(learning_rate) 515 | 516 | return learning_rate 517 | 518 | 519 | class EvalUtil: 520 | """ Util class for evaluation networks. 
521 | """ 522 | def __init__(self, num_kp=21): 523 | # init empty data storage 524 | self.data = list() 525 | self.num_kp = num_kp 526 | for _ in range(num_kp): 527 | self.data.append(list()) 528 | 529 | def feed(self, keypoint_gt, keypoint_vis, keypoint_pred): 530 | """ Used to feed data to the class. Stores the euclidean distance between gt and pred, when it is visible. """ 531 | keypoint_gt = np.squeeze(keypoint_gt) 532 | keypoint_pred = np.squeeze(keypoint_pred) 533 | keypoint_vis = np.squeeze(keypoint_vis).astype('bool') 534 | 535 | assert len(keypoint_gt.shape) == 2 536 | assert len(keypoint_pred.shape) == 2 537 | assert len(keypoint_vis.shape) == 1 538 | 539 | # calc euclidean distance 540 | diff = keypoint_gt - keypoint_pred 541 | euclidean_dist = np.sqrt(np.sum(np.square(diff), axis=1)) 542 | 543 | num_kp = keypoint_gt.shape[0] 544 | for i in range(num_kp): 545 | if keypoint_vis[i]: 546 | self.data[i].append(euclidean_dist[i]) 547 | 548 | def _get_pck(self, kp_id, threshold): 549 | """ Returns pck for one keypoint for the given threshold. """ 550 | if len(self.data[kp_id]) == 0: 551 | return None 552 | 553 | data = np.array(self.data[kp_id]) 554 | pck = np.mean((data <= threshold).astype('float')) 555 | return pck 556 | 557 | def _get_epe(self, kp_id): 558 | """ Returns end point error for one keypoint. """ 559 | if len(self.data[kp_id]) == 0: 560 | return None, None 561 | 562 | data = np.array(self.data[kp_id]) 563 | epe_mean = np.mean(data) 564 | epe_median = np.median(data) 565 | return epe_mean, epe_median 566 | 567 | def get_measures(self, val_min, val_max, steps): 568 | """ Outputs the average mean and median error as well as the pck score. """ 569 | thresholds = np.linspace(val_min, val_max, steps) 570 | thresholds = np.array(thresholds) 571 | norm_factor = np.trapz(np.ones_like(thresholds), thresholds) 572 | 573 | # init mean measures 574 | epe_mean_all = list() 575 | epe_median_all = list() 576 | auc_all = list() 577 | pck_curve_all = list() 578 | 579 | # Create one plot for each part 580 | for part_id in range(self.num_kp): 581 | # mean/median error 582 | mean, median = self._get_epe(part_id) 583 | 584 | if mean is None: 585 | # there was no valid measurement for this keypoint 586 | continue 587 | 588 | epe_mean_all.append(mean) 589 | epe_median_all.append(median) 590 | 591 | # pck/auc 592 | pck_curve = list() 593 | for t in thresholds: 594 | pck = self._get_pck(part_id, t) 595 | pck_curve.append(pck) 596 | 597 | pck_curve = np.array(pck_curve) 598 | pck_curve_all.append(pck_curve) 599 | auc = np.trapz(pck_curve, thresholds) 600 | auc /= norm_factor 601 | auc_all.append(auc) 602 | 603 | epe_mean_all = np.mean(np.array(epe_mean_all)) 604 | epe_median_all = np.mean(np.array(epe_median_all)) 605 | auc_all = np.mean(np.array(auc_all)) 606 | pck_curve_all = np.mean(np.array(pck_curve_all), 0) # mean only over keypoints 607 | 608 | return epe_mean_all, epe_median_all, auc_all, pck_curve_all, thresholds 609 | 610 | 611 | def load_weights_from_snapshot(session, checkpoint_path, discard_list=None, rename_dict=None): 612 | """ Loads weights from a snapshot except the ones indicated with discard_list. Others are possibly renamed. 
""" 613 | reader = pywrap_tensorflow.NewCheckpointReader(checkpoint_path) 614 | var_to_shape_map = reader.get_variable_to_shape_map() 615 | 616 | # Remove everything from the discard list 617 | if discard_list is not None: 618 | num_disc = 0 619 | var_to_shape_map_new = dict() 620 | for k, v in var_to_shape_map.items(): 621 | good = True 622 | for dis_str in discard_list: 623 | if dis_str in k: 624 | good = False 625 | 626 | if good: 627 | var_to_shape_map_new[k] = v 628 | else: 629 | num_disc += 1 630 | var_to_shape_map = dict(var_to_shape_map_new) 631 | print('Discarded %d items' % num_disc) 632 | 633 | # rename everything according to rename_dict 634 | num_rename = 0 635 | var_to_shape_map_new = dict() 636 | for name in var_to_shape_map.keys(): 637 | new_name = name 638 | if rename_dict is not None: 639 | for rename_str in rename_dict.keys(): 640 | if rename_str in name: 641 | new_name = new_name.replace(rename_str, rename_dict[rename_str]) 642 | num_rename += 1 643 | var_to_shape_map_new[new_name] = reader.get_tensor(name) 644 | var_to_shape_map = dict(var_to_shape_map_new) 645 | 646 | init_op, init_feed = tf.contrib.framework.assign_from_values(var_to_shape_map) 647 | session.run(init_op, init_feed) 648 | print('Initialized %d variables from %s.' % (len(var_to_shape_map), checkpoint_path)) 649 | 650 | 651 | def calc_auc(x, y): 652 | """ Given x and y values it calculates the approx. integral and normalizes it: area under curve""" 653 | integral = np.trapz(y, x) 654 | norm = np.trapz(np.ones_like(y), x) 655 | 656 | return integral / norm 657 | 658 | 659 | def get_stb_ref_curves(): 660 | """ 661 | Returns results of various baseline methods on the Stereo Tracking Benchmark Dataset reported by: 662 | Zhang et al., ‘3d Hand Pose Tracking and Estimation Using Stereo Matching’, 2016 663 | """ 664 | curve_list = list() 665 | thresh_mm = np.array([20.0, 25, 30, 35, 40, 45, 50]) 666 | pso_b1 = np.array([0.32236842, 0.53947368, 0.67434211, 0.75657895, 0.80921053, 0.86513158, 0.89473684]) 667 | curve_list.append((thresh_mm, pso_b1, 'PSO (AUC=%.3f)' % calc_auc(thresh_mm, pso_b1))) 668 | icppso_b1 = np.array([ 0.51973684, 0.64473684, 0.71710526, 0.77302632, 0.80921053, 0.84868421, 0.86842105]) 669 | curve_list.append((thresh_mm, icppso_b1, 'ICPPSO (AUC=%.3f)' % calc_auc(thresh_mm, icppso_b1))) 670 | chpr_b1 = np.array([ 0.56578947, 0.71710526, 0.82236842, 0.88157895, 0.91447368, 0.9375, 0.96052632]) 671 | curve_list.append((thresh_mm, chpr_b1, 'CHPR (AUC=%.3f)' % calc_auc(thresh_mm, chpr_b1))) 672 | return curve_list 673 | -------------------------------------------------------------------------------- /result/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wonderseen/Handpose-WonderSeen-Net/6a50925ea80b4c5a0ec05cf6ef22796ce42f17c6/result/test.png -------------------------------------------------------------------------------- /result/test1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wonderseen/Handpose-WonderSeen-Net/6a50925ea80b4c5a0ec05cf6ef22796ce42f17c6/result/test1.png -------------------------------------------------------------------------------- /result/test2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wonderseen/Handpose-WonderSeen-Net/6a50925ea80b4c5a0ec05cf6ef22796ce42f17c6/result/test2.png 
-------------------------------------------------------------------------------- /result/test3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wonderseen/Handpose-WonderSeen-Net/6a50925ea80b4c5a0ec05cf6ef22796ce42f17c6/result/test3.png -------------------------------------------------------------------------------- /result/test4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wonderseen/Handpose-WonderSeen-Net/6a50925ea80b4c5a0ec05cf6ef22796ce42f17c6/result/test4.png -------------------------------------------------------------------------------- /result/test5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wonderseen/Handpose-WonderSeen-Net/6a50925ea80b4c5a0ec05cf6ef22796ce42f17c6/result/test5.png -------------------------------------------------------------------------------- /result/test6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wonderseen/Handpose-WonderSeen-Net/6a50925ea80b4c5a0ec05cf6ef22796ce42f17c6/result/test6.png --------------------------------------------------------------------------------
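The EvalUtil class defined in Wonderseen_Handpose_cnn_depth/utils/general.py accumulates per-keypoint errors and summarizes them as end-point error, PCK curves, and a normalized area under the curve. A minimal usage sketch with toy keypoints (the random data and the import path are illustrative assumptions; it presumes the utils package is importable from the Wonderseen_Handpose_cnn_depth directory):

import numpy as np
from utils.general import EvalUtil

# Accumulate errors for a few toy frames of 21 2D keypoints.
util = EvalUtil(num_kp=21)
for _ in range(10):
    kp_gt = np.random.rand(21, 2) * 80.0        # ground-truth keypoints in pixels
    kp_pred = kp_gt + np.random.randn(21, 2)    # noisy predictions
    kp_vis = np.ones(21, dtype=bool)            # all keypoints marked visible
    util.feed(kp_gt, kp_vis, kp_pred)

# Mean/median end-point error plus the PCK curve and its normalized AUC for thresholds 0..30 px.
epe_mean, epe_median, auc, pck_curve, thresholds = util.get_measures(0.0, 30.0, 20)
print('mean EPE: %.2f px, AUC: %.3f' % (epe_mean, auc))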