├── README.md
├── classify
│   └── classmain.py
├── config.py
├── models
│   ├── __init__.py
│   ├── __pycache__
│   │   └── __init__.cpython-35.pyc
│   └── nets
│       ├── CPM.py
│       ├── __init__.py
│       ├── __pycache__
│       │   ├── CPM.cpython-35.pyc
│       │   ├── __init__.cpython-35.pyc
│       │   └── cpm_hand.cpython-35.pyc
│       ├── cpm_body.py
│       ├── cpm_body_slim.py
│       ├── cpm_hand.py
│       ├── cpm_hand_slim.py
│       └── cpm_hand_v2.py
├── run_demo_hand_with_tracker.py
├── useClassifyModel.py
└── utils
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-35.pyc
    │   ├── cpm_utils.cpython-35.pyc
    │   ├── tracking_module.cpython-35.pyc
    │   └── utils.cpython-35.pyc
    ├── cpm_utils.py
    ├── create_cpm_tfr_fulljoints.py
    ├── tf_utils.py
    ├── tracking_module.py
    └── utils.py

/README.md:
--------------------------------------------------------------------------------
1 | # HandGestureClassify
2 | TensorFlow-based hand gesture detection and classification
3 |
4 | Blog post: https://blog.csdn.net/yyyerica/article/details/80151473
5 |
6 | The original hand detection code comes from: https://github.com/timctho/convolutional-pose-machines-tensorflow
7 |
8 | The classification code is based on: https://blog.csdn.net/Enchanted_ZhouH/article/details/74116823
9 |
10 |
11 | Usage:
12 |
13 | 1. Hand gesture detection
14 |
15 | Run run_demo_hand_with_tracker.py for real-time hand gesture detection.
16 |
17 | Change DEMO_TYPE in config.py to switch the type of image that is output.
18 |
19 | Uncomment the cv2.imwrite('./storePic/11'+str(i)+'.jpg', local_img.astype(np.uint8),[int(cv2.IMWRITE_JPEG_QUALITY), 90]) statement in run_demo_hand_with_tracker.py to save the cropped images into the project directory; the target directory can be changed as needed.
20 |
21 |
22 | 2. Hand gesture image classification
23 |
24 | classmain.py trains the classifier.
25 |
26 | The gesture dataset used for training lives in classify/handGesturePic; run run_demo_hand_with_tracker.py yourself to save images as the training set.
27 |
28 | Trained models are stored in classify/modelSave.
29 |
30 | Run useClassifyModel.py to verify the classification results.
31 |
32 | Note: please download the hand detection model from https://github.com/timctho/convolutional-pose-machines-tensorflow (TensorFlow version).
33 |
34 | The gesture classification model parameters must be trained and saved with classmain.py yourself.
35 |
--------------------------------------------------------------------------------
/classify/classmain.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from skimage import io, transform
3 | import glob
4 | import os
5 | import tensorflow as tf
6 | import numpy as np
7 | import time
8 |
9 | path = './handGesturePic/'
10 | # Path where the trained model is saved
11 | model_path = './modelSave/model.ckpt'
12 |
13 | # Resize all images to 100x100
14 | w = 100
15 | h = 100
16 | c = 3
17 |
18 |
19 | # Read the images; each subfolder of `path` is one gesture class
20 | def read_img(path):
21 |     cate = [path + '/' + x for x in os.listdir(path) if os.path.isdir(path + '/' + x)]
22 |     imgs = []
23 |     labels = []
24 |     for idx, folder in enumerate(cate):
25 |         print('reading the images:%s' % (folder))
26 |         for im in glob.glob(folder + '/*.jpg'):
27 |             img = io.imread(im)
28 |             img = transform.resize(img, (w, h))
29 |             imgs.append(img)
30 |             labels.append(idx)
31 |     return np.asarray(imgs, np.float32), np.asarray(labels, np.int32)
32 |
33 |
34 | data, label = read_img(path)  # data: 4038 images of shape (100, 100, 3); label: 4038 values in 0..5
35 |
36 | # Shuffle the samples
37 | num_example = data.shape[0]  # 4038
38 | arr = np.arange(num_example)  # [0 1 2 ... 4037]
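# (editor's note, optional) Seeding NumPy before the shuffle below, e.g. np.random.seed(42),
# would make the shuffle -- and therefore the train/validation split -- reproducible across runs.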
39 | np.random.shuffle(arr)  # shuffle arr in place
40 | data = data[arr]
41 | label = label[arr]
42 |
43 | # Split all the data into a training set and a validation set
44 | ratio = 0.8
45 | s = int(num_example * ratio)
46 | x_train = data[:s]
47 | y_train = label[:s]
48 | x_val = data[s:]  # validation set
49 | y_val = label[s:]
50 |
51 | # ----------------- Build the network ----------------------
52 | # Placeholders
53 | x = tf.placeholder(tf.float32, shape=[None, w, h, c], name='x')
54 | y_ = tf.placeholder(tf.int32, shape=[None, ], name='y_')
55 |
56 |
57 | # 100x100x3 -> 100x100x32 -> 50x50x32 -> 50x50x64 -> 25x25x64 -> 25x25x128 -> 12x12x128 -> 12x12x128 -> 6x6x128
58 | def inference(input_tensor, train, regularizer):  # regularizer = tf.contrib.layers.l2_regularizer(0.0001)
59 |
60 |     '''
61 |     tf.nn.conv2d(input, filter, strides (usually [1, 1, 1, 1]), padding, use_cudnn_on_gpu=None, data_format=None, name=None)
62 |     input tensor: [batch, in_height, in_width, in_channels]
63 |     filter/kernel tensor: [filter_height, filter_width (kernel size), in_channels (input channels), out_channels (output channels)]
64 |
65 |     It performs the following operations:
66 |     Flattens the filter to a 2-D matrix of shape [filter_height * filter_width * in_channels, output_channels].
67 |     Extracts filter-sized patches from the input to form a virtual tensor of shape [batch, out_height, out_width, filter_height * filter_width * in_channels].
68 |     For each patch, right-multiplies the filter matrix.
69 |     '''
70 |
71 |     with tf.variable_scope('layer1-conv1'):
72 |         conv1_weights = tf.get_variable("weight", [5, 5, 3, 32], initializer=tf.truncated_normal_initializer(stddev=0.1))
73 |         conv1_biases = tf.get_variable("bias", [32], initializer=tf.constant_initializer(0.0))
74 |         conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
75 |         # With padding='SAME' and stride 1, the output keeps the input's spatial shape
76 |         relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))
77 |
78 |     with tf.name_scope("layer2-pool1"):
79 |         pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
80 |
81 |     with tf.variable_scope("layer3-conv2"):
82 |         conv2_weights = tf.get_variable("weight", [5, 5, 32, 64], initializer=tf.truncated_normal_initializer(stddev=0.1))
83 |         conv2_biases = tf.get_variable("bias", [64], initializer=tf.constant_initializer(0.0))
84 |         conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
85 |         relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
86 |
87 |     with tf.name_scope("layer4-pool2"):
88 |         pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
89 |
90 |     with tf.variable_scope("layer5-conv3"):
91 |         conv3_weights = tf.get_variable("weight", [3, 3, 64, 128], initializer=tf.truncated_normal_initializer(stddev=0.1))
92 |         conv3_biases = tf.get_variable("bias", [128], initializer=tf.constant_initializer(0.0))
93 |         conv3 = tf.nn.conv2d(pool2, conv3_weights, strides=[1, 1, 1, 1], padding='SAME')
94 |         relu3 = tf.nn.relu(tf.nn.bias_add(conv3, conv3_biases))
95 |
96 |     with tf.name_scope("layer6-pool3"):
97 |         pool3 = tf.nn.max_pool(relu3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
98 |
99 |     with tf.variable_scope("layer7-conv4"):
100 |         conv4_weights = tf.get_variable("weight", [3, 3, 128, 128], initializer=tf.truncated_normal_initializer(stddev=0.1))
101 |         conv4_biases = tf.get_variable("bias", [128], initializer=tf.constant_initializer(0.0))
102 |         conv4 = tf.nn.conv2d(pool3, conv4_weights, strides=[1, 1, 1, 1], padding='SAME')
103 |         relu4 = tf.nn.relu(tf.nn.bias_add(conv4, conv4_biases))
104 |
105 |     with tf.name_scope("layer8-pool4"):
106 |         pool4 = tf.nn.max_pool(relu4, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
107 |         nodes = 6 * 6 * 128
108 |         reshaped = tf.reshape(pool4, [-1, nodes])
109 |
110 |     with tf.variable_scope('layer9-fc1'):
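        # (editor's note) After four 2x2 max-pools the 100x100 input is down to 6x6x128,
        # so `reshaped` above holds 6*6*128 = 4608 features; the fully connected head
        # below then maps 4608 -> 1024 -> 512 -> 6 class scores.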
111 |         fc1_weights = tf.get_variable("weight", [nodes, 1024],
112 |                                       initializer=tf.truncated_normal_initializer(stddev=0.1))
113 |         if regularizer != None: tf.add_to_collection('losses', regularizer(fc1_weights))
114 |         # tf.add_to_collection adds the tensor to a named collection in the current graph
115 |         fc1_biases = tf.get_variable("bias", [1024], initializer=tf.constant_initializer(0.1))
116 |
117 |         fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
118 |         if train: fc1 = tf.nn.dropout(fc1, 0.5)
119 |
120 |     with tf.variable_scope('layer10-fc2'):
121 |         fc2_weights = tf.get_variable("weight", [1024, 512],
122 |                                       initializer=tf.truncated_normal_initializer(stddev=0.1))
123 |         if regularizer != None: tf.add_to_collection('losses', regularizer(fc2_weights))
124 |         fc2_biases = tf.get_variable("bias", [512], initializer=tf.constant_initializer(0.1))
125 |
126 |         fc2 = tf.nn.relu(tf.matmul(fc1, fc2_weights) + fc2_biases)
127 |         if train: fc2 = tf.nn.dropout(fc2, 0.5)
128 |
129 |     with tf.variable_scope('layer11-fc3'):
130 |         fc3_weights = tf.get_variable("weight", [512, 6],
131 |                                       initializer=tf.truncated_normal_initializer(stddev=0.1))
132 |         if regularizer != None: tf.add_to_collection('losses', regularizer(fc3_weights))
133 |         fc3_biases = tf.get_variable("bias", [6], initializer=tf.constant_initializer(0.1))
134 |         logit = tf.matmul(fc2, fc3_weights) + fc3_biases
135 |
136 |     return logit
137 |
138 | # --------------------------- End of network ---------------------------
139 | regularizer = tf.contrib.layers.l2_regularizer(0.0001)  # Returns a function that applies L2 regularization; adding a regularization term to the loss is an important way to prevent overfitting
140 | logits = inference(x, False, regularizer)
141 |
142 | # (small trick) Multiply logits by 1 and give the result an explicit name, so that the
143 | # output tensor can later be fetched by name when the saved model is reused
144 | b = tf.constant(value=1, dtype=tf.float32)
145 | logits_eval = tf.multiply(logits, b, name='logits_eval')
146 |
147 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_)
148 | # Average the per-example cross entropy over the batch and add the L2 terms that were
149 | # collected into 'losses' above (the original code collected them but never used them)
150 | loss = tf.reduce_mean(cross_entropy) + tf.add_n(tf.get_collection('losses'))
151 | train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
152 | correct_prediction = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), y_)
153 | # tf.equal returns a `Tensor` of type `bool`; tf.cast casts a tensor to a new type and
154 | # returns a `Tensor` (or `SparseTensor`) with the same shape as its input (only the dtype changes)
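# (editor's illustrative example) For a batch of two with logits rows [0.1, 3.2, 0.0, ...] and
# [2.5, 0.3, 0.1, ...], tf.argmax gives predicted classes [1, 0]; if y_ is [1, 2], tf.equal
# yields [True, False] and the mean computed below is 0.5, i.e. 50% batch accuracy.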
155 | acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
156 |
157 |
158 | # Helper that yields the data in mini-batches
159 | def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
160 |     assert len(inputs) == len(targets)
161 |     if shuffle:
162 |         indices = np.arange(len(inputs))
163 |         np.random.shuffle(indices)
164 |     for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
165 |         if shuffle:
166 |             excerpt = indices[start_idx:start_idx + batch_size]
167 |         else:
168 |             excerpt = slice(start_idx, start_idx + batch_size)
169 |         yield inputs[excerpt], targets[excerpt]
170 |
171 |
172 | # Train and evaluate; n_epoch can be set larger for better results
173 |
174 | n_epoch = 10
175 | batch_size = 64
176 | saver = tf.train.Saver()
177 | sess = tf.Session()
178 | sess.run(tf.global_variables_initializer())
179 | for epoch in range(n_epoch):
180 |     start_time = time.time()
181 |
182 |     # training
183 |     train_loss, train_acc, n_batch = 0, 0, 0
184 |     for x_train_a, y_train_a in minibatches(x_train, y_train, batch_size, shuffle=True):
185 |         _, err, ac = sess.run([train_op, loss, acc], feed_dict={x: x_train_a, y_: y_train_a})
186 |         train_loss += err
187 |         train_acc += ac
188 |         n_batch += 1
189 |
190 |     print("----------------epoch: %d-------------------" % epoch)
191 |     print("   train loss: %f" % (train_loss / n_batch))
192 |     print("   train acc: %f" % (train_acc / n_batch))
193 |
194 |     # validation
195 |     val_loss, val_acc, n_batch = 0, 0, 0
196 |     for x_val_a, y_val_a in minibatches(x_val, y_val, batch_size, shuffle=False):
197 |         err, ac = sess.run([loss, acc], feed_dict={x: x_val_a, y_: y_val_a})
198 |         val_loss += err
199 |         val_acc += ac
200 |         n_batch += 1
201 |
202 |     print("   validation loss: %f" % (val_loss / n_batch))
203 |     print("   validation acc: %f" % (val_acc / n_batch))
204 |     print('\n')
205 |
206 | saver.save(sess, model_path)
207 |
208 | sess.close()
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | class FLAGS(object):
2 |     """ """
3 |     """
4 |     General settings
5 |     """
6 |     input_size = 256
7 |     heatmap_size = 32
8 |     cpm_stages = 3
9 |     joint_gaussian_variance = 1.0
10 |     center_radius = 21
11 |     num_of_joints = 21
12 |     color_channel = 'RGB'
13 |     normalize_img = True
14 |     use_gpu = True
15 |     gpu_id = 0
16 |
17 |
18 |     """
19 |     Demo settings
20 |     """
21 |     # 'MULTI': show multiple stage heatmaps
22 |     # 'SINGLE': show last stage heatmap
23 |     # 'Joint_HM': show last stage heatmap for each joint
24 |     # 'image or video path': show detection on single image or video
25 |     DEMO_TYPE = 'MULTI'
26 |
27 |     model_path = 'cpm_hand'
28 |     cam_id = 0
29 |
30 |     webcam_height = 480
31 |     webcam_width = 640
32 |
33 |     use_kalman = True
34 |     kalman_noise = 0.03
35 |
36 |
37 |     """
38 |     Training settings
39 |     """
40 |     network_def = 'cpm_hand'
41 |     train_img_dir = ''
42 |     val_img_dir = ''
43 |     bg_img_dir = ''
44 |     pretrained_model = 'cpm_hand'
45 |     batch_size = 5
46 |     init_lr = 0.001
47 |     lr_decay_rate = 0.5
48 |     lr_decay_step = 10000
49 |     training_iters = 300000
50 |     verbose_iters = 10
51 |     validation_iters = 1000
52 |     model_save_iters = 5000
53 |     augmentation_config = {'hue_shift_limit': (-5, 5),
54 |                            'sat_shift_limit': (-10, 10),
55 |                            'val_shift_limit': (-15, 15),
56 |                            'translation_limit': (-0.15, 0.15),
57 |                            'scale_limit': (-0.3, 0.5),
58 |                            'rotate_limit': (-90, 90)}
59 |     hnm = True  # Make sure to generate the hnm files first
60 | do_cropping = True 61 | 62 | """ 63 | For Freeze graphs 64 | """ 65 | output_node_names = 'stage_3/mid_conv7/BiasAdd:0' 66 | 67 | 68 | """ 69 | For Drawing 70 | """ 71 | # Default Pose 72 | default_hand = [[259, 335], 73 | [245, 311], 74 | [226, 288], 75 | [206, 270], 76 | [195, 261], 77 | [203, 308], 78 | [165, 290], 79 | [139, 287], 80 | [119, 284], 81 | [199, 328], 82 | [156, 318], 83 | [128, 314], 84 | [104, 318], 85 | [204, 341], 86 | [163, 340], 87 | [133, 347], 88 | [108, 349], 89 | [206, 359], 90 | [176, 368], 91 | [164, 370], 92 | [144, 377]] 93 | 94 | # Limb connections 95 | limbs = [[0, 1], 96 | [1, 2], 97 | [2, 3], 98 | [3, 4], 99 | [0, 5], 100 | [5, 6], 101 | [6, 7], 102 | [7, 8], 103 | [0, 9], 104 | [9, 10], 105 | [10, 11], 106 | [11, 12], 107 | [0, 13], 108 | [13, 14], 109 | [14, 15], 110 | [15, 16], 111 | [0, 17], 112 | [17, 18], 113 | [18, 19], 114 | [19, 20] 115 | ] 116 | 117 | # Finger colors 118 | joint_color_code = [[139, 53, 255], 119 | [0, 56, 255], 120 | [43, 140, 237], 121 | [37, 168, 36], 122 | [147, 147, 0], 123 | [70, 17, 145]] 124 | 125 | # My hand joint order 126 | # FLAGS.limbs = [[0, 1], 127 | # [1, 2], 128 | # [2, 3], 129 | # [3, 20], 130 | # [4, 5], 131 | # [5, 6], 132 | # [6, 7], 133 | # [7, 20], 134 | # [8, 9], 135 | # [9, 10], 136 | # [10, 11], 137 | # [11, 20], 138 | # [12, 13], 139 | # [13, 14], 140 | # [14, 15], 141 | # [15, 20], 142 | # [16, 17], 143 | # [17, 18], 144 | # [18, 19], 145 | # [19, 20] 146 | # ] 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/models/__init__.py -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/models/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /models/nets/CPM.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod, abstractproperty 2 | 3 | class CPM(object): 4 | __metaclass__ = ABCMeta 5 | 6 | @abstractmethod 7 | def __init__(self, input_size, heatmap_size, stages, joints, img_type='RGB'): 8 | pass 9 | 10 | @abstractmethod 11 | def _build_model(self): 12 | pass 13 | 14 | @abstractmethod 15 | def build_loss(self, lr, lr_decay_rate, lr_decay_step, optimizer='Adam'): 16 | pass 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /models/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/models/nets/__init__.py -------------------------------------------------------------------------------- /models/nets/__pycache__/CPM.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/models/nets/__pycache__/CPM.cpython-35.pyc 
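(Editor's note) CPM.py above only fixes the interface that every network in models/nets implements; the concrete graphs live in cpm_hand.py and cpm_body.py below. A minimal sketch of that contract, using a hypothetical subclass name purely for illustration:

import tensorflow as tf
from models.nets.CPM import CPM

class MinimalCPM(CPM):  # hypothetical example, not part of the repository
    def __init__(self, input_size, heatmap_size, stages, joints, img_type='RGB'):
        self.stages = stages
        self.joints = joints
        self.stage_heatmap = []  # one predicted heatmap tensor per stage
        self.input_images = tf.placeholder(tf.float32, (None, input_size, input_size, 3),
                                           name='input_placeholder')
        self._build_model()

    def _build_model(self):
        # The real models build a VGG-style feature extractor here, then append
        # one refinement head per stage to self.stage_heatmap.
        pass

    def build_loss(self, lr, lr_decay_rate, lr_decay_step, optimizer='Adam'):
        # The real models sum per-stage L2 heatmap losses and attach an optimizer here.
        pass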
-------------------------------------------------------------------------------- /models/nets/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/models/nets/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /models/nets/__pycache__/cpm_hand.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/models/nets/__pycache__/cpm_hand.cpython-35.pyc -------------------------------------------------------------------------------- /models/nets/cpm_body.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import pickle 3 | 4 | 5 | class CPM_Model(object): 6 | def __init__(self, stages, joints): 7 | self.stages = stages 8 | self.stage_heatmap = [] 9 | self.stage_loss = [0] * stages 10 | self.total_loss = 0 11 | self.input_image = None 12 | self.center_map = None 13 | self.gt_heatmap = None 14 | self.learning_rate = 0 15 | self.merged_summary = None 16 | self.joints = joints 17 | self.batch_size = 0 18 | 19 | def build_model(self, input_image, center_map, batch_size): 20 | self.batch_size = batch_size 21 | self.input_image = input_image 22 | self.center_map = center_map 23 | with tf.variable_scope('pooled_center_map'): 24 | self.center_map = tf.layers.average_pooling2d(inputs=self.center_map, 25 | pool_size=[9, 9], 26 | strides=[8, 8], 27 | padding='same', 28 | name='center_map') 29 | with tf.variable_scope('sub_stages'): 30 | sub_conv1 = tf.layers.conv2d(inputs=input_image, 31 | filters=64, 32 | kernel_size=[3, 3], 33 | strides=[1, 1], 34 | padding='same', 35 | activation=tf.nn.relu, 36 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 37 | name='sub_conv1') 38 | sub_conv2 = tf.layers.conv2d(inputs=sub_conv1, 39 | filters=64, 40 | kernel_size=[3, 3], 41 | strides=[1, 1], 42 | padding='same', 43 | activation=tf.nn.relu, 44 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 45 | name='sub_conv2') 46 | sub_pool1 = tf.layers.max_pooling2d(inputs=sub_conv2, 47 | pool_size=[2, 2], 48 | strides=2, 49 | padding='same', 50 | name='sub_pool1') 51 | sub_conv3 = tf.layers.conv2d(inputs=sub_pool1, 52 | filters=128, 53 | kernel_size=[3, 3], 54 | strides=[1, 1], 55 | padding='same', 56 | activation=tf.nn.relu, 57 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 58 | name='sub_conv3') 59 | sub_conv4 = tf.layers.conv2d(inputs=sub_conv3, 60 | filters=128, 61 | kernel_size=[3, 3], 62 | strides=[1, 1], 63 | padding='same', 64 | activation=tf.nn.relu, 65 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 66 | name='sub_conv4') 67 | sub_pool2 = tf.layers.max_pooling2d(inputs=sub_conv4, 68 | pool_size=[2, 2], 69 | strides=2, 70 | padding='same', 71 | name='sub_pool2') 72 | sub_conv5 = tf.layers.conv2d(inputs=sub_pool2, 73 | filters=256, 74 | kernel_size=[3, 3], 75 | strides=[1, 1], 76 | padding='same', 77 | activation=tf.nn.relu, 78 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 79 | name='sub_conv5') 80 | sub_conv6 = tf.layers.conv2d(inputs=sub_conv5, 81 | filters=256, 82 | kernel_size=[3, 3], 83 | strides=[1, 1], 84 | padding='same', 85 | activation=tf.nn.relu, 86 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 87 
| name='sub_conv6') 88 | sub_conv7 = tf.layers.conv2d(inputs=sub_conv6, 89 | filters=256, 90 | kernel_size=[3, 3], 91 | strides=[1, 1], 92 | padding='same', 93 | activation=tf.nn.relu, 94 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 95 | name='sub_conv7') 96 | sub_conv8 = tf.layers.conv2d(inputs=sub_conv7, 97 | filters=256, 98 | kernel_size=[3, 3], 99 | strides=[1, 1], 100 | padding='same', 101 | activation=tf.nn.relu, 102 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 103 | name='sub_conv8') 104 | sub_pool3 = tf.layers.max_pooling2d(inputs=sub_conv8, 105 | pool_size=[2, 2], 106 | strides=2, 107 | padding='same', 108 | name='sub_pool3') 109 | sub_conv9 = tf.layers.conv2d(inputs=sub_pool3, 110 | filters=512, 111 | kernel_size=[3, 3], 112 | strides=[1, 1], 113 | padding='same', 114 | activation=tf.nn.relu, 115 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 116 | name='sub_conv9') 117 | sub_conv10 = tf.layers.conv2d(inputs=sub_conv9, 118 | filters=512, 119 | kernel_size=[3, 3], 120 | strides=[1, 1], 121 | padding='same', 122 | activation=tf.nn.relu, 123 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 124 | name='sub_conv10') 125 | sub_conv11 = tf.layers.conv2d(inputs=sub_conv10, 126 | filters=256, 127 | kernel_size=[3, 3], 128 | strides=[1, 1], 129 | padding='same', 130 | activation=tf.nn.relu, 131 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 132 | name='sub_conv11') 133 | sub_conv12 = tf.layers.conv2d(inputs=sub_conv11, 134 | filters=256, 135 | kernel_size=[3, 3], 136 | strides=[1, 1], 137 | padding='same', 138 | activation=tf.nn.relu, 139 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 140 | name='sub_conv12') 141 | sub_conv13 = tf.layers.conv2d(inputs=sub_conv12, 142 | filters=256, 143 | kernel_size=[3, 3], 144 | strides=[1, 1], 145 | padding='same', 146 | activation=tf.nn.relu, 147 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 148 | name='sub_conv13') 149 | sub_conv14 = tf.layers.conv2d(inputs=sub_conv13, 150 | filters=256, 151 | kernel_size=[3, 3], 152 | strides=[1, 1], 153 | padding='same', 154 | activation=tf.nn.relu, 155 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 156 | name='sub_conv14') 157 | 158 | self.sub_stage_img_feature = tf.layers.conv2d(inputs=sub_conv14, 159 | filters=128, 160 | kernel_size=[3, 3], 161 | strides=[1, 1], 162 | padding='same', 163 | activation=tf.nn.relu, 164 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 165 | name='sub_stage_img_feature') 166 | 167 | with tf.variable_scope('stage_1'): 168 | conv1 = tf.layers.conv2d(inputs=self.sub_stage_img_feature, 169 | filters=512, 170 | kernel_size=[1, 1], 171 | strides=[1, 1], 172 | padding='same', 173 | activation=tf.nn.relu, 174 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 175 | name='conv1') 176 | self.stage_heatmap.append(tf.layers.conv2d(inputs=conv1, 177 | filters=self.joints, 178 | kernel_size=[1, 1], 179 | strides=[1, 1], 180 | padding='same', 181 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 182 | name='stage_heatmap')) 183 | for stage in range(2, self.stages + 1): 184 | self._middle_conv(stage) 185 | 186 | def _middle_conv(self, stage): 187 | with tf.variable_scope('stage_' + str(stage)): 188 | self.current_featuremap = tf.concat([self.stage_heatmap[stage - 2], 189 | self.sub_stage_img_feature, 190 | self.center_map, 191 | ], 192 | axis=3) 193 | mid_conv1 = tf.layers.conv2d(inputs=self.current_featuremap, 194 | filters=128, 195 | kernel_size=[7, 7], 
196 | strides=[1, 1], 197 | padding='same', 198 | activation=tf.nn.relu, 199 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 200 | name='mid_conv1') 201 | mid_conv2 = tf.layers.conv2d(inputs=mid_conv1, 202 | filters=128, 203 | kernel_size=[7, 7], 204 | strides=[1, 1], 205 | padding='same', 206 | activation=tf.nn.relu, 207 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 208 | name='mid_conv2') 209 | mid_conv3 = tf.layers.conv2d(inputs=mid_conv2, 210 | filters=128, 211 | kernel_size=[7, 7], 212 | strides=[1, 1], 213 | padding='same', 214 | activation=tf.nn.relu, 215 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 216 | name='mid_conv3') 217 | mid_conv4 = tf.layers.conv2d(inputs=mid_conv3, 218 | filters=128, 219 | kernel_size=[7, 7], 220 | strides=[1, 1], 221 | padding='same', 222 | activation=tf.nn.relu, 223 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 224 | name='mid_conv4') 225 | mid_conv5 = tf.layers.conv2d(inputs=mid_conv4, 226 | filters=128, 227 | kernel_size=[7, 7], 228 | strides=[1, 1], 229 | padding='same', 230 | activation=tf.nn.relu, 231 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 232 | name='mid_conv5') 233 | mid_conv6 = tf.layers.conv2d(inputs=mid_conv5, 234 | filters=128, 235 | kernel_size=[1, 1], 236 | strides=[1, 1], 237 | padding='same', 238 | activation=tf.nn.relu, 239 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 240 | name='mid_conv6') 241 | self.current_heatmap = tf.layers.conv2d(inputs=mid_conv6, 242 | filters=self.joints, 243 | kernel_size=[1, 1], 244 | strides=[1, 1], 245 | padding='same', 246 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 247 | name='mid_conv7') 248 | self.stage_heatmap.append(self.current_heatmap) 249 | 250 | def build_loss(self, gt_heatmap, lr, lr_decay_rate, lr_decay_step): 251 | self.gt_heatmap = gt_heatmap 252 | self.total_loss = 0 253 | self.learning_rate = lr 254 | self.lr_decay_rate = lr_decay_rate 255 | self.lr_decay_step = lr_decay_step 256 | 257 | for stage in range(self.stages): 258 | with tf.variable_scope('stage' + str(stage + 1) + '_loss'): 259 | self.stage_loss[stage] = tf.nn.l2_loss(self.stage_heatmap[stage] - self.gt_heatmap, 260 | name='l2_loss') / self.batch_size 261 | tf.summary.scalar('stage' + str(stage + 1) + '_loss', self.stage_loss[stage]) 262 | 263 | with tf.variable_scope('total_loss'): 264 | for stage in range(self.stages): 265 | self.total_loss += self.stage_loss[stage] 266 | tf.summary.scalar('total loss', self.total_loss) 267 | 268 | with tf.variable_scope('train'): 269 | self.global_step = tf.contrib.framework.get_or_create_global_step() 270 | 271 | self.lr = tf.train.exponential_decay(self.learning_rate, 272 | global_step=self.global_step, 273 | decay_rate=self.lr_decay_rate, 274 | decay_steps=self.lr_decay_step) 275 | tf.summary.scalar('learning rate', self.lr) 276 | 277 | self.train_op = tf.contrib.layers.optimize_loss(loss=self.total_loss, 278 | global_step=self.global_step, 279 | learning_rate=self.lr, 280 | optimizer='Adam') 281 | self.merged_summary = tf.summary.merge_all() 282 | 283 | def load_weights_from_file(self, weight_file_path, sess, finetune=True): 284 | weights = pickle.load(open(weight_file_path, 'rb'), encoding='latin1') 285 | 286 | with tf.variable_scope('', reuse=True): 287 | ## Pre stage conv 288 | # conv1 289 | for layer in range(1, 3): 290 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/kernel') 291 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/bias') 
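                # (editor's note) The pickled weight file uses Caffe-style layer names such as
                # 'conv1_1' / 'conv1_1_b', while the TF graph names its variables
                # 'sub_stages/sub_convN/kernel' and '.../bias'; each loop in this method pairs
                # the two naming schemes and copies the arrays across with tf.assign.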
292 | 293 | loaded_kernel = weights['conv1_' + str(layer)] 294 | loaded_bias = weights['conv1_' + str(layer) + '_b'] 295 | 296 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 297 | sess.run(tf.assign(conv_bias, loaded_bias)) 298 | 299 | # conv2 300 | for layer in range(1, 3): 301 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/kernel') 302 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/bias') 303 | 304 | loaded_kernel = weights['conv2_' + str(layer)] 305 | loaded_bias = weights['conv2_' + str(layer) + '_b'] 306 | 307 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 308 | sess.run(tf.assign(conv_bias, loaded_bias)) 309 | 310 | # conv3 311 | for layer in range(1, 5): 312 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/kernel') 313 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/bias') 314 | 315 | loaded_kernel = weights['conv3_' + str(layer)] 316 | loaded_bias = weights['conv3_' + str(layer) + '_b'] 317 | 318 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 319 | sess.run(tf.assign(conv_bias, loaded_bias)) 320 | 321 | # conv4 322 | for layer in range(1, 3): 323 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/kernel') 324 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/bias') 325 | 326 | loaded_kernel = weights['conv4_' + str(layer)] 327 | loaded_bias = weights['conv4_' + str(layer) + '_b'] 328 | 329 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 330 | sess.run(tf.assign(conv_bias, loaded_bias)) 331 | 332 | # conv4_CPM 333 | for layer in range(1, 5): 334 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 10) + '/kernel') 335 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 10) + '/bias') 336 | 337 | loaded_kernel = weights['conv4_' + str(2 + layer) + '_CPM'] 338 | loaded_bias = weights['conv4_' + str(2 + layer) + '_CPM_b'] 339 | 340 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 341 | sess.run(tf.assign(conv_bias, loaded_bias)) 342 | 343 | # conv5_3_CPM 344 | conv_kernel = tf.get_variable('sub_stages/sub_stage_img_feature/kernel') 345 | conv_bias = tf.get_variable('sub_stages/sub_stage_img_feature/bias') 346 | 347 | loaded_kernel = weights['conv4_7_CPM'] 348 | loaded_bias = weights['conv4_7_CPM_b'] 349 | 350 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 351 | sess.run(tf.assign(conv_bias, loaded_bias)) 352 | 353 | ## stage 1 354 | conv_kernel = tf.get_variable('stage_1/conv1/kernel') 355 | conv_bias = tf.get_variable('stage_1/conv1/bias') 356 | 357 | loaded_kernel = weights['conv5_1_CPM'] 358 | loaded_bias = weights['conv5_1_CPM_b'] 359 | 360 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 361 | sess.run(tf.assign(conv_bias, loaded_bias)) 362 | 363 | if finetune != True: 364 | conv_kernel = tf.get_variable('stage_1/stage_heatmap/kernel') 365 | conv_bias = tf.get_variable('stage_1/stage_heatmap/bias') 366 | 367 | loaded_kernel = weights['conv5_2_CPM'] 368 | loaded_bias = weights['conv5_2_CPM_b'] 369 | 370 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 371 | sess.run(tf.assign(conv_bias, loaded_bias)) 372 | 373 | ## stage 2 and behind 374 | for stage in range(2, self.stages + 1): 375 | for layer in range(1, 8): 376 | conv_kernel = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/kernel') 377 | conv_bias = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/bias') 378 | 379 | loaded_kernel = weights['Mconv' + str(layer) + '_stage' + str(stage)] 380 | 
loaded_bias = weights['Mconv' + str(layer) + '_stage' + str(stage) + '_b'] 381 | 382 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 383 | sess.run(tf.assign(conv_bias, loaded_bias)) 384 | -------------------------------------------------------------------------------- /models/nets/cpm_body_slim.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import tensorflow as tf 3 | import tensorflow.contrib.slim as slim 4 | 5 | 6 | class CPM_Model(object): 7 | def __init__(self, stages, joints): 8 | self.stages = stages 9 | self.stage_heatmap = [] 10 | self.stage_loss = [0] * stages 11 | self.total_loss = 0 12 | self.input_image = None 13 | self.center_map = None 14 | self.gt_heatmap = None 15 | self.learning_rate = 0 16 | self.merged_summary = None 17 | self.joints = joints 18 | self.batch_size = 0 19 | 20 | def build_model(self, input_image, center_map, batch_size): 21 | self.batch_size = batch_size 22 | self.input_image = input_image 23 | self.center_map = center_map 24 | with tf.variable_scope('pooled_center_map'): 25 | self.center_map = slim.avg_pool2d(self.center_map, 26 | [9, 9], stride=8, 27 | padding='SAME', 28 | scope='center_map') 29 | with slim.arg_scope([slim.conv2d], 30 | padding='SAME', 31 | activation_fn=tf.nn.relu, 32 | weights_initializer=tf.contrib.layers.xavier_initializer()): 33 | with tf.variable_scope('sub_stages'): 34 | net = slim.conv2d(input_image, 64, [3, 3], scope='sub_conv1') 35 | net = slim.conv2d(net, 64, [3, 3], scope='sub_conv2') 36 | net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool1') 37 | net = slim.conv2d(net, 128, [3, 3], scope='sub_conv3') 38 | net = slim.conv2d(net, 128, [3, 3], scope='sub_conv4') 39 | net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool2') 40 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv5') 41 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv6') 42 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv7') 43 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv8') 44 | net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool3') 45 | net = slim.conv2d(net, 512, [3, 3], scope='sub_conv9') 46 | net = slim.conv2d(net, 512, [3, 3], scope='sub_conv10') 47 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv11') 48 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv12') 49 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv13') 50 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv14') 51 | self.sub_stage_img_feature = slim.conv2d(net, 128, [3, 3], 52 | scope='sub_stage_img_feature') 53 | 54 | with tf.variable_scope('stage_1'): 55 | conv1 = slim.conv2d(self.sub_stage_img_feature, 512, [1, 1], 56 | scope='conv1') 57 | self.stage_heatmap.append(slim.conv2d(conv1, self.joints, [1, 1], 58 | scope='stage_heatmap')) 59 | 60 | for stage in range(2, self.stages+1): 61 | self._middle_conv(stage) 62 | 63 | def _middle_conv(self, stage): 64 | with tf.variable_scope('stage_' + str(stage)): 65 | self.current_featuremap = tf.concat([self.stage_heatmap[stage-2], 66 | self.sub_stage_img_feature, 67 | self.center_map], 68 | axis=3) 69 | with slim.arg_scope([slim.conv2d], 70 | padding='SAME', 71 | activation_fn=tf.nn.relu, 72 | weights_initializer=tf.contrib.layers.xavier_initializer()): 73 | mid_net = slim.conv2d(self.current_featuremap, 128, [7, 7], scope='mid_conv1') 74 | mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv2') 75 | mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv3') 76 | mid_net = slim.conv2d(mid_net, 128, [7, 7], 
scope='mid_conv4')
77 |                 mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv5')
78 |                 mid_net = slim.conv2d(mid_net, 128, [1, 1], scope='mid_conv6')
79 |                 self.current_heatmap = slim.conv2d(mid_net, self.joints, [1, 1],
80 |                                                    scope='mid_conv7')
81 |             self.stage_heatmap.append(self.current_heatmap)
82 |
83 |     def build_loss(self, gt_heatmap, lr, lr_decay_rate, lr_decay_step):
84 |         self.gt_heatmap = gt_heatmap
85 |         self.total_loss = 0
86 |         self.learning_rate = lr
87 |         self.lr_decay_rate = lr_decay_rate
88 |         self.lr_decay_step = lr_decay_step
89 |
90 |         for stage in range(self.stages):
91 |             with tf.variable_scope('stage' + str(stage+1) + '_loss'):
92 |                 self.stage_loss[stage] = tf.nn.l2_loss(self.stage_heatmap[stage] - self.gt_heatmap,
93 |                                                        name='l2_loss') / self.batch_size
94 |             tf.summary.scalar('stage' + str(stage+1) + '_loss', self.stage_loss[stage])
95 |
96 |         with tf.variable_scope('total_loss'):
97 |             for stage in range(self.stages):
98 |                 self.total_loss += self.stage_loss[stage]
99 |             tf.summary.scalar('total loss', self.total_loss)
100 |
101 |         with tf.variable_scope('train'):
102 |             self.global_step = tf.contrib.framework.get_or_create_global_step()
103 |
104 |             self.lr = tf.train.exponential_decay(self.learning_rate,
105 |                                                  global_step=self.global_step,
106 |                                                  decay_rate=self.lr_decay_rate,
107 |                                                  decay_steps=self.lr_decay_step)
108 |             tf.summary.scalar('learning rate', self.lr)
109 |
110 |             self.train_op = tf.contrib.layers.optimize_loss(loss=self.total_loss,
111 |                                                             global_step=self.global_step,
112 |                                                             learning_rate=self.lr,
113 |                                                             optimizer='Adam')
114 |         self.merged_summary = tf.summary.merge_all()
115 |
116 |     def load_weights_from_file(self, weight_file_path, sess, finetune=True):
117 |         weights = pickle.load(open(weight_file_path, 'rb'), encoding='latin1')
118 |
119 |         with tf.variable_scope('', reuse=True):
120 |             ## Pre stage conv
121 |             # for layer in range(1, 15):
122 |             #     conv_weights = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/weights')
123 |             #     conv_biases = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/biases')
124 |             #
125 |             #     loaded_weights = weights['sub_conv' + str(layer)]
126 |             #     loaded_biases = weights['sub_conv' + str(layer)]
127 |             #
128 |             #     sess.run(tf.assign(conv_weights, loaded_weights))
129 |             #     sess.run(tf.assign(conv_biases, loaded_biases))
130 |
131 |             # conv1
132 |             for layer in range(1, 3):
133 |                 conv_weights = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/weights')
134 |                 conv_biases = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/biases')
135 |
136 |                 loaded_weights = weights['conv1_' + str(layer)]
137 |                 loaded_biases = weights['conv1_' + str(layer) + '_b']
138 |
139 |                 sess.run(tf.assign(conv_weights, loaded_weights))
140 |                 sess.run(tf.assign(conv_biases, loaded_biases))
141 |
142 |             # conv2
143 |             for layer in range(1, 3):
144 |                 conv_weights = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/weights')
145 |                 conv_biases = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/biases')
146 |
147 |                 loaded_weights = weights['conv2_' + str(layer)]
148 |                 loaded_biases = weights['conv2_' + str(layer) + '_b']
149 |
150 |                 sess.run(tf.assign(conv_weights, loaded_weights))
151 |                 sess.run(tf.assign(conv_biases, loaded_biases))
152 |
153 |             # conv3
154 |             for layer in range(1, 5):
155 |                 conv_weights = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/weights')
156 |                 conv_biases = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/biases')
157 |
158 |                 loaded_weights = weights['conv3_' + str(layer)]
159 |                 loaded_biases = 
weights['conv3_' + str(layer) + '_b'] 160 | 161 | sess.run(tf.assign(conv_weights, loaded_weights)) 162 | sess.run(tf.assign(conv_biases, loaded_biases)) 163 | 164 | # conv4 165 | for layer in range(1, 3): 166 | conv_weights = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/weights') 167 | conv_biases = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/biases') 168 | 169 | loaded_weights = weights['conv4_' + str(layer)] 170 | loaded_biases = weights['conv4_' + str(layer) + '_b'] 171 | 172 | sess.run(tf.assign(conv_weights, loaded_weights)) 173 | sess.run(tf.assign(conv_biases, loaded_biases)) 174 | 175 | # conv4_CPM 176 | for layer in range(1, 5): 177 | conv_weights = tf.get_variable('sub_stages/sub_conv' + str(layer + 10) + '/weights') 178 | conv_biases = tf.get_variable('sub_stages/sub_conv' + str(layer + 10) + '/biases') 179 | 180 | loaded_weights = weights['conv4_' + str(2 + layer) + '_CPM'] 181 | loaded_biases = weights['conv4_' + str(2 + layer) + '_CPM_b'] 182 | 183 | sess.run(tf.assign(conv_weights, loaded_weights)) 184 | sess.run(tf.assign(conv_biases, loaded_biases)) 185 | 186 | # conv5_3_CPM 187 | conv_weights = tf.get_variable('sub_stages/sub_stage_img_feature/weights') 188 | conv_biases = tf.get_variable('sub_stages/sub_stage_img_feature/biases') 189 | 190 | loaded_weights = weights['conv4_7_CPM'] 191 | loaded_biases = weights['conv4_7_CPM_b'] 192 | 193 | sess.run(tf.assign(conv_weights, loaded_weights)) 194 | sess.run(tf.assign(conv_biases, loaded_biases)) 195 | 196 | ## stage 1 197 | conv_weights = tf.get_variable('stage_1/conv1/weights') 198 | conv_biases = tf.get_variable('stage_1/conv1/biases') 199 | 200 | loaded_weights = weights['conv5_1_CPM'] 201 | loaded_biases = weights['conv5_1_CPM_b'] 202 | 203 | sess.run(tf.assign(conv_weights, loaded_weights)) 204 | sess.run(tf.assign(conv_biases, loaded_biases)) 205 | 206 | if finetune != True: 207 | conv_weights = tf.get_variable('stage_1/stage_heatmap/weights') 208 | conv_biases = tf.get_variable('stage_1/stage_heatmap/biases') 209 | 210 | loaded_weights = weights['conv5_2_CPM'] 211 | loaded_biases = weights['conv5_2_CPM_b'] 212 | 213 | sess.run(tf.assign(conv_weights, loaded_weights)) 214 | sess.run(tf.assign(conv_biases, loaded_biases)) 215 | 216 | ## stage 2 and behind 217 | for stage in range(2, self.stages + 1): 218 | for layer in range(1, 8): 219 | conv_weights = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/weights') 220 | conv_biases = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/biases') 221 | 222 | loaded_weights = weights['Mconv' + str(layer) + '_stage' + str(stage)] 223 | loaded_biases = weights['Mconv' + str(layer) + '_stage' + str(stage) + '_b'] 224 | 225 | sess.run(tf.assign(conv_weights, loaded_weights)) 226 | sess.run(tf.assign(conv_biases, loaded_biases)) 227 | -------------------------------------------------------------------------------- /models/nets/cpm_hand.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import tensorflow as tf 4 | from models.nets.CPM import CPM 5 | 6 | 7 | class CPM_Model(CPM): 8 | def __init__(self, input_size, heatmap_size, stages, joints, img_type='RGB', is_training=True): 9 | self.stages = stages 10 | self.stage_heatmap = [] 11 | self.stage_loss = [0 for _ in range(stages)] 12 | self.total_loss = 0 13 | self.input_image = None 14 | self.center_map = None 15 | self.gt_heatmap = None 16 | self.init_lr = 0 17 | self.merged_summary = None 18 | self.joints = 
joints 19 | self.batch_size = 0 20 | self.inference_type = 'Train' 21 | 22 | if img_type == 'RGB': 23 | self.input_images = tf.placeholder(dtype=tf.float32, 24 | shape=(None, input_size, input_size, 3), 25 | name='input_placeholder') 26 | elif img_type == 'GRAY': 27 | self.input_images = tf.placeholder(dtype=tf.float32, 28 | shape=(None, input_size, input_size, 1), 29 | name='input_placeholder') 30 | self.cmap_placeholder = tf.placeholder(dtype=tf.float32, 31 | shape=(None, input_size, input_size, 1), 32 | name='cmap_placeholder') 33 | self.gt_hmap_placeholder = tf.placeholder(dtype=tf.float32, 34 | shape=(None, heatmap_size, heatmap_size, joints + 1), 35 | name='gt_hmap_placeholder') 36 | self._build_model() 37 | 38 | def _build_model(self): 39 | with tf.variable_scope('pooled_center_map'): 40 | self.center_map = tf.layers.average_pooling2d(inputs=self.cmap_placeholder, 41 | pool_size=[9, 9], 42 | strides=[8, 8], 43 | padding='same', 44 | name='center_map') 45 | with tf.variable_scope('sub_stages'): 46 | sub_conv1 = tf.layers.conv2d(inputs=self.input_images, 47 | filters=64, 48 | kernel_size=[3, 3], 49 | strides=[1, 1], 50 | padding='same', 51 | activation=tf.nn.relu, 52 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 53 | name='sub_conv1') 54 | 55 | sub_conv2 = tf.layers.conv2d(inputs=sub_conv1, 56 | filters=64, 57 | kernel_size=[3, 3], 58 | strides=[1, 1], 59 | padding='same', 60 | activation=tf.nn.relu, 61 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 62 | name='sub_conv2') 63 | sub_pool1 = tf.layers.max_pooling2d(inputs=sub_conv2, 64 | pool_size=[2, 2], 65 | strides=2, 66 | padding='valid', 67 | name='sub_pool1') 68 | sub_conv3 = tf.layers.conv2d(inputs=sub_pool1, 69 | filters=128, 70 | kernel_size=[3, 3], 71 | strides=[1, 1], 72 | padding='same', 73 | activation=tf.nn.relu, 74 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 75 | name='sub_conv3') 76 | sub_conv4 = tf.layers.conv2d(inputs=sub_conv3, 77 | filters=128, 78 | kernel_size=[3, 3], 79 | strides=[1, 1], 80 | padding='same', 81 | activation=tf.nn.relu, 82 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 83 | name='sub_conv4') 84 | sub_pool2 = tf.layers.max_pooling2d(inputs=sub_conv4, 85 | pool_size=[2, 2], 86 | strides=2, 87 | padding='valid', 88 | name='sub_pool2') 89 | sub_conv5 = tf.layers.conv2d(inputs=sub_pool2, 90 | filters=256, 91 | kernel_size=[3, 3], 92 | strides=[1, 1], 93 | padding='same', 94 | activation=tf.nn.relu, 95 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 96 | name='sub_conv5') 97 | sub_conv6 = tf.layers.conv2d(inputs=sub_conv5, 98 | filters=256, 99 | kernel_size=[3, 3], 100 | strides=[1, 1], 101 | padding='same', 102 | activation=tf.nn.relu, 103 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 104 | name='sub_conv6') 105 | sub_conv7 = tf.layers.conv2d(inputs=sub_conv6, 106 | filters=256, 107 | kernel_size=[3, 3], 108 | strides=[1, 1], 109 | padding='same', 110 | activation=tf.nn.relu, 111 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 112 | name='sub_conv7') 113 | sub_conv8 = tf.layers.conv2d(inputs=sub_conv7, 114 | filters=256, 115 | kernel_size=[3, 3], 116 | strides=[1, 1], 117 | padding='same', 118 | activation=tf.nn.relu, 119 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 120 | name='sub_conv8') 121 | sub_pool3 = tf.layers.max_pooling2d(inputs=sub_conv8, 122 | pool_size=[2, 2], 123 | strides=2, 124 | padding='valid', 125 | name='sub_pool3') 126 | sub_conv9 = tf.layers.conv2d(inputs=sub_pool3, 127 
| filters=512, 128 | kernel_size=[3, 3], 129 | strides=[1, 1], 130 | padding='same', 131 | activation=tf.nn.relu, 132 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 133 | name='sub_conv9') 134 | sub_conv10 = tf.layers.conv2d(inputs=sub_conv9, 135 | filters=512, 136 | kernel_size=[3, 3], 137 | strides=[1, 1], 138 | padding='same', 139 | activation=tf.nn.relu, 140 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 141 | name='sub_conv10') 142 | sub_conv11 = tf.layers.conv2d(inputs=sub_conv10, 143 | filters=512, 144 | kernel_size=[3, 3], 145 | strides=[1, 1], 146 | padding='same', 147 | activation=tf.nn.relu, 148 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 149 | name='sub_conv11') 150 | sub_conv12 = tf.layers.conv2d(inputs=sub_conv11, 151 | filters=512, 152 | kernel_size=[3, 3], 153 | strides=[1, 1], 154 | padding='same', 155 | activation=tf.nn.relu, 156 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 157 | name='sub_conv12') 158 | sub_conv13 = tf.layers.conv2d(inputs=sub_conv12, 159 | filters=512, 160 | kernel_size=[3, 3], 161 | strides=[1, 1], 162 | padding='same', 163 | activation=tf.nn.relu, 164 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 165 | name='sub_conv13') 166 | sub_conv14 = tf.layers.conv2d(inputs=sub_conv13, 167 | filters=512, 168 | kernel_size=[3, 3], 169 | strides=[1, 1], 170 | padding='same', 171 | activation=tf.nn.relu, 172 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 173 | name='sub_conv14') 174 | self.sub_stage_img_feature = tf.layers.conv2d(inputs=sub_conv14, 175 | filters=128, 176 | kernel_size=[3, 3], 177 | strides=[1, 1], 178 | padding='same', 179 | activation=tf.nn.relu, 180 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 181 | name='sub_stage_img_feature') 182 | 183 | with tf.variable_scope('stage_1'): 184 | conv1 = tf.layers.conv2d(inputs=self.sub_stage_img_feature, 185 | filters=512, 186 | kernel_size=[1, 1], 187 | strides=[1, 1], 188 | padding='valid', 189 | activation=tf.nn.relu, 190 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 191 | name='conv1') 192 | self.stage_heatmap.append(tf.layers.conv2d(inputs=conv1, 193 | filters=self.joints+1, 194 | kernel_size=[1, 1], 195 | strides=[1, 1], 196 | padding='valid', 197 | activation=None, 198 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 199 | name='stage_heatmap')) 200 | for stage in range(2, self.stages + 1): 201 | self._middle_conv(stage) 202 | 203 | def _middle_conv(self, stage): 204 | with tf.variable_scope('stage_' + str(stage)): 205 | self.current_featuremap = tf.concat([self.stage_heatmap[stage - 2], 206 | self.sub_stage_img_feature, 207 | # self.center_map], 208 | ], 209 | axis=3) 210 | mid_conv1 = tf.layers.conv2d(inputs=self.current_featuremap, 211 | filters=128, 212 | kernel_size=[7, 7], 213 | strides=[1, 1], 214 | padding='same', 215 | activation=tf.nn.relu, 216 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 217 | name='mid_conv1') 218 | mid_conv2 = tf.layers.conv2d(inputs=mid_conv1, 219 | filters=128, 220 | kernel_size=[7, 7], 221 | strides=[1, 1], 222 | padding='same', 223 | activation=tf.nn.relu, 224 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 225 | name='mid_conv2') 226 | mid_conv3 = tf.layers.conv2d(inputs=mid_conv2, 227 | filters=128, 228 | kernel_size=[7, 7], 229 | strides=[1, 1], 230 | padding='same', 231 | activation=tf.nn.relu, 232 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 233 | name='mid_conv3') 234 | mid_conv4 = 
tf.layers.conv2d(inputs=mid_conv3, 235 | filters=128, 236 | kernel_size=[7, 7], 237 | strides=[1, 1], 238 | padding='same', 239 | activation=tf.nn.relu, 240 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 241 | name='mid_conv4') 242 | mid_conv5 = tf.layers.conv2d(inputs=mid_conv4, 243 | filters=128, 244 | kernel_size=[7, 7], 245 | strides=[1, 1], 246 | padding='same', 247 | activation=tf.nn.relu, 248 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 249 | name='mid_conv5') 250 | mid_conv6 = tf.layers.conv2d(inputs=mid_conv5, 251 | filters=128, 252 | kernel_size=[1, 1], 253 | strides=[1, 1], 254 | padding='valid', 255 | activation=tf.nn.relu, 256 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 257 | name='mid_conv6') 258 | self.current_heatmap = tf.layers.conv2d(inputs=mid_conv6, 259 | filters=self.joints+1, 260 | kernel_size=[1, 1], 261 | strides=[1, 1], 262 | padding='valid', 263 | activation=None, 264 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 265 | name='mid_conv7') 266 | self.stage_heatmap.append(self.current_heatmap) 267 | 268 | def build_loss(self, lr, lr_decay_rate, lr_decay_step, optimizer='Adam'): 269 | self.total_loss = 0 270 | self.total_loss_eval = 0 271 | self.init_lr = lr 272 | self.lr_decay_rate = lr_decay_rate 273 | self.lr_decay_step = lr_decay_step 274 | self.optimizer = optimizer 275 | self.batch_size = tf.cast(tf.shape(self.input_images)[0], dtype=tf.float32) 276 | 277 | 278 | for stage in range(self.stages): 279 | with tf.variable_scope('stage' + str(stage + 1) + '_loss'): 280 | self.stage_loss[stage] = tf.nn.l2_loss(self.stage_heatmap[stage] - self.gt_hmap_placeholder, 281 | name='l2_loss') / self.batch_size 282 | tf.summary.scalar('stage' + str(stage + 1) + '_loss', self.stage_loss[stage]) 283 | 284 | with tf.variable_scope('total_loss'): 285 | for stage in range(self.stages): 286 | self.total_loss += self.stage_loss[stage] 287 | tf.summary.scalar('total loss train', self.total_loss) 288 | 289 | with tf.variable_scope('total_loss_eval'): 290 | for stage in range(self.stages): 291 | self.total_loss_eval += self.stage_loss[stage] 292 | tf.summary.scalar('total loss eval', self.total_loss) 293 | 294 | with tf.variable_scope('train'): 295 | self.global_step = tf.contrib.framework.get_or_create_global_step() 296 | 297 | self.cur_lr = tf.train.exponential_decay(self.init_lr, 298 | global_step=self.global_step, 299 | decay_rate=self.lr_decay_rate, 300 | decay_steps=self.lr_decay_step) 301 | tf.summary.scalar('global learning rate', self.cur_lr) 302 | 303 | self.train_op = tf.contrib.layers.optimize_loss(loss=self.total_loss, 304 | global_step=self.global_step, 305 | learning_rate=self.cur_lr, 306 | optimizer=self.optimizer) 307 | 308 | def load_weights_from_file(self, weight_file_path, sess, finetune=True): 309 | # weight_file_object = open(weight_file_path, 'rb') 310 | weights = pickle.load(open(weight_file_path, 'rb'))#, encoding='latin1') 311 | 312 | with tf.variable_scope('', reuse=True): 313 | ## Pre stage conv 314 | # conv1 315 | for layer in range(1, 3): 316 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/kernel') 317 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/bias') 318 | 319 | loaded_kernel = weights['conv1_' + str(layer)] 320 | loaded_bias = weights['conv1_' + str(layer) + '_b'] 321 | 322 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 323 | sess.run(tf.assign(conv_bias, loaded_bias)) 324 | 325 | # conv2 326 | for layer in range(1, 3): 327 | conv_kernel = 
tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/kernel') 328 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/bias') 329 | 330 | loaded_kernel = weights['conv2_' + str(layer)] 331 | loaded_bias = weights['conv2_' + str(layer) + '_b'] 332 | 333 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 334 | sess.run(tf.assign(conv_bias, loaded_bias)) 335 | 336 | # conv3 337 | for layer in range(1, 5): 338 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/kernel') 339 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/bias') 340 | 341 | loaded_kernel = weights['conv3_' + str(layer)] 342 | loaded_bias = weights['conv3_' + str(layer) + '_b'] 343 | 344 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 345 | sess.run(tf.assign(conv_bias, loaded_bias)) 346 | 347 | # conv4 348 | for layer in range(1, 5): 349 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/kernel') 350 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/bias') 351 | 352 | loaded_kernel = weights['conv4_' + str(layer)] 353 | loaded_bias = weights['conv4_' + str(layer) + '_b'] 354 | 355 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 356 | sess.run(tf.assign(conv_bias, loaded_bias)) 357 | 358 | # conv5 359 | for layer in range(1, 3): 360 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 12) + '/kernel') 361 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 12) + '/bias') 362 | 363 | loaded_kernel = weights['conv5_' + str(layer)] 364 | loaded_bias = weights['conv5_' + str(layer) + '_b'] 365 | 366 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 367 | sess.run(tf.assign(conv_bias, loaded_bias)) 368 | 369 | # conv5_3_CPM 370 | conv_kernel = tf.get_variable('sub_stages/sub_stage_img_feature/kernel') 371 | conv_bias = tf.get_variable('sub_stages/sub_stage_img_feature/bias') 372 | 373 | loaded_kernel = weights['conv5_3_CPM'] 374 | loaded_bias = weights['conv5_3_CPM_b'] 375 | 376 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 377 | sess.run(tf.assign(conv_bias, loaded_bias)) 378 | 379 | ## stage 1 380 | conv_kernel = tf.get_variable('stage_1/conv1/kernel') 381 | conv_bias = tf.get_variable('stage_1/conv1/bias') 382 | 383 | loaded_kernel = weights['conv6_1_CPM'] 384 | loaded_bias = weights['conv6_1_CPM_b'] 385 | 386 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 387 | sess.run(tf.assign(conv_bias, loaded_bias)) 388 | 389 | if finetune != True: 390 | conv_kernel = tf.get_variable('stage_1/stage_heatmap/kernel') 391 | conv_bias = tf.get_variable('stage_1/stage_heatmap/bias') 392 | 393 | loaded_kernel = weights['conv6_2_CPM'] 394 | loaded_bias = weights['conv6_2_CPM_b'] 395 | 396 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 397 | sess.run(tf.assign(conv_bias, loaded_bias)) 398 | 399 | ## Stage 2 and behind 400 | for stage in range(2, self.stages + 1): 401 | for layer in range(1, 8): 402 | conv_kernel = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/kernel') 403 | conv_bias = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/bias') 404 | 405 | loaded_kernel = weights['Mconv' + str(layer) + '_stage' + str(stage)] 406 | loaded_bias = weights['Mconv' + str(layer) + '_stage' + str(stage) + '_b'] 407 | 408 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 409 | sess.run(tf.assign(conv_bias, loaded_bias)) 410 | -------------------------------------------------------------------------------- /models/nets/cpm_hand_slim.py: 
--------------------------------------------------------------------------------
1 | import pickle
2 | import tensorflow as tf
3 | import tensorflow.contrib.slim as slim
4 |
5 |
6 | class CPM_Model(object):
7 |     def __init__(self, stages, joints):
8 |         self.stages = stages
9 |         self.stage_heatmap = []
10 |         self.stage_loss = [0] * stages
11 |         self.total_loss = 0
12 |         self.input_image = None
13 |         self.center_map = None
14 |         self.gt_heatmap = None
15 |         self.learning_rate = 0
16 |         self.merged_summary = None
17 |         self.joints = joints
18 |         self.batch_size = 0
19 |
20 |     def build_model(self, input_image, center_map, batch_size):
21 |         self.batch_size = batch_size
22 |         self.input_image = input_image
23 |         self.center_map = center_map
24 |         with tf.variable_scope('pooled_center_map'):
25 |             # the center map is a Gaussian template that gathers the response
26 |             self.center_map = slim.avg_pool2d(self.center_map,
27 |                                               [9, 9], stride=8,
28 |                                               padding='SAME',
29 |                                               scope='center_map')
30 |
31 |         with slim.arg_scope([slim.conv2d],
32 |                             padding='SAME',
33 |                             activation_fn=tf.nn.relu,
34 |                             weights_initializer=tf.contrib.layers.xavier_initializer()):
35 |             with tf.variable_scope('sub_stages'):
36 |                 net = slim.conv2d(input_image, 64, [3, 3], scope='sub_conv1')
37 |                 net = slim.conv2d(net, 64, [3, 3], scope='sub_conv2')
38 |                 net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool1')
39 |                 net = slim.conv2d(net, 128, [3, 3], scope='sub_conv3')
40 |                 net = slim.conv2d(net, 128, [3, 3], scope='sub_conv4')
41 |                 net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool2')
42 |                 net = slim.conv2d(net, 256, [3, 3], scope='sub_conv5')
43 |                 net = slim.conv2d(net, 256, [3, 3], scope='sub_conv6')
44 |                 net = slim.conv2d(net, 256, [3, 3], scope='sub_conv7')
45 |                 net = slim.conv2d(net, 256, [3, 3], scope='sub_conv8')
46 |                 net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool3')
47 |                 net = slim.conv2d(net, 512, [3, 3], scope='sub_conv9')
48 |                 net = slim.conv2d(net, 512, [3, 3], scope='sub_conv10')
49 |                 net = slim.conv2d(net, 512, [3, 3], scope='sub_conv11')
50 |                 net = slim.conv2d(net, 512, [3, 3], scope='sub_conv12')
51 |                 net = slim.conv2d(net, 512, [3, 3], scope='sub_conv13')
52 |                 net = slim.conv2d(net, 512, [3, 3], scope='sub_conv14')
53 |
54 |                 self.sub_stage_img_feature = slim.conv2d(net, 128, [3, 3],
55 |                                                          scope='sub_stage_img_feature')
56 |
57 |             with tf.variable_scope('stage_1'):
58 |                 conv1 = slim.conv2d(self.sub_stage_img_feature, 512, [1, 1],
59 |                                     scope='conv1')
60 |                 self.stage_heatmap.append(slim.conv2d(conv1, self.joints, [1, 1],
61 |                                                       scope='stage_heatmap'))
62 |
63 |         for stage in range(2, self.stages + 1):
64 |             self._middle_conv(stage)
65 |
66 |     def _middle_conv(self, stage):
67 |         with tf.variable_scope('stage_' + str(stage)):
68 |             self.current_featuremap = tf.concat([self.stage_heatmap[stage-2],
69 |                                                  self.sub_stage_img_feature,
70 |                                                  # self.center_map,
71 |                                                  ],
72 |                                                 axis=3)
73 |             with slim.arg_scope([slim.conv2d],
74 |                                 padding='SAME',
75 |                                 activation_fn=tf.nn.relu,
76 |                                 weights_initializer=tf.contrib.layers.xavier_initializer()):
77 |                 mid_net = slim.conv2d(self.current_featuremap, 128, [7, 7], scope='mid_conv1')
78 |                 mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv2')
79 |                 mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv3')
80 |                 mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv4')
81 |                 mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv5')
82 |                 mid_net = slim.conv2d(mid_net, 128, [1, 1], scope='mid_conv6')
83 |                 self.current_heatmap = slim.conv2d(mid_net, 
self.joints, [1, 1], 84 | scope='mid_conv7') 85 | self.stage_heatmap.append(self.current_heatmap) 86 | 87 | def build_loss(self, gt_heatmap, lr, lr_decay_rate, lr_decay_step): 88 | self.gt_heatmap = gt_heatmap 89 | self.total_loss = 0 90 | self.learning_rate = lr 91 | self.lr_decay_rate = lr_decay_rate 92 | self.lr_decay_step = lr_decay_step 93 | 94 | for stage in range(self.stages): 95 | with tf.variable_scope('stage' + str(stage + 1) + '_loss'): 96 | self.stage_loss[stage] = tf.nn.l2_loss(self.stage_heatmap[stage] - self.gt_heatmap, 97 | name='l2_loss') / self.batch_size 98 | tf.summary.scalar('stage' + str(stage + 1) + '_loss', self.stage_loss[stage]) 99 | 100 | with tf.variable_scope('total_loss'): 101 | for stage in range(self.stages): 102 | self.total_loss += self.stage_loss[stage] 103 | tf.summary.scalar('total loss', self.total_loss) 104 | 105 | with tf.variable_scope('train'): 106 | self.global_step = tf.contrib.framework.get_or_create_global_step() 107 | 108 | self.lr = tf.train.exponential_decay(self.learning_rate, 109 | global_step=self.global_step, 110 | decay_rate=self.lr_decay_rate, 111 | decay_steps=self.lr_decay_step) 112 | tf.summary.scalar('learning rate', self.lr) 113 | 114 | self.train_op = tf.contrib.layers.optimize_loss(loss=self.total_loss, 115 | global_step=self.global_step, 116 | learning_rate=self.lr, 117 | optimizer='Adam') 118 | self.merged_summary = tf.summary.merge_all() 119 | 120 | def load_weights_from_file(self, weight_file_path, sess, finetune=True): 121 | # weight_file_object = open(weight_file_path, 'rb') 122 | weights = pickle.load(open(weight_file_path, 'rb'), encoding='latin1') 123 | 124 | with tf.variable_scope('', reuse=True): 125 | ## Pre stage conv 126 | # conv1 127 | for layer in range(1, 3): 128 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/kernel') 129 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/bias') 130 | 131 | loaded_kernel = weights['conv1_' + str(layer)] 132 | loaded_bias = weights['conv1_' + str(layer) + '_b'] 133 | 134 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 135 | sess.run(tf.assign(conv_bias, loaded_bias)) 136 | 137 | # conv2 138 | for layer in range(1, 3): 139 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/kernel') 140 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/bias') 141 | 142 | loaded_kernel = weights['conv2_' + str(layer)] 143 | loaded_bias = weights['conv2_' + str(layer) + '_b'] 144 | 145 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 146 | sess.run(tf.assign(conv_bias, loaded_bias)) 147 | 148 | # conv3 149 | for layer in range(1, 5): 150 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/kernel') 151 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/bias') 152 | 153 | loaded_kernel = weights['conv3_' + str(layer)] 154 | loaded_bias = weights['conv3_' + str(layer) + '_b'] 155 | 156 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 157 | sess.run(tf.assign(conv_bias, loaded_bias)) 158 | 159 | # conv4 160 | for layer in range(1, 5): 161 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/kernel') 162 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/bias') 163 | 164 | loaded_kernel = weights['conv4_' + str(layer)] 165 | loaded_bias = weights['conv4_' + str(layer) + '_b'] 166 | 167 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 168 | sess.run(tf.assign(conv_bias, loaded_bias)) 169 | 170 | # conv5 171 | for layer in 
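build_loss above is the CPM intermediate-supervision scheme: every stage's heatmap is penalized against the same ground truth, and the per-stage L2 terms are summed into total_loss. A condensed sketch of just that reduction (tensor shapes assumed to match the model above):

```python
import tensorflow as tf


def cpm_total_loss(stage_heatmaps, gt_heatmap, batch_size):
    # one L2 term per stage, all regressed to the same ground-truth heatmap
    stage_losses = [tf.nn.l2_loss(hm - gt_heatmap) / batch_size
                    for hm in stage_heatmaps]
    return tf.add_n(stage_losses)
```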
range(1, 3): 172 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 12) + '/kernel') 173 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 12) + '/bias') 174 | 175 | loaded_kernel = weights['conv5_' + str(layer)] 176 | loaded_bias = weights['conv5_' + str(layer) + '_b'] 177 | 178 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 179 | sess.run(tf.assign(conv_bias, loaded_bias)) 180 | 181 | # conv5_3_CPM 182 | conv_kernel = tf.get_variable('sub_stages/sub_stage_img_feature/kernel') 183 | conv_bias = tf.get_variable('sub_stages/sub_stage_img_feature/bias') 184 | 185 | loaded_kernel = weights['conv5_3_CPM'] 186 | loaded_bias = weights['conv5_3_CPM_b'] 187 | 188 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 189 | sess.run(tf.assign(conv_bias, loaded_bias)) 190 | 191 | ## stage 1 192 | conv_kernel = tf.get_variable('stage_1/conv1/kernel') 193 | conv_bias = tf.get_variable('stage_1/conv1/bias') 194 | 195 | loaded_kernel = weights['conv6_1_CPM'] 196 | loaded_bias = weights['conv6_1_CPM_b'] 197 | 198 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 199 | sess.run(tf.assign(conv_bias, loaded_bias)) 200 | 201 | if finetune != True: 202 | conv_kernel = tf.get_variable('stage_1/stage_heatmap/kernel') 203 | conv_bias = tf.get_variable('stage_1/stage_heatmap/bias') 204 | 205 | loaded_kernel = weights['conv6_2_CPM'] 206 | loaded_bias = weights['conv6_2_CPM_b'] 207 | 208 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 209 | sess.run(tf.assign(conv_bias, loaded_bias)) 210 | 211 | ## stage 2 and behind 212 | for stage in range(2, self.stages + 1): 213 | for layer in range(1, 8): 214 | conv_kernel = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/kernel') 215 | conv_bias = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/bias') 216 | 217 | loaded_kernel = weights['Mconv' + str(layer) + '_stage' + str(stage)] 218 | loaded_bias = weights['Mconv' + str(layer) + '_stage' + str(stage) + '_b'] 219 | 220 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 221 | sess.run(tf.assign(conv_bias, loaded_bias)) 222 | -------------------------------------------------------------------------------- /models/nets/cpm_hand_v2.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.nets.CPM import CPM 3 | 4 | 5 | 6 | class CPM_Model(CPM): 7 | def __init__(self, input_size, heatmap_size, stages, joints, img_type='RGB', is_training=True): 8 | self.stages = stages 9 | self.stage_heatmap = [] 10 | self.stage_loss = [0 for _ in range(stages)] 11 | self.total_loss = 0 12 | self.input_image = None 13 | self.center_map = None 14 | self.gt_heatmap = None 15 | self.init_lr = 0 16 | self.merged_summary = None 17 | self.joints = joints 18 | self.batch_size = 0 19 | self.inference_type = 'Train' 20 | 21 | if img_type == 'RGB': 22 | self.input_images = tf.placeholder(dtype=tf.float32, 23 | shape=(None, input_size, input_size, 3), 24 | name='input_placeholder') 25 | elif img_type == 'GRAY': 26 | self.input_images = tf.placeholder(dtype=tf.float32, 27 | shape=(None, input_size, input_size, 1), 28 | name='input_placeholder') 29 | # self.cmap_placeholder = tf.placeholder(dtype=tf.float32, 30 | # shape=(None, input_size, input_size, 1), 31 | # name='cmap_placeholder') 32 | self.gt_hmap_placeholder = tf.placeholder(dtype=tf.float32, 33 | shape=(None, heatmap_size, heatmap_size, joints + 1), 34 | name='gt_hmap_placeholder') 35 | self._build_model() 36 | 37 | def _build_model(self): 38 | # with 
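gt_hmap_placeholder above is allocated with joints + 1 channels: the extra channel is a background map, which this repo builds as one minus the pixelwise maximum over all joint maps (see make_gaussian_batch and create_cpm_tfr_fulljoints.py later in this dump). The construction in NumPy:

```python
import numpy as np


def add_background_channel(joint_heatmaps):
    # joint_heatmaps: (H, W, num_joints) with responses in [0, 1]
    background = 1.0 - np.amax(joint_heatmaps, axis=2, keepdims=True)
    return np.concatenate([joint_heatmaps, background], axis=2)
```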
tf.variable_scope('pooled_center_map'): 39 | # self.center_map = tf.layers.average_pooling2d(inputs=self.cmap_placeholder, 40 | # pool_size=[9, 9], 41 | # strides=[8, 8], 42 | # padding='same', 43 | # name='center_map') 44 | with tf.variable_scope('sub_stages'): 45 | sub_conv1 = tf.layers.conv2d(inputs=self.input_images, 46 | filters=64, 47 | kernel_size=[3, 3], 48 | strides=[1, 1], 49 | padding='same', 50 | activation=tf.nn.relu, 51 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 52 | name='sub_conv1') 53 | sub_conv2 = tf.layers.conv2d(inputs=sub_conv1, 54 | filters=64, 55 | kernel_size=[3, 3], 56 | strides=[1, 1], 57 | padding='same', 58 | activation=tf.nn.relu, 59 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 60 | name='sub_conv2') 61 | sub_pool1 = tf.layers.max_pooling2d(inputs=sub_conv2, 62 | pool_size=[2, 2], 63 | strides=2, 64 | padding='valid', 65 | name='sub_pool1') 66 | sub_conv3 = tf.layers.conv2d(inputs=sub_pool1, 67 | filters=128, 68 | kernel_size=[3, 3], 69 | strides=[1, 1], 70 | padding='same', 71 | activation=tf.nn.relu, 72 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 73 | name='sub_conv3') 74 | sub_conv4 = tf.layers.conv2d(inputs=sub_conv3, 75 | filters=128, 76 | kernel_size=[3, 3], 77 | strides=[1, 1], 78 | padding='same', 79 | activation=tf.nn.relu, 80 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 81 | name='sub_conv4') 82 | # sub_pool2 = tf.layers.max_pooling2d(inputs=sub_conv4, 83 | # pool_size=[2, 2], 84 | # strides=2, 85 | # padding='valid', 86 | # name='sub_pool2') 87 | sub_conv5 = tf.layers.conv2d(inputs=sub_conv4, 88 | filters=256, 89 | kernel_size=[3, 3], 90 | strides=[1, 1], 91 | padding='same', 92 | activation=tf.nn.relu, 93 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 94 | name='sub_conv5') 95 | sub_conv6 = tf.layers.conv2d(inputs=sub_conv5, 96 | filters=256, 97 | kernel_size=[3, 3], 98 | strides=[1, 1], 99 | padding='same', 100 | activation=tf.nn.relu, 101 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 102 | name='sub_conv6') 103 | sub_conv7 = tf.layers.conv2d(inputs=sub_conv6, 104 | filters=256, 105 | kernel_size=[3, 3], 106 | strides=[1, 1], 107 | padding='same', 108 | activation=tf.nn.relu, 109 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 110 | name='sub_conv7') 111 | sub_conv8 = tf.layers.conv2d(inputs=sub_conv7, 112 | filters=256, 113 | kernel_size=[3, 3], 114 | strides=[1, 1], 115 | padding='same', 116 | activation=tf.nn.relu, 117 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 118 | name='sub_conv8') 119 | sub_pool3 = tf.layers.max_pooling2d(inputs=sub_conv8, 120 | pool_size=[2, 2], 121 | strides=2, 122 | padding='valid', 123 | name='sub_pool3') 124 | sub_conv9 = tf.layers.conv2d(inputs=sub_pool3, 125 | filters=512, 126 | kernel_size=[3, 3], 127 | strides=[1, 1], 128 | padding='same', 129 | activation=tf.nn.relu, 130 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 131 | name='sub_conv9') 132 | sub_conv10 = tf.layers.conv2d(inputs=sub_conv9, 133 | filters=512, 134 | kernel_size=[3, 3], 135 | strides=[1, 1], 136 | padding='same', 137 | activation=tf.nn.relu, 138 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 139 | name='sub_conv10') 140 | sub_conv11 = tf.layers.conv2d(inputs=sub_conv10, 141 | filters=512, 142 | kernel_size=[3, 3], 143 | strides=[1, 1], 144 | padding='same', 145 | activation=tf.nn.relu, 146 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 147 | name='sub_conv11') 148 | sub_conv12 = 
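Note that sub_pool2 is commented out in this v2 backbone, so only sub_pool1 and sub_pool3 downsample and the stage heatmaps come out at input_size // 4 rather than the // 8 of the slim variant. A toy shape check (the 128 below is an assumed example input size, not a value from config.py):

```python
def v2_feature_size(input_size):
    size = input_size
    for _ in ('sub_pool1', 'sub_pool3'):  # the two active 2x2 stride-2 pools
        size //= 2
    return size


assert v2_feature_size(128) == 32
```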
tf.layers.conv2d(inputs=sub_conv11, 149 | filters=512, 150 | kernel_size=[3, 3], 151 | strides=[1, 1], 152 | padding='same', 153 | activation=tf.nn.relu, 154 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 155 | name='sub_conv12') 156 | sub_conv13 = tf.layers.conv2d(inputs=sub_conv12, 157 | filters=512, 158 | kernel_size=[3, 3], 159 | strides=[1, 1], 160 | padding='same', 161 | activation=tf.nn.relu, 162 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 163 | name='sub_conv13') 164 | sub_conv14 = tf.layers.conv2d(inputs=sub_conv13, 165 | filters=512, 166 | kernel_size=[3, 3], 167 | strides=[1, 1], 168 | padding='same', 169 | activation=tf.nn.relu, 170 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 171 | name='sub_conv14') 172 | self.sub_stage_img_feature = tf.layers.conv2d(inputs=sub_conv14, 173 | filters=128, 174 | kernel_size=[3, 3], 175 | strides=[1, 1], 176 | padding='same', 177 | activation=tf.nn.relu, 178 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 179 | name='sub_stage_img_feature') 180 | 181 | with tf.variable_scope('stage_1'): 182 | conv1 = tf.layers.conv2d(inputs=self.sub_stage_img_feature, 183 | filters=512, 184 | kernel_size=[1, 1], 185 | strides=[1, 1], 186 | padding='valid', 187 | activation=tf.nn.relu, 188 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 189 | name='conv1') 190 | self.stage_heatmap.append(tf.layers.conv2d(inputs=conv1, 191 | filters=self.joints+1, 192 | kernel_size=[1, 1], 193 | strides=[1, 1], 194 | padding='valid', 195 | activation=None, 196 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 197 | name='stage_heatmap')) 198 | for stage in range(2, self.stages + 1): 199 | self._middle_conv(stage) 200 | 201 | def _middle_conv(self, stage): 202 | with tf.variable_scope('stage_' + str(stage)): 203 | self.current_featuremap = tf.concat([self.stage_heatmap[stage - 2], 204 | self.sub_stage_img_feature, 205 | # self.center_map], 206 | ], 207 | axis=3) 208 | mid_conv1 = tf.layers.conv2d(inputs=self.current_featuremap, 209 | filters=128, 210 | kernel_size=[7, 7], 211 | strides=[1, 1], 212 | padding='same', 213 | activation=tf.nn.relu, 214 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 215 | name='mid_conv1') 216 | mid_conv2 = tf.layers.conv2d(inputs=mid_conv1, 217 | filters=128, 218 | kernel_size=[7, 7], 219 | strides=[1, 1], 220 | padding='same', 221 | activation=tf.nn.relu, 222 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 223 | name='mid_conv2') 224 | mid_conv3 = tf.layers.conv2d(inputs=mid_conv2, 225 | filters=128, 226 | kernel_size=[7, 7], 227 | strides=[1, 1], 228 | padding='same', 229 | activation=tf.nn.relu, 230 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 231 | name='mid_conv3') 232 | mid_conv4 = tf.layers.conv2d(inputs=mid_conv3, 233 | filters=128, 234 | kernel_size=[7, 7], 235 | strides=[1, 1], 236 | padding='same', 237 | activation=tf.nn.relu, 238 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 239 | name='mid_conv4') 240 | mid_conv5 = tf.layers.conv2d(inputs=mid_conv4, 241 | filters=128, 242 | kernel_size=[7, 7], 243 | strides=[1, 1], 244 | padding='same', 245 | activation=tf.nn.relu, 246 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 247 | name='mid_conv5') 248 | mid_conv6 = tf.layers.conv2d(inputs=mid_conv5, 249 | filters=128, 250 | kernel_size=[1, 1], 251 | strides=[1, 1], 252 | padding='valid', 253 | activation=tf.nn.relu, 254 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 255 | 
name='mid_conv6') 256 | self.current_heatmap = tf.layers.conv2d(inputs=mid_conv6, 257 | filters=self.joints+1, 258 | kernel_size=[1, 1], 259 | strides=[1, 1], 260 | padding='valid', 261 | activation=None, 262 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 263 | name='mid_conv7') 264 | self.stage_heatmap.append(self.current_heatmap) 265 | 266 | def build_loss(self, lr, lr_decay_rate, lr_decay_step, optimizer='Adam'): 267 | self.total_loss = 0 268 | self.init_lr = lr 269 | self.lr_decay_rate = lr_decay_rate 270 | self.lr_decay_step = lr_decay_step 271 | self.optimizer = optimizer 272 | self.batch_size = tf.cast(tf.shape(self.input_images)[0], dtype=tf.float32) 273 | 274 | 275 | for stage in range(self.stages): 276 | with tf.variable_scope('stage' + str(stage + 1) + '_loss'): 277 | self.stage_loss[stage] = tf.nn.l2_loss(self.stage_heatmap[stage] - self.gt_hmap_placeholder, 278 | name='l2_loss') / self.batch_size 279 | tf.summary.scalar('stage' + str(stage + 1) + '_loss', self.stage_loss[stage]) 280 | 281 | with tf.variable_scope('total_loss'): 282 | for stage in range(self.stages): 283 | self.total_loss += self.stage_loss[stage] 284 | tf.summary.scalar('total loss'.format(self.inference_type), self.total_loss) 285 | 286 | with tf.variable_scope('train'): 287 | self.global_step = tf.contrib.framework.get_or_create_global_step() 288 | 289 | self.cur_lr = tf.train.exponential_decay(self.init_lr, 290 | global_step=self.global_step, 291 | decay_rate=self.lr_decay_rate, 292 | decay_steps=self.lr_decay_step) 293 | tf.summary.scalar('global learning rate', self.cur_lr) 294 | 295 | self.train_op = tf.contrib.layers.optimize_loss(loss=self.total_loss, 296 | global_step=self.global_step, 297 | learning_rate=self.cur_lr, 298 | optimizer=self.optimizer) 299 | -------------------------------------------------------------------------------- /run_demo_hand_with_tracker.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import math 3 | import os 4 | import time 5 | 6 | import cv2 7 | import numpy as np 8 | import tensorflow as tf 9 | from config import FLAGS 10 | from utils import cpm_utils, tracking_module, utils 11 | 12 | cpm_model = importlib.import_module('models.nets.' 
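run_demo_hand_with_tracker.py picks the network implementation at runtime from the FLAGS.network_def string, so switching between cpm_hand, cpm_hand_slim and cpm_hand_v2 is purely a config change. The pattern in isolation ('cpm_hand_v2' is just an example value):

```python
import importlib


def load_model_class(network_def='cpm_hand_v2'):
    module = importlib.import_module('models.nets.' + network_def)
    return module.CPM_Model  # each net module exposes a CPM_Model class
```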
+ FLAGS.network_def) 13 | 14 | joint_detections = np.zeros(shape=(21, 2)) 15 | 16 | 17 | def main(argv): 18 | global joint_detections 19 | os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu_id) 20 | 21 | """ Initial tracker 22 | """ 23 | tracker = tracking_module.SelfTracker([FLAGS.webcam_height, FLAGS.webcam_width], FLAGS.input_size) 24 | 25 | """ Build network graph 26 | """ 27 | model = cpm_model.CPM_Model(input_size=FLAGS.input_size, 28 | heatmap_size=FLAGS.heatmap_size, 29 | stages=FLAGS.cpm_stages, 30 | joints=FLAGS.num_of_joints, 31 | img_type=FLAGS.color_channel, 32 | is_training=False) 33 | saver = tf.train.Saver() 34 | 35 | """ Get output node 36 | """ 37 | output_node = tf.get_default_graph().get_tensor_by_name(name=FLAGS.output_node_names) 38 | 39 | device_count = {'GPU': 1} if FLAGS.use_gpu else {'GPU': 0} 40 | sess_config = tf.ConfigProto(device_count=device_count) 41 | sess_config.gpu_options.per_process_gpu_memory_fraction = 0.2 42 | sess_config.gpu_options.allow_growth = True 43 | sess_config.allow_soft_placement = True 44 | with tf.Session(config=sess_config) as sess: 45 | 46 | model_path_suffix = os.path.join(FLAGS.network_def, 47 | 'input_{}_output_{}'.format(FLAGS.input_size, FLAGS.heatmap_size), 48 | 'joints_{}'.format(FLAGS.num_of_joints), 49 | 'stages_{}'.format(FLAGS.cpm_stages), 50 | 'init_{}_rate_{}_step_{}'.format(FLAGS.init_lr, FLAGS.lr_decay_rate, 51 | FLAGS.lr_decay_step) 52 | ) 53 | model_save_dir = os.path.join('models', 54 | 'weights', 55 | model_path_suffix) 56 | print('Load model from [{}]'.format(os.path.join(model_save_dir, FLAGS.model_path))) 57 | if FLAGS.model_path.endswith('pkl'): 58 | model.load_weights_from_file(FLAGS.model_path, sess, False) 59 | else: 60 | saver.restore(sess, 'models/weights/cpm_hand') 61 | 62 | # Check weights 63 | for variable in tf.global_variables(): 64 | with tf.variable_scope('', reuse=True): 65 | var = tf.get_variable(variable.name.split(':0')[0]) 66 | print(variable.name, np.mean(sess.run(var))) 67 | 68 | # Create webcam instance 69 | if FLAGS.DEMO_TYPE in ['MULTI', 'SINGLE', 'Joint_HM']: 70 | cam = cv2.VideoCapture(FLAGS.cam_id) 71 | 72 | # Create kalman filters 73 | if FLAGS.use_kalman: 74 | kalman_filter_array = [cv2.KalmanFilter(4, 2) for _ in range(FLAGS.num_of_joints)] 75 | for _, joint_kalman_filter in enumerate(kalman_filter_array): 76 | joint_kalman_filter.transitionMatrix = np.array( 77 | [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]], 78 | np.float32) 79 | joint_kalman_filter.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32) 80 | joint_kalman_filter.processNoiseCov = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]], 81 | np.float32) * FLAGS.kalman_noise 82 | else: 83 | kalman_filter_array = None 84 | 85 | if FLAGS.DEMO_TYPE.endswith(('png', 'jpg')): 86 | test_img = cpm_utils.read_image(FLAGS.DEMO_TYPE, [], FLAGS.input_size, 'IMAGE') 87 | test_img_resize = cv2.resize(test_img, (FLAGS.input_size, FLAGS.input_size)) 88 | 89 | test_img_input = normalize_and_centralize_img(test_img_resize) 90 | 91 | t1 = time.time() 92 | predict_heatmap, stage_heatmap_np = sess.run([model.current_heatmap, 93 | output_node, 94 | ], 95 | feed_dict={model.input_images: test_img_input} 96 | ) 97 | print('fps: %.2f' % (1 / (time.time() - t1))) 98 | 99 | correct_and_draw_hand(test_img, 100 | cv2.resize(stage_heatmap_np[0], (FLAGS.input_size, FLAGS.input_size)), 101 | kalman_filter_array, tracker, tracker.input_crop_ratio, test_img) 102 | 103 | # Show visualized image 104 | # demo_img = 
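Each joint above gets its own constant-velocity Kalman filter: a four-dimensional state (x, y, vx, vy) with only (x, y) measured, and process noise scaled by FLAGS.kalman_noise. The same construction as a standalone helper (the 3e-5 default is an assumed, illustrative value, not the config default):

```python
import cv2
import numpy as np


def make_joint_filter(kalman_noise=3e-5):
    kf = cv2.KalmanFilter(4, 2)  # state (x, y, vx, vy), measurement (x, y)
    kf.transitionMatrix = np.array([[1, 0, 1, 0], [0, 1, 0, 1],
                                    [0, 0, 1, 0], [0, 0, 0, 1]], np.float32)
    kf.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32)
    kf.processNoiseCov = np.eye(4, dtype=np.float32) * kalman_noise
    return kf
```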
visualize_result(test_img, stage_heatmap_np, kalman_filter_array) 105 | cv2.imshow('demo_img', test_img.astype(np.uint8)) 106 | cv2.waitKey(0) 107 | 108 | elif FLAGS.DEMO_TYPE in ['SINGLE', 'MULTI']: 109 | i = 0 110 | while True: 111 | # Prepare input image 112 | _, full_img = cam.read() 113 | 114 | test_img = tracker.tracking_by_joints(full_img, joint_detections=joint_detections) 115 | crop_full_scale = tracker.input_crop_ratio 116 | test_img_copy = test_img.copy() 117 | 118 | # White balance 119 | test_img_wb = utils.img_white_balance(test_img, 5) 120 | test_img_input = normalize_and_centralize_img(test_img_wb) 121 | 122 | # Inference 123 | t1 = time.time() 124 | stage_heatmap_np = sess.run([output_node], 125 | feed_dict={model.input_images: test_img_input}) 126 | print('FPS: %.2f' % (1 / (time.time() - t1))) 127 | 128 | local_img = visualize_result(full_img, stage_heatmap_np, kalman_filter_array, tracker, crop_full_scale, 129 | test_img_copy) 130 | 131 | cv2.imshow('local_img', local_img.astype(np.uint8)) # cropped hand image (used as training data) 132 | # cv2.imwrite('./storePic/11'+str(i)+'.jpg', local_img.astype(np.uint8), [int(cv2.IMWRITE_JPEG_QUALITY), 90]) 133 | i += 1 134 | cv2.imshow('global_img', full_img.astype(np.uint8)) # full frame with the single-hand bounding box 135 | 136 | if cv2.waitKey(1) == ord('q'): break 137 | 138 | elif FLAGS.DEMO_TYPE == 'Joint_HM': 139 | while True: 140 | # Prepare input image 141 | test_img = cpm_utils.read_image([], cam, FLAGS.input_size, 'WEBCAM') 142 | test_img_resize = cv2.resize(test_img, (FLAGS.input_size, FLAGS.input_size)) 143 | 144 | test_img_input = normalize_and_centralize_img(test_img_resize) 145 | 146 | # Inference 147 | t1 = time.time() 148 | stage_heatmap_np = sess.run([output_node], 149 | feed_dict={model.input_images: test_img_input}) 150 | print('FPS: %.2f' % (1 / (time.time() - t1))) 151 | 152 | demo_stage_heatmap = stage_heatmap_np[len(stage_heatmap_np) - 1][0, :, :, 153 | 0:FLAGS.num_of_joints].reshape( 154 | (FLAGS.heatmap_size, FLAGS.heatmap_size, FLAGS.num_of_joints)) 155 | demo_stage_heatmap = cv2.resize(demo_stage_heatmap, (FLAGS.input_size, FLAGS.input_size)) 156 | 157 | vertical_imgs = [] 158 | tmp_img = None 159 | joint_coord_set = np.zeros((FLAGS.num_of_joints, 2)) 160 | 161 | for joint_num in range(FLAGS.num_of_joints): 162 | # Stack four heatmaps per column 163 | if (joint_num % 4) == 0 and joint_num != 0: 164 | vertical_imgs.append(tmp_img) 165 | tmp_img = None 166 | 167 | demo_stage_heatmap[:, :, joint_num] *= (255 / np.max(demo_stage_heatmap[:, :, joint_num])) 168 | 169 | # Plot color joints 170 | if np.min(demo_stage_heatmap[:, :, joint_num]) > -50: 171 | joint_coord = np.unravel_index(np.argmax(demo_stage_heatmap[:, :, joint_num]), 172 | (FLAGS.input_size, FLAGS.input_size)) 173 | joint_coord_set[joint_num, :] = joint_coord 174 | color_code_num = (joint_num // 4) 175 | 176 | if joint_num in [0, 4, 8, 12, 16]: 177 | joint_color = list( 178 | map(lambda x: x + 35 * (joint_num % 4), FLAGS.joint_color_code[color_code_num])) 179 | cv2.circle(test_img, center=(joint_coord[1], joint_coord[0]), radius=3, color=joint_color, 180 | thickness=-1) 181 | else: 182 | joint_color = list( 183 | map(lambda x: x + 35 * (joint_num % 4), FLAGS.joint_color_code[color_code_num])) 184 | cv2.circle(test_img, center=(joint_coord[1], joint_coord[0]), radius=3, color=joint_color, 185 | thickness=-1) 186 | 187 | # Put text 188 | tmp = demo_stage_heatmap[:, :, joint_num].astype(np.uint8) 189 | tmp = cv2.putText(tmp, 'Min:' + str(np.min(demo_stage_heatmap[:, :, joint_num])), 190 | org=(5, 20), 
fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.3, color=150) 191 | tmp = cv2.putText(tmp, 'Mean:' + str(np.mean(demo_stage_heatmap[:, :, joint_num])), 192 | org=(5, 30), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.3, color=150) 193 | tmp_img = np.concatenate((tmp_img, tmp), axis=0) \ 194 | if tmp_img is not None else tmp 195 | 196 | # Plot FLAGS.limbs 197 | for limb_num in range(len(FLAGS.limbs)): 198 | if np.min(demo_stage_heatmap[:, :, FLAGS.limbs[limb_num][0]]) > -2000 and np.min( 199 | demo_stage_heatmap[:, :, FLAGS.limbs[limb_num][1]]) > -2000: 200 | x1 = joint_coord_set[FLAGS.limbs[limb_num][0], 0] 201 | y1 = joint_coord_set[FLAGS.limbs[limb_num][0], 1] 202 | x2 = joint_coord_set[FLAGS.limbs[limb_num][1], 0] 203 | y2 = joint_coord_set[FLAGS.limbs[limb_num][1], 1] 204 | length = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5 205 | if length < 10000 and length > 5: 206 | deg = math.degrees(math.atan2(x1 - x2, y1 - y2)) 207 | polygon = cv2.ellipse2Poly((int((y1 + y2) / 2), int((x1 + x2) / 2)), 208 | (int(length / 2), 3), 209 | int(deg), 210 | 0, 360, 1) 211 | color_code_num = limb_num // 4 212 | limb_color = list( 213 | map(lambda x: x + 35 * (limb_num % 4), FLAGS.joint_color_code[color_code_num])) 214 | 215 | cv2.fillConvexPoly(test_img, polygon, color=limb_color) 216 | 217 | if tmp_img is not None: 218 | tmp_img = np.lib.pad(tmp_img, ((0, vertical_imgs[0].shape[0] - tmp_img.shape[0]), (0, 0)), 219 | 'constant', constant_values=(0, 0)) 220 | vertical_imgs.append(tmp_img) 221 | 222 | # Concat horizontally 223 | output_img = None 224 | for col in range(len(vertical_imgs)): 225 | output_img = np.concatenate((output_img, vertical_imgs[col]), axis=1) if output_img is not None else \ 226 | vertical_imgs[col] 227 | 228 | output_img = output_img.astype(np.uint8) 229 | output_img = cv2.applyColorMap(output_img, cv2.COLORMAP_JET) 230 | test_img = cv2.resize(test_img, (300, 300), cv2.INTER_LANCZOS4) 231 | cv2.imshow('hm', output_img) 232 | cv2.moveWindow('hm', 2000, 200) 233 | cv2.imshow('rgb', test_img) 234 | cv2.moveWindow('rgb', 2000, 750) 235 | if cv2.waitKey(1) == ord('q'): break 236 | 237 | 238 | def normalize_and_centralize_img(img): 239 | if FLAGS.color_channel == 'GRAY': 240 | img = np.dot(img[..., :3], [0.299, 0.587, 0.114]).reshape((FLAGS.input_size, FLAGS.input_size, 1)) 241 | 242 | if FLAGS.normalize_img: 243 | test_img_input = img / 256.0 - 0.5 244 | test_img_input = np.expand_dims(test_img_input, axis=0) 245 | else: 246 | test_img_input = img - 128.0 247 | test_img_input = np.expand_dims(test_img_input, axis=0) 248 | return test_img_input 249 | 250 | 251 | def visualize_result(test_img, stage_heatmap_np, kalman_filter_array, tracker, crop_full_scale, crop_img): 252 | demo_stage_heatmaps = [] 253 | if FLAGS.DEMO_TYPE == 'MULTI': 254 | for stage in range(len(stage_heatmap_np)): 255 | demo_stage_heatmap = stage_heatmap_np[stage][0, :, :, 0:FLAGS.num_of_joints].reshape( 256 | (FLAGS.heatmap_size, FLAGS.heatmap_size, FLAGS.num_of_joints)) 257 | demo_stage_heatmap = cv2.resize(demo_stage_heatmap, (FLAGS.input_size, FLAGS.input_size)) 258 | demo_stage_heatmap = np.amax(demo_stage_heatmap, axis=2) 259 | demo_stage_heatmap = np.reshape(demo_stage_heatmap, (FLAGS.input_size, FLAGS.input_size, 1)) 260 | demo_stage_heatmap = np.repeat(demo_stage_heatmap, 3, axis=2) 261 | demo_stage_heatmap *= 255 262 | demo_stage_heatmaps.append(demo_stage_heatmap) 263 | 264 | last_heatmap = stage_heatmap_np[len(stage_heatmap_np) - 1][0, :, :, 0:FLAGS.num_of_joints].reshape( 265 | (FLAGS.heatmap_size, 
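normalize_and_centralize_img above has two modes: scale pixels to roughly [-0.5, 0.5], or just subtract 128; both then prepend a batch dimension. The same arithmetic without the FLAGS dependency, for reference:

```python
import numpy as np


def normalize(img, normalize_img=True):
    img = img.astype(np.float32)
    out = img / 256.0 - 0.5 if normalize_img else img - 128.0
    return np.expand_dims(out, axis=0)  # add the batch dimension
```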
FLAGS.heatmap_size, FLAGS.num_of_joints)) 266 | last_heatmap = cv2.resize(last_heatmap, (FLAGS.input_size, FLAGS.input_size)) 267 | else: 268 | last_heatmap = stage_heatmap_np[len(stage_heatmap_np) - 1][0, :, :, 0:FLAGS.num_of_joints].reshape( 269 | (FLAGS.heatmap_size, FLAGS.heatmap_size, FLAGS.num_of_joints)) 270 | last_heatmap = cv2.resize(last_heatmap, (FLAGS.input_size, FLAGS.input_size)) 271 | 272 | correct_and_draw_hand(test_img, last_heatmap, kalman_filter_array, tracker, crop_full_scale, crop_img) 273 | 274 | if FLAGS.DEMO_TYPE == 'MULTI': 275 | if len(demo_stage_heatmaps) > 3: 276 | upper_img = np.concatenate((demo_stage_heatmaps[0], demo_stage_heatmaps[1], demo_stage_heatmaps[2]), axis=1) 277 | lower_img = np.concatenate( 278 | (demo_stage_heatmaps[3], demo_stage_heatmaps[len(stage_heatmap_np) - 1], crop_img), 279 | axis=1) 280 | demo_img = np.concatenate((upper_img, lower_img), axis=0) 281 | return demo_img 282 | else: 283 | # return np.concatenate((demo_stage_heatmaps[0], demo_stage_heatmaps[len(stage_heatmap_np) - 1], crop_img), 284 | # axis=1) 285 | 286 | return demo_stage_heatmaps[0] 287 | # np.concatenate merges the arrays 288 | 289 | else: 290 | return crop_img 291 | 292 | 293 | def correct_and_draw_hand(full_img, stage_heatmap_np, kalman_filter_array, tracker, crop_full_scale, crop_img): 294 | global joint_detections 295 | joint_coord_set = np.zeros((FLAGS.num_of_joints, 2)) 296 | local_joint_coord_set = np.zeros((FLAGS.num_of_joints, 2)) 297 | 298 | mean_response_val = 0.0 299 | 300 | # Plot joint colors 301 | if kalman_filter_array is not None: 302 | for joint_num in range(FLAGS.num_of_joints): 303 | tmp_heatmap = stage_heatmap_np[:, :, joint_num] 304 | joint_coord = np.unravel_index(np.argmax(tmp_heatmap), 305 | (FLAGS.input_size, FLAGS.input_size)) 306 | mean_response_val += tmp_heatmap[joint_coord[0], joint_coord[1]] 307 | joint_coord = np.array(joint_coord).reshape((2, 1)).astype(np.float32) 308 | kalman_filter_array[joint_num].correct(joint_coord) 309 | kalman_pred = kalman_filter_array[joint_num].predict() 310 | correct_coord = np.array([kalman_pred[0], kalman_pred[1]]).reshape((2)) 311 | local_joint_coord_set[joint_num, :] = correct_coord 312 | 313 | # Resize back 314 | correct_coord /= crop_full_scale 315 | 316 | # Subtract padding border 317 | correct_coord[0] -= (tracker.pad_boundary[0] / crop_full_scale) 318 | correct_coord[1] -= (tracker.pad_boundary[2] / crop_full_scale) 319 | correct_coord[0] += tracker.bbox[0] 320 | correct_coord[1] += tracker.bbox[2] 321 | joint_coord_set[joint_num, :] = correct_coord 322 | 323 | else: 324 | for joint_num in range(FLAGS.num_of_joints): 325 | tmp_heatmap = stage_heatmap_np[:, :, joint_num] 326 | joint_coord = np.unravel_index(np.argmax(tmp_heatmap), 327 | (FLAGS.input_size, FLAGS.input_size)) 328 | mean_response_val += tmp_heatmap[joint_coord[0], joint_coord[1]] 329 | joint_coord = np.array(joint_coord).astype(np.float32) 330 | 331 | local_joint_coord_set[joint_num, :] = joint_coord 332 | 333 | # Resize back 334 | joint_coord /= crop_full_scale 335 | 336 | # Subtract padding border 337 | joint_coord[0] -= (tracker.pad_boundary[2] / crop_full_scale) 338 | joint_coord[1] -= (tracker.pad_boundary[0] / crop_full_scale) 339 | joint_coord[0] += tracker.bbox[0] 340 | joint_coord[1] += tracker.bbox[2] 341 | joint_coord_set[joint_num, :] = joint_coord 342 | 343 | draw_hand(full_img, joint_coord_set, tracker.loss_track) 344 | draw_hand(crop_img, local_joint_coord_set, tracker.loss_track) 345 | joint_detections = joint_coord_set 346 | 347 
| if mean_response_val >= 1: 348 | tracker.loss_track = False 349 | else: 350 | tracker.loss_track = True 351 | 352 | cv2.putText(full_img, 'Response: {:<.3f}'.format(mean_response_val), 353 | org=(20, 20), fontFace=cv2.FONT_HERSHEY_PLAIN, fontScale=1, color=(255, 0, 0)) 354 | 355 | 356 | def draw_hand(full_img, joint_coords, is_loss_track): 357 | if is_loss_track: 358 | joint_coords = FLAGS.default_hand 359 | 360 | # Plot joints 361 | for joint_num in range(FLAGS.num_of_joints): 362 | color_code_num = (joint_num // 4) 363 | if joint_num in [0, 4, 8, 12, 16]: 364 | joint_color = list(map(lambda x: x + 35 * (joint_num % 4), FLAGS.joint_color_code[color_code_num])) 365 | cv2.circle(full_img, center=(int(joint_coords[joint_num][1]), int(joint_coords[joint_num][0])), radius=3, 366 | color=joint_color, thickness=-1) 367 | else: 368 | joint_color = list(map(lambda x: x + 35 * (joint_num % 4), FLAGS.joint_color_code[color_code_num])) 369 | cv2.circle(full_img, center=(int(joint_coords[joint_num][1]), int(joint_coords[joint_num][0])), radius=3, 370 | color=joint_color, thickness=-1) 371 | 372 | # Plot limbs 373 | for limb_num in range(len(FLAGS.limbs)): 374 | x1 = int(joint_coords[int(FLAGS.limbs[limb_num][0])][0]) 375 | y1 = int(joint_coords[int(FLAGS.limbs[limb_num][0])][1]) 376 | x2 = int(joint_coords[int(FLAGS.limbs[limb_num][1])][0]) 377 | y2 = int(joint_coords[int(FLAGS.limbs[limb_num][1])][1]) 378 | length = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5 379 | if length < 150 and length > 5: 380 | deg = math.degrees(math.atan2(x1 - x2, y1 - y2)) 381 | polygon = cv2.ellipse2Poly((int((y1 + y2) / 2), int((x1 + x2) / 2)), 382 | (int(length / 2), 3), 383 | int(deg), 384 | 0, 360, 1) 385 | color_code_num = limb_num // 4 386 | limb_color = list(map(lambda x: x + 35 * (limb_num % 4), FLAGS.joint_color_code[color_code_num])) 387 | cv2.fillConvexPoly(full_img, polygon, color=limb_color) 388 | 389 | 390 | if __name__ == '__main__': 391 | tf.app.run() 392 | -------------------------------------------------------------------------------- /useClassifyModel.py: -------------------------------------------------------------------------------- 1 | from skimage import io, transform 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | 6 | path1 = "./storePic/5.jpg" 7 | path2 = "./storePic/7.jpg" 8 | path3 = "./storePic/102304.jpg" 9 | path4 = "./storePic/12034.jpg" 10 | path5 = "./storePic/ok.jpg" 11 | path6 = "./storePic/2.jpg" 12 | 13 | gesture_dict = {0:'5',1:'7',2:'12034',3:'ok',4:'102304',5:'2'} 14 | 15 | w=100 16 | h=100 17 | c=3 18 | 19 | 20 | def read_one_image(path): 21 | img = io.imread(path) 22 | img = transform.resize(img,(w,h)) 23 | return np.asarray(img) 24 | 25 | with tf.Session() as sess: 26 | data = [] 27 | data1 = read_one_image(path1) 28 | data2 = read_one_image(path2) 29 | data3 = read_one_image(path3) 30 | data4 = read_one_image(path4) 31 | data5 = read_one_image(path5) 32 | data6 = read_one_image(path6) 33 | data.append(data1) 34 | data.append(data2) 35 | data.append(data3) 36 | data.append(data4) 37 | data.append(data5) 38 | data.append(data6) 39 | saver = tf.train.import_meta_graph('./classify/modelSave/model.ckpt.meta') 40 | saver.restore(sess,tf.train.latest_checkpoint('./classify/modelSave/')) 41 | 42 | graph = tf.get_default_graph() 43 | x = graph.get_tensor_by_name("x:0") 44 | feed_dict = {x:data} 45 | 46 | logits = graph.get_tensor_by_name("logits_eval:0") 47 | 48 | classification_result = sess.run(logits,feed_dict) 49 | 50 | # Print the prediction matrix 51 | print(classification_result) 52 | # Print the index of the max value in each row of the prediction matrix 53 | print(tf.argmax(classification_result, 1).eval()) 54 | # Map each index to its gesture class via the lookup dict 55 | output = [] 56 | output = tf.argmax(classification_result, 1).eval() 57 | for i in range(len(output)): 58 | print("Gesture", i + 1, "prediction: " + gesture_dict[output[i]]) 59 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/utils/__init__.py -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/cpm_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/utils/__pycache__/cpm_utils.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/tracking_module.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/utils/__pycache__/tracking_module.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/utils/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /utils/cpm_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import cv2 4 | 5 | 6 | M_PI = 3.14159 7 | 8 | 9 | # Compute gaussian kernel for input image 10 | def gaussian_img(img_height, img_width, c_x, c_y, variance): 11 | gaussian_map = np.zeros((img_height, img_width)) 12 | for x_p in range(img_width): 13 | for y_p in range(img_height): 14 | dist_sq = (x_p - c_x) * (x_p - c_x) + \ 15 | (y_p - c_y) * (y_p - c_y) 16 | exponent = dist_sq / 2.0 / variance / variance 17 | gaussian_map[y_p, x_p] = np.exp(-exponent) 18 | return gaussian_map 19 | 20 | 21 | def read_image(file, cam, boxsize, type): 22 | # from file 23 | if type == 'IMAGE': 24 | oriImg = cv2.imread(file) 25 | # from webcam 26 | elif type == 'WEBCAM': 27 | _, oriImg = cam.read() 28 | # from video 29 | elif type == 'VIDEO': 30 | oriImg = cv2.cvtColor(file, cv2.COLOR_BGR2RGB) 31 | 32 | if oriImg is None: 33 | print('oriImg is None') 34 | return None 35 | 36 | scale = boxsize / (oriImg.shape[0] * 1.0) 37 | imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LANCZOS4) 38 | 39 | output_img = np.ones((boxsize, boxsize, 3)) * 128 40 | 41 | img_h = imageToTest.shape[0] 42 | img_w = imageToTest.shape[1] 43 | if img_w < boxsize: 44 | offset = img_w % 2 45 | # center the original image 46 | output_img[:, int(boxsize / 2 - math.floor(img_w / 2)):int( 47 | boxsize / 2 + math.floor(img_w / 2) + offset), 
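gaussian_img above fills the map with two nested Python loops. A vectorized NumPy equivalent of the same exp(-d^2 / (2 * variance^2)) kernel, typically much faster for large maps:

```python
import numpy as np


def gaussian_img_fast(img_height, img_width, c_x, c_y, variance):
    ys, xs = np.mgrid[0:img_height, 0:img_width]
    dist_sq = (xs - c_x) ** 2 + (ys - c_y) ** 2
    return np.exp(-dist_sq / (2.0 * variance ** 2))
```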
:] = imageToTest 48 | else: 49 | # crop the center of the origin image 50 | output_img = imageToTest[:, 51 | int(img_w / 2 - boxsize / 2):int(img_w / 2 + boxsize / 2), :] 52 | return output_img 53 | 54 | 55 | def make_gaussian(size, fwhm=3, center=None): 56 | """ Make a square gaussian kernel. 57 | size is the length of a side of the square 58 | fwhm is full-width-half-maximum, which 59 | can be thought of as an effective radius. 60 | """ 61 | 62 | x = np.arange(0, size, 1, float) 63 | y = x[:, np.newaxis] 64 | 65 | if center is None: 66 | x0 = y0 = size // 2 67 | else: 68 | x0 = center[0] 69 | y0 = center[1] 70 | 71 | return np.exp(-((x - x0) ** 2 + (y - y0) ** 2) / 2.0 / fwhm / fwhm) 72 | 73 | 74 | def make_gaussian_batch(heatmaps, size, fwhm): 75 | """ Make a square gaussian kernel. 76 | size is the length of a side of the square 77 | fwhm is full-width-half-maximum, which 78 | can be thought of as an effective radius. 79 | """ 80 | stride = heatmaps.shape[1] // size 81 | 82 | batch_datum = np.zeros(shape=(heatmaps.shape[0], size, size, heatmaps.shape[3])) 83 | 84 | for data_num in range(heatmaps.shape[0]): 85 | for joint_num in range(heatmaps.shape[3] - 1): 86 | heatmap = heatmaps[data_num, :, :, joint_num] 87 | center = np.unravel_index(np.argmax(heatmap), (heatmap.shape[0], heatmap.shape[1])) 88 | 89 | x = np.arange(0, size, 1, float) 90 | y = x[:, np.newaxis] 91 | 92 | if center is None: 93 | x0 = y0 = size * stride // 2 94 | else: 95 | x0 = center[1] 96 | y0 = center[0] 97 | 98 | batch_datum[data_num, :, :, joint_num] = np.exp( 99 | -((x * stride - x0) ** 2 + (y * stride - y0) ** 2) / 2.0 / fwhm / fwhm) 100 | batch_datum[data_num, :, :, heatmaps.shape[3] - 1] = np.ones((size, size)) - np.amax( 101 | batch_datum[data_num, :, :, 0:heatmaps.shape[3] - 1], axis=2) 102 | 103 | return batch_datum 104 | 105 | 106 | def make_heatmaps_from_joints(input_size, heatmap_size, gaussian_variance, batch_joints): 107 | # Generate ground-truth heatmaps from ground-truth 2d joints 108 | scale_factor = input_size // heatmap_size 109 | batch_gt_heatmap_np = [] 110 | for i in range(batch_joints.shape[0]): 111 | gt_heatmap_np = [] 112 | invert_heatmap_np = np.ones(shape=(heatmap_size, heatmap_size)) 113 | for j in range(batch_joints.shape[1]): 114 | cur_joint_heatmap = make_gaussian(heatmap_size, 115 | gaussian_variance, 116 | center=(batch_joints[i][j] // scale_factor)) 117 | gt_heatmap_np.append(cur_joint_heatmap) 118 | invert_heatmap_np -= cur_joint_heatmap 119 | gt_heatmap_np.append(invert_heatmap_np) 120 | batch_gt_heatmap_np.append(gt_heatmap_np) 121 | batch_gt_heatmap_np = np.asarray(batch_gt_heatmap_np) 122 | batch_gt_heatmap_np = np.transpose(batch_gt_heatmap_np, (0, 2, 3, 1)) 123 | 124 | return batch_gt_heatmap_np 125 | 126 | 127 | def make_heatmaps_from_joints_openpose(input_size, heatmap_size, gaussian_variance, batch_joints): 128 | joint_map = [4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13, 20, 19, 18, 17, 0] 129 | # Generate ground-truth heatmaps from ground-truth 2d joints 130 | scale_factor = input_size // heatmap_size 131 | batch_gt_heatmap_np = [] 132 | for i in range(batch_joints.shape[0]): 133 | gt_heatmap_np = [] 134 | invert_heatmap_np = np.ones(shape=(heatmap_size, heatmap_size)) 135 | for j in range(batch_joints.shape[1]): 136 | cur_joint_heatmap = make_gaussian(heatmap_size, 137 | gaussian_variance, 138 | center=(batch_joints[i][joint_map[j]] // scale_factor)) 139 | gt_heatmap_np.append(cur_joint_heatmap) 140 | invert_heatmap_np -= cur_joint_heatmap 141 | 
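Throughout this repo a joint location is decoded as the argmax of its response map (make_gaussian_batch above, correct_and_draw_hand in the demo script). As a one-line helper:

```python
import numpy as np


def decode_joint(heatmap):
    # heatmap: (H, W) response map for one joint -> (row, col) of the peak
    return np.unravel_index(np.argmax(heatmap), heatmap.shape)
```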
gt_heatmap_np.append(invert_heatmap_np) 142 | batch_gt_heatmap_np.append(gt_heatmap_np) 143 | batch_gt_heatmap_np = np.asarray(batch_gt_heatmap_np) 144 | batch_gt_heatmap_np = np.transpose(batch_gt_heatmap_np, (0, 2, 3, 1)) 145 | 146 | return batch_gt_heatmap_np 147 | 148 | 149 | def rad2Deg(rad): 150 | return rad * (180 / M_PI) 151 | 152 | 153 | def deg2Rad(deg): 154 | return deg * (M_PI / 180) 155 | 156 | 157 | def warpMatrix(sw, sh, theta, phi, gamma, scale, fovy): 158 | st = math.sin(deg2Rad(theta)) 159 | ct = math.cos(deg2Rad(theta)) 160 | sp = math.sin(deg2Rad(phi)) 161 | cp = math.cos(deg2Rad(phi)) 162 | sg = math.sin(deg2Rad(gamma)) 163 | cg = math.cos(deg2Rad(gamma)) 164 | 165 | halfFovy = fovy * 0.5 166 | d = math.hypot(sw, sh) 167 | sideLength = scale * d / math.cos(deg2Rad(halfFovy)) 168 | h = d / (2.0 * math.sin(deg2Rad(halfFovy))) 169 | n = h - (d / 2.0) 170 | f = h + (d / 2.0) 171 | 172 | Rtheta = np.identity(4) 173 | Rphi = np.identity(4) 174 | Rgamma = np.identity(4) 175 | 176 | T = np.identity(4) 177 | P = np.zeros((4, 4)) 178 | 179 | Rtheta[0, 0] = Rtheta[1, 1] = ct 180 | Rtheta[0, 1] = -st 181 | Rtheta[1, 0] = st 182 | 183 | Rphi[1, 1] = Rphi[2, 2] = cp 184 | Rphi[1, 2] = -sp 185 | Rphi[2, 1] = sp 186 | 187 | Rgamma[0, 0] = cg 188 | Rgamma[2, 2] = cg 189 | Rgamma[0, 2] = sg 190 | Rgamma[2, 0] = sg 191 | 192 | T[2, 3] = -h 193 | 194 | P[0, 0] = P[1, 1] = 1.0 / math.tan(deg2Rad(halfFovy)) 195 | P[2, 2] = -(f + n) / (f - n) 196 | P[2, 3] = -(2.0 * f * n) / (f - n) 197 | P[3, 2] = -1.0 198 | 199 | F = np.matmul(Rtheta, Rgamma) 200 | F = np.matmul(Rphi, F) 201 | F = np.matmul(T, F) 202 | F = np.matmul(P, F) 203 | 204 | ptsIn = np.zeros(12) 205 | ptsOut = np.zeros(12) 206 | halfW = sw / 2 207 | halfH = sh / 2 208 | 209 | ptsIn[0] = -halfW 210 | ptsIn[1] = halfH 211 | ptsIn[3] = halfW 212 | ptsIn[4] = halfH 213 | ptsIn[6] = halfW 214 | ptsIn[7] = -halfH 215 | ptsIn[9] = -halfW 216 | ptsIn[10] = -halfH 217 | ptsIn[2] = ptsIn[5] = ptsIn[8] = ptsIn[11] = 0 218 | 219 | ptsInMat = np.array([[ptsIn[0], ptsIn[1], ptsIn[2]], [ptsIn[3], ptsIn[4], ptsIn[5]], [ptsIn[6], ptsIn[7], ptsIn[8]], 220 | [ptsIn[9], ptsIn[10], ptsIn[11]]], dtype=np.float32) 221 | ptsOutMat = np.array( 222 | [[ptsOut[0], ptsOut[1], ptsOut[2]], [ptsOut[3], ptsOut[4], ptsOut[5]], [ptsOut[6], ptsOut[7], ptsOut[8]], 223 | [ptsOut[9], ptsOut[10], ptsOut[11]]], dtype=np.float32) 224 | ptsInMat = np.array([ptsInMat]) 225 | ptsOutMat = cv2.perspectiveTransform(ptsInMat, F) 226 | 227 | ptsInPt2f = np.array([[0, 0], [0, 0], [0, 0], [0, 0]], dtype=np.float32) 228 | ptsOutPt2f = np.array([[0, 0], [0, 0], [0, 0], [0, 0]], dtype=np.float32) 229 | 230 | i = 0 231 | 232 | while i < 4: 233 | ptsInPt2f[i][0] = ptsIn[i * 3 + 0] + halfW 234 | ptsInPt2f[i][1] = ptsIn[i * 3 + 1] + halfH 235 | ptsOutPt2f[i][0] = (ptsOutMat[0][i][0] + 1) * sideLength * 0.5 236 | ptsOutPt2f[i][1] = (ptsOutMat[0][i][1] + 1) * sideLength * 0.5 237 | i = i + 1 238 | 239 | M = cv2.getPerspectiveTransform(ptsInPt2f, ptsOutPt2f) 240 | return M 241 | 242 | 243 | def warpImage(src, theta, phi, gamma, scale, fovy): 244 | halfFovy = fovy * 0.5 245 | d = math.hypot(src.shape[1], src.shape[0]) 246 | sideLength = scale * d / math.cos(deg2Rad(halfFovy)) 247 | sideLength = np.int32(sideLength) 248 | 249 | M = warpMatrix(src.shape[1], src.shape[0], theta, phi, gamma, scale, fovy) 250 | dst = cv2.warpPerspective(src, M, (sideLength, sideLength)) 251 | mid_x = mid_y = dst.shape[0] // 2 252 | target_x = target_y = src.shape[0] // 2 253 | offset = (target_x % 2) 254 | 255 | 
if len(dst.shape) == 3: 256 | dst = dst[mid_y - target_y:mid_y + target_y + offset, 257 | mid_x - target_x:mid_x + target_x + offset, 258 | :] 259 | else: 260 | dst = dst[mid_y - target_y:mid_y + target_y + offset, 261 | mid_x - target_x:mid_x + target_x + offset] 262 | 263 | return dst 264 | -------------------------------------------------------------------------------- /utils/create_cpm_tfr_fulljoints.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | import time 4 | 5 | import cv2 6 | import numpy as np 7 | import tensorflow as tf 8 | import utils 9 | 10 | tfr_file = 'cpm_sample_dataset.tfrecords' 11 | dataset_dir = '' 12 | 13 | SHOW_INFO = False 14 | box_size = 64 15 | num_of_joints = 21 16 | gaussian_radius = 2 17 | 18 | 19 | def _bytes_feature(value): 20 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 21 | 22 | 23 | def _int64_feature(value): 24 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 25 | 26 | 27 | def _float64_feature(value): 28 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 29 | 30 | 31 | # Create writer 32 | tfr_writer = tf.python_io.TFRecordWriter(tfr_file) 33 | 34 | img_count = 0 35 | t1 = time.time() 36 | # Loop each dir 37 | for person_dir in os.listdir(dataset_dir): 38 | if not os.path.isdir(dataset_dir + person_dir): continue 39 | 40 | gt_file = dataset_dir + person_dir + '/labels.txt' 41 | gt_content = open(gt_file, 'rb').readlines() 42 | 43 | for idx, line in enumerate(gt_content): 44 | line = line.split() 45 | 46 | # Check if it is a valid img file 47 | if not line[0].endswith(('jpg', 'png')): 48 | continue 49 | cur_img_path = dataset_dir + person_dir + '/imgs/' + line[0] 50 | cur_img = cv2.imread(cur_img_path) 51 | 52 | # Read in bbox and joints coords 53 | tmp = [float(x) for x in line[1:5]] 54 | cur_hand_bbox = [min([tmp[0], tmp[2]]), 55 | min([tmp[1], tmp[3]]), 56 | max([tmp[0], tmp[2]]), 57 | max([tmp[1], tmp[3]]) 58 | ] 59 | if cur_hand_bbox[0] < 0: cur_hand_bbox[0] = 0 60 | if cur_hand_bbox[1] < 0: cur_hand_bbox[1] = 0 61 | if cur_hand_bbox[2] > cur_img.shape[1]: cur_hand_bbox[2] = cur_img.shape[1] 62 | if cur_hand_bbox[3] > cur_img.shape[0]: cur_hand_bbox[3] = cur_img.shape[0] 63 | 64 | cur_hand_joints_x = [float(i) for i in line[9:49:2]] 65 | cur_hand_joints_x.append(float(line[7])) 66 | cur_hand_joints_y = [float(i) for i in line[10:49:2]] 67 | cur_hand_joints_y.append(float(line[8])) 68 | 69 | # Crop image and adjust joint coords 70 | cur_img = cur_img[int(float(cur_hand_bbox[1])):int(float(cur_hand_bbox[3])), 71 | int(float(cur_hand_bbox[0])):int(float(cur_hand_bbox[2])), 72 | :] 73 | cur_hand_joints_x = [x - cur_hand_bbox[0] for x in cur_hand_joints_x] 74 | cur_hand_joints_y = [x - cur_hand_bbox[1] for x in cur_hand_joints_y] 75 | 76 | # # Display joints 77 | # for i in range(len(cur_hand_joints_x)): 78 | # cv2.circle(cur_img, center=(int(cur_hand_joints_x[i]), int(cur_hand_joints_y[i])),radius=3, color=(255,0,0), thickness=-1) 79 | # cv2.imshow('', cur_img) 80 | # cv2.waitKey(500) 81 | # cv2.imshow('', cur_img) 82 | # cv2.waitKey(1) 83 | 84 | output_image = np.ones(shape=(box_size, box_size, 3)) * 128 85 | output_heatmaps = np.zeros((box_size, box_size, num_of_joints)) 86 | 87 | # Resize and pad image to fit output image size 88 | if cur_img.shape[0] > cur_img.shape[1]: 89 | scale = box_size / (cur_img.shape[0] * 1.0) 90 | 91 | # Relocalize points 92 | cur_hand_joints_x = map(lambda x: x * scale, 
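A portability note on the map() assignments that begin here: under Python 3 map() returns an iterator, so the later np.asarray(cur_hand_joints_x) produces a 0-d object array and the per-joint indexing fails; list comprehensions keep the original Python 2 behaviour. A minimal demonstration of the pitfall:

```python
import numpy as np

scale = 0.5
joints = [10.0, 20.0, 30.0]
as_map = np.asarray(map(lambda x: x * scale, joints))   # Py3: 0-d object array
as_list = np.asarray([x * scale for x in joints])       # shape (3,), as intended
print(as_map.shape, as_list.shape)                      # prints: () (3,)
```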
cur_hand_joints_x) 93 | cur_hand_joints_y = map(lambda x: x * scale, cur_hand_joints_y) 94 | 95 | # Resize image 96 | image = cv2.resize(cur_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LANCZOS4) 97 | offset = image.shape[1] % 2 98 | 99 | output_image[:, int(box_size / 2 - math.floor(image.shape[1] / 2)): int( 100 | box_size / 2 + math.floor(image.shape[1] / 2) + offset), :] = image 101 | cur_hand_joints_x = map(lambda x: x + (box_size / 2 - math.floor(image.shape[1] / 2)), 102 | cur_hand_joints_x) 103 | 104 | cur_hand_joints_x = np.asarray(cur_hand_joints_x) 105 | cur_hand_joints_y = np.asarray(cur_hand_joints_y) 106 | 107 | if SHOW_INFO: 108 | hmap = np.zeros((box_size, box_size)) 109 | # Plot joints 110 | for i in range(num_of_joints): 111 | cv2.circle(output_image, (int(cur_hand_joints_x[i]), int(cur_hand_joints_y[i])), 3, (0, 255, 0), 2) 112 | 113 | # Generate joint gaussian map 114 | part_heatmap = utils.make_gaussian(output_image.shape[0], gaussian_radius, 115 | [cur_hand_joints_x[i], cur_hand_joints_y[i]]) 116 | hmap += part_heatmap * 50 117 | else: 118 | for i in range(num_of_joints): 119 | output_heatmaps[:, :, i] = utils.make_gaussian(box_size, gaussian_radius, 120 | [cur_hand_joints_x[i], cur_hand_joints_y[i]]) 121 | 122 | else: 123 | scale = box_size / (cur_img.shape[1] * 1.0) 124 | 125 | # Relocalize points 126 | cur_hand_joints_x = map(lambda x: x * scale, cur_hand_joints_x) 127 | cur_hand_joints_y = map(lambda x: x * scale, cur_hand_joints_y) 128 | 129 | # Resize image 130 | image = cv2.resize(cur_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LANCZOS4) 131 | offset = image.shape[0] % 2 132 | 133 | output_image[int(box_size / 2 - math.floor(image.shape[0] / 2)): int( 134 | box_size / 2 + math.floor(image.shape[0] / 2) + offset), :, :] = image 135 | cur_hand_joints_y = map(lambda x: x + (box_size / 2 - math.floor(image.shape[0] / 2)), 136 | cur_hand_joints_y) 137 | 138 | cur_hand_joints_x = np.asarray(cur_hand_joints_x) 139 | cur_hand_joints_y = np.asarray(cur_hand_joints_y) 140 | 141 | if SHOW_INFO: 142 | hmap = np.zeros((box_size, box_size)) 143 | # Plot joints 144 | for i in range(num_of_joints): 145 | cv2.circle(output_image, (int(cur_hand_joints_x[i]), int(cur_hand_joints_y[i])), 3, (0, 255, 0), 2) 146 | 147 | # Generate joint gaussian map 148 | part_heatmap = utils.make_gaussian(output_image.shape[0], gaussian_radius, 149 | [cur_hand_joints_x[i], cur_hand_joints_y[i]]) 150 | hmap += part_heatmap * 50 151 | else: 152 | for i in range(num_of_joints): 153 | output_heatmaps[:, :, i] = utils.make_gaussian(box_size, gaussian_radius, 154 | [cur_hand_joints_x[i], cur_hand_joints_y[i]]) 155 | if SHOW_INFO: 156 | cv2.imshow('', hmap.astype(np.uint8)) 157 | cv2.imshow('i', output_image.astype(np.uint8)) 158 | cv2.waitKey(0) 159 | 160 | # Create background map 161 | output_background_map = np.ones((box_size, box_size)) - np.amax(output_heatmaps, axis=2) 162 | output_heatmaps = np.concatenate((output_heatmaps, output_background_map.reshape((box_size, box_size, 1))), 163 | axis=2) 164 | # cv2.imshow('', (output_background_map*255).astype(np.uint8)) 165 | # cv2.imshow('h', (np.amax(output_heatmaps[:, :, 0:21], axis=2)*255).astype(np.uint8)) 166 | # cv2.waitKey(1000) 167 | 168 | 169 | coords_set = np.concatenate((np.reshape(cur_hand_joints_x, (num_of_joints, 1)), 170 | np.reshape(cur_hand_joints_y, (num_of_joints, 1))), 171 | axis=1) 172 | 173 | output_image_raw = output_image.astype(np.uint8).tostring() 174 | output_heatmaps_raw = 
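Each record written here holds only two features: the raw uint8 image bytes and the flattened float heatmaps. The reader in utils/tf_utils.py parses them with a matching spec; that spec with this script's constants (box_size = 64, 21 joints plus one background channel):

```python
import tensorflow as tf

features = {
    'image': tf.FixedLenFeature([], tf.string),
    'heatmaps': tf.FixedLenFeature([64 * 64 * (21 + 1)], tf.float32),
}
# used as: tf.parse_single_example(serialized_example, features=features)
```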
output_heatmaps.flatten().tolist() 175 | output_coords_raw = coords_set.flatten().tolist() 176 | 177 | raw_sample = tf.train.Example(features=tf.train.Features(feature={ 178 | 'image': _bytes_feature(output_image_raw), 179 | 'heatmaps': _float64_feature(output_heatmaps_raw) 180 | })) 181 | 182 | tfr_writer.write(raw_sample.SerializeToString()) 183 | 184 | img_count += 1 185 | if img_count % 50 == 0: 186 | print('Processed %d images, took %f seconds' % (img_count, time.time() - t1)) 187 | t1 = time.time() 188 | 189 | tfr_writer.close() 190 | -------------------------------------------------------------------------------- /utils/tf_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import utils.cpm_utils as cpm_utils 3 | 4 | 5 | def read_and_decode_cpm(tfr_queue, img_size, num_joints, center_radius): 6 | tfr_reader = tf.TFRecordReader() 7 | _, serialized_example = tfr_reader.read(tfr_queue) 8 | 9 | queue_images = [] 10 | queue_center_maps = [] 11 | queue_labels = [] 12 | queue_orig_images = [] 13 | 14 | for i in range(2): 15 | features = tf.parse_single_example(serialized_example, 16 | features={ 17 | 'image': tf.FixedLenFeature([], tf.string), 18 | 'heatmaps': tf.FixedLenFeature( 19 | [int(img_size * img_size * (num_joints + 1))], tf.float32) 20 | }) 21 | 22 | # img_size = 128 23 | # center_radius = 11 24 | img = tf.decode_raw(features['image'], tf.uint8) 25 | img = tf.reshape(img, [img_size, img_size, 3]) 26 | img = tf.cast(img, tf.float32) 27 | 28 | img = img[..., ::-1] 29 | img = tf.image.random_contrast(img, 0.7, 1) 30 | img = tf.image.random_brightness(img, max_delta=0.9) 31 | img = tf.image.random_hue(img, 0.05) 32 | img = tf.image.random_saturation(img, 0.7, 1.1) 33 | img = img[..., ::-1] 34 | 35 | # heatmap = tf.decode_raw(features['heatmaps'], tf.float32) 36 | heatmap = tf.reshape(features['heatmaps'], [img_size, img_size, (num_joints + 1)]) 37 | 38 | # create centermap 39 | center_map = tf.constant((cpm_utils.make_gaussian(img_size, center_radius, 40 | [int(img_size / 2), int(img_size / 2)])).reshape( 41 | (img_size, img_size, 1)), name='center_map') 42 | center_map = tf.cast(center_map, tf.float32) 43 | 44 | # merge img + centermap + heatmap 45 | merged_img_heatmap = tf.concat([img, center_map, heatmap], axis=2) 46 | 47 | # subtract mean before pad 48 | mean_volume = tf.concat((128 * tf.ones(shape=(img_size, img_size, 3)), 49 | tf.zeros(shape=(img_size, img_size, (num_joints + 1))), 50 | tf.ones(shape=(img_size, img_size, 1))), axis=2) 51 | 52 | merged_img_heatmap -= mean_volume 53 | 54 | # preprocessing 55 | preprocessed_merged_img_c_heatmap, _, _ = preprocess(merged_img_heatmap, 56 | label=None, 57 | crop_off_ratio=0.05, 58 | rotation_angle=0.8, 59 | has_bbox=False, 60 | do_flip_lr=True, 61 | do_flip_ud=False, 62 | low_sat=None, 63 | high_sat=None, 64 | max_bright_delta=None, 65 | max_hue_delta=None) 66 | 67 | padded_img_size = img_size # * (1 + tf.random_uniform([], minval=0.0, maxval=0.3)) 68 | padded_img_size = tf.cast(padded_img_size, tf.int32) 69 | 70 | # resize pad 71 | preprocessed_merged_img_c_heatmap = tf.image.resize_image_with_crop_or_pad(preprocessed_merged_img_c_heatmap, 72 | padded_img_size, padded_img_size) 73 | preprocessed_merged_img_c_heatmap += tf.concat((128 * tf.ones(shape=(padded_img_size, padded_img_size, 3)), 74 | tf.zeros( 75 | shape=(padded_img_size, padded_img_size, (num_joints + 1))), 76 | tf.ones(shape=(padded_img_size, padded_img_size, 1))), axis=2) 77 | 
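The pair of img[..., ::-1] flips above brackets the tf.image colour jitter, presumably converting cv2-style BGR frames to the RGB these ops expect and back again (an assumption; the stored images do come from cv2.imread). The pattern in isolation:

```python
import tensorflow as tf


def jitter_bgr(img_bgr):
    img = img_bgr[..., ::-1]                        # BGR -> RGB
    img = tf.image.random_hue(img, max_delta=0.05)
    img = tf.image.random_saturation(img, 0.7, 1.1)
    return img[..., ::-1]                           # RGB -> BGR
```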
preprocessed_merged_img_c_heatmap = tf.image.resize_images(preprocessed_merged_img_c_heatmap, 78 | size=[img_size, img_size]) 79 | 80 | with tf.control_dependencies([preprocessed_merged_img_c_heatmap]): 81 | # preprocessed_img = tf.slice(preprocessed_merged_img_c_heatmap, [0,0,0], [368,368,3]) 82 | # preprocessed_center_maps = tf.slice(preprocessed_merged_img_c_heatmap, [0,0,3], [368,368,1]) 83 | # preprocessed_heatmaps = tf.slice(preprocessed_merged_img_c_heatmap, [0,0,4], [368,368,13]) 84 | 85 | preprocessed_img, preprocessed_center_maps, preprocessed_heatmaps = tf.split( 86 | preprocessed_merged_img_c_heatmap, [3, 1, (num_joints + 1)], axis=2) 87 | 88 | # Normalize image value 89 | preprocessed_img /= 256 90 | preprocessed_img -= 0.5 91 | 92 | queue_images.append(preprocessed_img) 93 | queue_center_maps.append(preprocessed_center_maps) 94 | queue_labels.append(preprocessed_heatmaps) 95 | queue_orig_images.append(img) 96 | 97 | return queue_images, queue_center_maps, queue_labels, queue_orig_images 98 | # return preprocessed_img, preprocessed_center_maps, preprocessed_heatmaps, img 99 | 100 | 101 | def read_batch_cpm(tfr_path, img_size, hmap_size, num_joints, center_radius, batch_size=16, num_epochs=None): 102 | """Read batch images as the input to the network 103 | 104 | tfr_path: path to tfrecord file 105 | num_epochs: None=iteratively read forever 106 | other number=iterate whole tfr_file how many times 107 | """ 108 | 109 | with tf.name_scope('Batch_Inputs'): 110 | tfr_queue = tf.train.string_input_producer(tfr_path, num_epochs=num_epochs, shuffle=True) 111 | 112 | # images, centers, labels, image_orig = read_and_decode_cpm(tfr_queue, img_size, num_joints, center_radius) 113 | 114 | data_list = [read_and_decode_cpm(tfr_queue, img_size, num_joints, center_radius) for _ in 115 | range(2 * len(tfr_path))] 116 | 117 | batch_images, batch_centers, batch_labels, batch_images_orig = tf.train.shuffle_batch_join(data_list, 118 | batch_size=batch_size, 119 | capacity=100 + 6 * batch_size, 120 | min_after_dequeue=100, 121 | enqueue_many=True, 122 | name='batch_data_read') 123 | 124 | # batch_labels = tf.image.resize_bilinear(batch_labels, size=tf.constant((hmap_size,hmap_size), name='shape')) 125 | 126 | return batch_images, batch_centers, batch_labels, batch_images_orig 127 | 128 | 129 | def rotate_points(orig_points, angle, w, h): 130 | """Return rotated points 131 | 132 | Args: 133 | orig_points: 'Tensor' with shape [N,2], each entry is point (x,y) 134 | angle: rotate radians 135 | 136 | Returns: 137 | 'Tensor' with shape [N,2], with rotated points 138 | """ 139 | 140 | # rotation 141 | rotate_mat = tf.stack([[tf.cos(angle) / w, tf.sin(angle) / h], 142 | [-tf.sin(angle) / w, tf.cos(angle) / h]]) 143 | 144 | # shift coord 145 | orig_points = tf.subtract(orig_points, 0.5) 146 | 147 | orig_points = tf.stack([orig_points[:, 0] * w, 148 | orig_points[:, 1] * h], axis=1) 149 | print(orig_points) 150 | rotated_points = tf.matmul(orig_points, rotate_mat) + 0.5 151 | 152 | return rotated_points 153 | 154 | 155 | def preprocess(image, 156 | label, 157 | has_bbox=True, 158 | rotation_angle=1.5, 159 | crop_off_ratio=0.2, 160 | do_flip_lr=True, 161 | do_flip_ud=True, 162 | max_hue_delta=0.15, 163 | low_sat=0.5, 164 | high_sat=2.0, 165 | max_bright_delta=0.3): 166 | """Do some processes for input image 167 | 168 | Args: 169 | image: A 'Tensor' of RGB image 170 | label: vector of floats with even length (be pair of (x,y)) 171 | has_bbox: if 'True', Assume first 4 numbers of 'label' are [top-left, 
129 | def rotate_points(orig_points, angle, w, h):
130 |     """Return rotated points
131 | 
132 |     Args:
133 |         orig_points: 'Tensor' with shape [N, 2], each entry a normalized (x, y) point
134 |         angle: rotation angle in radians; w, h: image width and height in pixels
135 | 
136 |     Returns:
137 |         'Tensor' with shape [N, 2] holding the rotated, re-normalized points
138 |     """
139 | 
140 |     # rotation matrix (folds the pixel-space scaling back into normalized coords)
141 |     rotate_mat = tf.stack([[tf.cos(angle) / w, tf.sin(angle) / h],
142 |                            [-tf.sin(angle) / w, tf.cos(angle) / h]])
143 | 
144 |     # shift coords so the rotation is about the image center
145 |     orig_points = tf.subtract(orig_points, 0.5)
146 | 
147 |     orig_points = tf.stack([orig_points[:, 0] * w,
148 |                             orig_points[:, 1] * h], axis=1)
149 | 
150 |     rotated_points = tf.matmul(orig_points, rotate_mat) + 0.5
151 | 
152 |     return rotated_points
153 | 
154 | 
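# Editor's note (hedged sketch, not part of the original file): the coordinate
# convention above is easy to misread, so this is the same math in plain numpy.
# Points normalized to [0, 1] are rotated in pixel space about the image
# center, then mapped back to normalized coordinates.
#
# import numpy as np
#
# def rotate_points_np(points, angle, w, h):
#     c, s = np.cos(angle), np.sin(angle)
#     px = (points - 0.5) * [w, h]          # pixel offsets from the image center
#     rot = np.array([[c / w, s / h],
#                     [-s / w, c / h]])
#     return px @ rot + 0.5                 # back to normalized coordinates
#
# rotate_points_np(np.array([[0.25, 0.25]]), np.pi / 2, w=640, h=480)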
155 | def preprocess(image,
156 |                label,
157 |                has_bbox=True,
158 |                rotation_angle=1.5,
159 |                crop_off_ratio=0.2,
160 |                do_flip_lr=True,
161 |                do_flip_ud=True,
162 |                max_hue_delta=0.15,
163 |                low_sat=0.5,
164 |                high_sat=2.0,
165 |                max_bright_delta=0.3):
166 |     """Apply random color jitter, cropping, rotation and flips to an input image
167 | 
168 |     Args:
169 |         image: a 'Tensor' holding an RGB image
170 |         label: vector of floats with even length (pairs of (x, y) coords)
171 |         has_bbox: if 'True', assume the first 4 numbers of 'label' are the [top-left, bot-right] coords
172 |         rotation_angle: maximum allowed rotation, in radians
173 |         crop_off_ratio: maximum cropping offset of the top-left corner;
174 |             1 - crop_off_ratio is the maximum cropping offset of the bot-right corner
175 |         do_flip_lr: if 'True', flip the image left-right with probability 0.5
176 |         do_flip_ud: if 'True', flip the image upside down with probability 0.5
177 |         max_hue_delta: maximum random hue adjustment
178 |         low_sat: lower bound for random saturation
179 |         high_sat: upper bound for random saturation
180 |         max_bright_delta: maximum random brightness adjustment
181 | 
182 |     Returns:
183 |         image: processed image 'Tensor'
184 |         new_bbox: 'Tensor' of processed bbox coords if 'has_bbox' == True
185 |         total_points: 'Tensor' of processed points coords
186 |     """
187 | 
188 |     new_bbox = []
189 |     total_points = []
190 | 
191 |     # [height, width, channel] of input image
192 |     img_shape_list = image.get_shape().as_list()
193 | 
194 |     if max_hue_delta is not None:
195 |         # random hue
196 |         image = tf.image.random_hue(image, max_delta=max_hue_delta)
197 | 
198 |     if low_sat is not None and high_sat is not None:
199 |         # random saturation
200 |         image = tf.image.random_saturation(image, lower=low_sat, upper=high_sat)
201 | 
202 |     if max_bright_delta is not None:
203 |         # random brightness
204 |         image = tf.image.random_brightness(image, max_delta=max_bright_delta)
205 | 
206 |     if label is not None:
207 |         total_points = tf.stack([label[i] for i in range(label.shape[0])])
208 | 
209 |     # crop image
210 |     new_top_left_x = crop_off_ratio * tf.random_uniform([], minval=-1.0, maxval=1.0)
211 |     off_w_ratio = tf.cond(tf.less(new_top_left_x, 0), lambda: tf.zeros([]), lambda: new_top_left_x)
212 | 
213 |     new_top_left_y = crop_off_ratio * tf.random_uniform([], minval=-1.0, maxval=1.0)
214 |     off_h_ratio = tf.cond(tf.less(new_top_left_y, 0), lambda: tf.zeros([]), lambda: new_top_left_y)
215 | 
216 |     new_bot_right_x = crop_off_ratio * tf.random_uniform([], minval=-1.0, maxval=1.0)
217 |     tar_w_ratio = tf.cond(tf.less(new_bot_right_x, 0), lambda: tf.ones([]) - off_w_ratio,
218 |                           lambda: 1 - new_bot_right_x - off_w_ratio)
219 | 
220 |     new_bot_right_y = crop_off_ratio * tf.random_uniform([], minval=-1.0, maxval=1.0)
221 |     tar_h_ratio = tf.cond(tf.less(new_bot_right_y, 0), lambda: tf.ones([]) - off_h_ratio,
222 |                           lambda: 1 - new_bot_right_y - off_h_ratio)
223 | 
224 |     pad_image_height = (1 - new_top_left_y - new_bot_right_y) * img_shape_list[0]
225 |     pad_image_width = (1 - new_top_left_x - new_bot_right_x) * img_shape_list[1]
226 |     cropped_image = tf.image.crop_to_bounding_box(image,
227 |                                                   offset_width=tf.cast(off_w_ratio * img_shape_list[1], tf.int32),
228 |                                                   offset_height=tf.cast(off_h_ratio * img_shape_list[0], tf.int32),
229 |                                                   target_height=tf.cast(tar_h_ratio * img_shape_list[0], tf.int32),
230 |                                                   target_width=tf.cast(tar_w_ratio * img_shape_list[1], tf.int32))
231 | 
232 |     image = tf.image.pad_to_bounding_box(cropped_image,
233 |                                          offset_width=tf.cast((off_w_ratio - new_top_left_x) * img_shape_list[1],
234 |                                                               tf.int32),
235 |                                          offset_height=tf.cast((off_h_ratio - new_top_left_y) * img_shape_list[0],
236 |                                                                tf.int32),
237 |                                          target_height=tf.cast(pad_image_height, tf.int32),
238 |                                          target_width=tf.cast(pad_image_width, tf.int32))
239 | 
240 |     # random rotation angle
241 |     angle = rotation_angle * tf.random_uniform([])
242 | 
243 |     # rotate image
244 |     image = tf.contrib.image.rotate(image, -angle, interpolation='BILINEAR')
245 | 
246 |     if label is not None:
247 |         if has_bbox:
248 |             # include the 4 bbox corner points
249 |             bbox_points = tf.stack([[total_points[0][0], total_points[0][1]],
250 |                                     [total_points[1][0], total_points[0][1]],
251 |                                     [total_points[0][0], total_points[1][1]],
252 |                                     [total_points[1][0], total_points[1][1]]], axis=0)
253 |             if label.shape[0] == 4:
254 |                 total_points = bbox_points
255 |             else:
256 |                 total_points = tf.concat([bbox_points, total_points[2:]], axis=0)
257 | 
258 |         # rotate points
259 |         total_points = rotate_points(total_points, angle, pad_image_width, pad_image_height)
260 | 
261 |         if has_bbox:
262 |             # new bbox [top_left, bot_right]
263 |             new_bbox = tf.stack([[total_points[2][0], total_points[0][1]],
264 |                                  [total_points[1][0], total_points[3][1]]], axis=0)
265 |             total_points = tf.concat([new_bbox, total_points[4:]], axis=0)
266 | 
267 |     if label is not None:
268 |         # adjust points' coords for the cropped image
269 |         total_points = tf.reshape(total_points[:], shape=[-1, 2])
270 |         total_points = tf.stack([(total_points[:, 0] - new_top_left_x) / (1 - new_top_left_x - new_bot_right_x),
271 |                                  (total_points[:, 1] - new_top_left_y) / (1 - new_top_left_y - new_bot_right_y)],
272 |                                 axis=1)
273 | 
274 |     if label is not None:
275 |         # flip left-right with 50% chance
276 |         def flip_lr():
277 |             i = tf.image.flip_left_right(image)
278 |             l = tf.stack([1 - total_points[:, 0],
279 |                           total_points[:, 1]], axis=1)
280 |             return i, l
281 | 
282 |         def no_flip_lr():
283 |             i = image
284 |             l = total_points
285 |             return i, l
286 | 
287 |         if do_flip_lr:
288 |             image, total_points = tf.cond(tf.greater(tf.random_uniform([]), 0.5), flip_lr, no_flip_lr)
289 | 
290 |         # flip upside down with 50% chance
291 |         def flip_ud():
292 |             i = tf.image.flip_up_down(image)
293 |             l = tf.stack([total_points[:, 0],
294 |                           1 - total_points[:, 1]], axis=1)
295 |             return i, l
296 | 
297 |         def no_flip_ud():
298 |             i = image
299 |             l = total_points
300 |             return i, l
301 | 
302 |         if do_flip_ud:
303 |             image, total_points = tf.cond(tf.greater(tf.random_uniform([]), 0.5), flip_ud, no_flip_ud)
304 | 
305 |         if has_bbox:  # repack bbox as [center_x, center_y, width, height]
306 |             new_bbox = tf.stack([(total_points[0, 0] + total_points[1, 0]) / 2,
307 |                                  (total_points[0, 1] + total_points[1, 1]) / 2,
308 |                                  tf.abs(total_points[1, 0] - total_points[0, 0]),
309 |                                  tf.abs(total_points[1, 1] - total_points[0, 1])], axis=0)
310 | 
311 |         total_points = tf.reshape(total_points, shape=[-1, ])
312 | 
313 |     else:
314 |         # flip left-right with 50% chance
315 |         def flip_lr():
316 |             i = tf.image.flip_left_right(image)
317 |             return i
318 | 
319 |         def no_flip_lr():
320 |             i = image
321 |             return i
322 | 
323 |         if do_flip_lr:
324 |             image = tf.cond(tf.greater(tf.random_uniform([]), 0.5), flip_lr, no_flip_lr)
325 | 
326 |         # flip upside down with 50% chance
327 |         def flip_ud():
328 |             i = tf.image.flip_up_down(image)
329 |             return i
330 | 
331 |         def no_flip_ud():
332 |             i = image
333 |             return i
334 | 
335 |         if do_flip_ud:
336 |             image = tf.cond(tf.greater(tf.random_uniform([]), 0.5), flip_ud, no_flip_ud)
337 | 
338 |     return image, new_bbox, total_points
339 | 
--------------------------------------------------------------------------------
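A hedged sketch of how preprocess is invoked: read_and_decode_cpm above calls it on the merged image/center-map/heatmap volume with label=None and the color-jitter arguments disabled. The dummy zero tensor below is a placeholder for illustration only, not something the repo builds:

import tensorflow as tf
from utils.tf_utils import preprocess

dummy = tf.zeros([128, 128, 3])  # placeholder image tensor
aug_img, _, _ = preprocess(dummy, label=None, has_bbox=False,
                           rotation_angle=0.8, crop_off_ratio=0.05,
                           do_flip_lr=True, do_flip_ud=False,
                           max_hue_delta=None, low_sat=None,
                           high_sat=None, max_bright_delta=None)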
/utils/tracking_module.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | 
4 | 
5 | class SelfTracker(object):
6 |     def __init__(self, img_shape, model_input_size):
7 |         self.img_shape = img_shape
8 |         self.loss_track = False  # set to True when tracking is lost
9 |         self.prev_bbox = [0, 0, img_shape[0], img_shape[1]]
10 |         self.init_center = [img_shape[0]//2, img_shape[1]//2]
11 |         self.cur_center = [img_shape[0]//2, img_shape[1]//2]
12 |         self._default_crop_size = 368
13 |         self.bbox = [0, 0, 0, 0]
14 |         self.pad_boundary = [0, 0, 0, 0]
15 |         self.prev_crop_h = self._default_crop_size
16 |         self.prev_crop_w = self._default_crop_size
17 |         self.alpha = 0.2  # smoothing factor for the crop-size moving average
18 |         self.input_crop_ratio = 1.0
19 |         self.input_size = float(model_input_size)
20 | 
21 |     def tracking_by_joints(self, full_img, joint_detections=None):
22 |         if self.loss_track or joint_detections is None:
23 |             cropped_img = self._crop_image(full_img, self.init_center, (self._default_crop_size, self._default_crop_size))
24 |             self.input_crop_ratio = self.input_size / max(cropped_img.shape[0], cropped_img.shape[1])
25 |             resize_img = self._resize_image(cropped_img, self.input_size)
26 |             return self._pad_image(resize_img, max(resize_img.shape[0], resize_img.shape[1]))
27 |         else:
28 |             self.cur_center = np.mean(joint_detections, axis=0, dtype=np.int)
29 |             crop_h = np.max(joint_detections[:, 0]) - np.min(joint_detections[:, 0])
30 |             crop_w = np.max(joint_detections[:, 1]) - np.min(joint_detections[:, 1])
31 |             crop_h = max(int(crop_h), 96)
32 |             crop_w = max(int(crop_w), 96)
33 |             crop_h *= 2.0
34 |             crop_w *= 2.0
35 |             self.prev_crop_h = self.alpha * crop_h + (1-self.alpha) * self.prev_crop_h
36 |             self.prev_crop_w = self.alpha * crop_w + (1-self.alpha) * self.prev_crop_w
37 | 
38 |             cropped_img = self._crop_image(full_img, self.cur_center, (int(self.prev_crop_h), int(self.prev_crop_w)))
39 |             self.input_crop_ratio = self.input_size / max(cropped_img.shape[0], cropped_img.shape[1])
40 |             resize_img = self._resize_image(cropped_img, self.input_size)
41 | 
42 |             pad_size = max(resize_img.shape[0], resize_img.shape[1])
43 |             return self._pad_image(resize_img, pad_size)
44 | 
45 |     def _resize_image(self, cropped_img, size):
46 |         h, w, _ = cropped_img.shape
47 |         if h > w:
48 |             scale = size / h
49 |             return cv2.resize(cropped_img, None, fx=scale, fy=scale)
50 |         else:
51 |             scale = size / w
52 |             return cv2.resize(cropped_img, None, fx=scale, fy=scale)
53 | 
54 |     def _crop_image(self, full_img, center, size):
55 |         h_offset = size[0] % 2
56 |         w_offset = size[1] % 2
57 |         self.bbox = [max(0, center[0]-size[0]//2), min(self.img_shape[0], center[0]+size[0]//2+h_offset),
58 |                      max(0, center[1]-size[1]//2), min(self.img_shape[1], center[1]+size[1]//2+w_offset)]
59 |         return full_img[self.bbox[0]:self.bbox[1], self.bbox[2]:self.bbox[3], :]
60 | 
61 | 
62 |     def _pad_image(self, img, size):
63 |         h, w, _ = img.shape
64 |         if size < h or size < w:
65 |             raise ValueError('Pad size cannot be smaller than the original image size')
66 | 
67 |         pad_h_offset = (size - h) % 2
68 |         pad_w_offset = (size - w) % 2
69 |         self.pad_boundary = [(size-h)//2+pad_h_offset, (size-h)//2, (size-w)//2+pad_w_offset, (size-w)//2]
70 |         return cv2.copyMakeBorder(img, top=self.pad_boundary[0],
71 |                                   bottom=self.pad_boundary[1],
72 |                                   left=self.pad_boundary[2],
73 |                                   right=self.pad_boundary[3], borderType=cv2.BORDER_CONSTANT, value=(128, 128, 128))
74 | 
--------------------------------------------------------------------------------
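A hedged sketch of feeding webcam frames through SelfTracker. The joint detections would normally come from the CPM network (as in run_demo_hand_with_tracker.py); passing None here makes the tracker fall back to its fixed center crop. The camera index 0 and input size 368 are illustrative:

import cv2
from utils.tracking_module import SelfTracker

cam = cv2.VideoCapture(0)
ok, frame = cam.read()
if ok:
    tracker = SelfTracker(frame.shape[:2], model_input_size=368)
    crop = tracker.tracking_by_joints(frame, joint_detections=None)
    print(crop.shape)  # square crop, padded to the larger resized side
cam.release()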
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import math
4 | import matplotlib.pyplot as plt
5 | from mpl_toolkits.mplot3d import Axes3D
6 | # from OpenGL.GL import *
7 | # from OpenGL.GLU import *
8 | 
9 | 
10 | 
11 | def read_square_image(file, cam, boxsize, type):
12 |     # from file
13 |     if type == 'IMAGE':
14 |         oriImg = cv2.imread(file)
15 |     # from webcam
16 |     elif type == 'WEBCAM':
17 |         _, oriImg = cam.read()
18 | 
19 |     scale = boxsize / (oriImg.shape[0] * 1.0)
20 |     imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LANCZOS4)
21 | 
22 |     output_img = np.ones((boxsize, boxsize, 3)) * 128
23 | 
24 |     if imageToTest.shape[1] < boxsize:
25 |         offset = imageToTest.shape[1] % 2
26 |         output_img[:, int(boxsize/2-math.ceil(imageToTest.shape[1]/2)):int(boxsize/2+math.ceil(imageToTest.shape[1]/2)+offset), :] = imageToTest
27 |     else:
28 |         output_img = imageToTest[:, int(imageToTest.shape[1]/2-boxsize/2):int(imageToTest.shape[1]/2+boxsize/2), :]
29 |     return output_img
30 | 
31 | 
32 | def resize_pad_img(img, scale, output_size):
33 |     resized_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
34 |     pad_h = (output_size - resized_img.shape[0]) // 2
35 |     pad_w = (output_size - resized_img.shape[1]) // 2
36 |     pad_h_offset = (output_size - resized_img.shape[0]) % 2
37 |     pad_w_offset = (output_size - resized_img.shape[1]) % 2
38 |     resized_pad_img = np.pad(resized_img, ((pad_w, pad_w+pad_w_offset), (pad_h, pad_h+pad_h_offset), (0, 0)),
39 |                              mode='constant', constant_values=128)
40 | 
41 |     return resized_pad_img
42 | 
43 | 
44 | def img_white_balance(img, white_ratio):
45 |     for channel in range(img.shape[2]):
46 |         channel_max = np.percentile(img[:, :, channel], 100-white_ratio)
47 |         channel_min = np.percentile(img[:, :, channel], white_ratio)
48 |         img[:, :, channel] = (channel_max-channel_min) * (img[:, :, channel] / 255.0)
49 |     return img
50 | 
51 | 
52 | def img_white_balance_with_bg(img, bg, white_ratio):
53 |     for channel in range(img.shape[2]):
54 |         channel_max = np.percentile(bg[:, :, channel], 100-white_ratio)
55 |         channel_min = np.percentile(bg[:, :, channel], white_ratio)
56 |         img[:, :, channel] = (channel_max-channel_min) * (img[:, :, channel] / 255.0)
57 |     return img
58 | 
59 | 
60 | def draw_predicted_heatmap(heatmap, input_size):
61 |     heatmap_resized = cv2.resize(heatmap, (input_size, input_size))
62 | 
63 |     output_img = None
64 |     tmp_concat_img = None
65 |     h_count = 0
66 |     for joint_num in range(heatmap_resized.shape[2]):
67 |         if h_count < 4:
68 |             tmp_concat_img = np.concatenate((tmp_concat_img, heatmap_resized[:, :, joint_num]), axis=1) \
69 |                 if tmp_concat_img is not None else heatmap_resized[:, :, joint_num]
70 |             h_count += 1
71 |         else:
72 |             output_img = np.concatenate((output_img, tmp_concat_img), axis=0) if output_img is not None else tmp_concat_img
73 |             tmp_concat_img = heatmap_resized[:, :, joint_num]  # start the new row with the current joint (previously this channel was dropped)
74 |             h_count = 1
75 |     # last row img
76 |     if h_count != 0:
77 |         while h_count < 4:
78 |             tmp_concat_img = np.concatenate((tmp_concat_img, np.zeros(shape=(input_size, input_size), dtype=np.float32)), axis=1)
79 |             h_count += 1
80 |         output_img = np.concatenate((output_img, tmp_concat_img), axis=0) if output_img is not None else tmp_concat_img
81 | 
82 |     # adjust heatmap color
83 |     output_img = output_img.astype(np.uint8)
84 |     output_img = cv2.applyColorMap(output_img, cv2.COLORMAP_JET)
85 |     return output_img
86 | 
87 | 
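# Editor's note (hedged sketch, not part of the original file): with the
# row-boundary fix above, every channel of a dummy 9-joint heatmap volume
# lands in the 4-wide grid, giving 3 rows of 64px tiles, i.e. a 192x256x3
# color image:
#
# dummy_hm = np.random.rand(32, 32, 9).astype(np.float32) * 255
# grid = draw_predicted_heatmap(dummy_hm, input_size=64)
# print(grid.shape)   # (192, 256, 3)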
88 | def draw_stages_heatmaps(stage_heatmap_list, orig_img_size):
89 | 
90 |     output_img = None
91 |     nStages = len(stage_heatmap_list)
92 |     nJoints = stage_heatmap_list[0].shape[3]
93 |     for stage in range(nStages):
94 |         cur_heatmap = np.squeeze(stage_heatmap_list[stage][0, :, :, 0:nJoints-1])  # was stage_heatmap_list[0], which drew the first stage repeatedly
95 |         cur_heatmap = cv2.resize(cur_heatmap, (orig_img_size, orig_img_size))
96 | 
97 |         channel_max = np.percentile(cur_heatmap, 99)
98 |         channel_min = np.percentile(cur_heatmap, 1)
99 |         cur_heatmap = 255.0 / (channel_max - channel_min) * (cur_heatmap - channel_min)
100 |         cur_heatmap = np.clip(cur_heatmap, 0, 255)
101 | 
102 |         cur_heatmap = np.repeat(np.expand_dims(np.amax(cur_heatmap, axis=2), axis=2), 3, axis=2)
103 |         output_img = np.concatenate((output_img, cur_heatmap), axis=1) if output_img is not None else cur_heatmap
104 |     return output_img.astype(np.uint8)
105 | 
106 | 
107 | def extract_2d_joint_from_heatmap(heatmap, input_size, joints_2d):
108 |     heatmap_resized = cv2.resize(heatmap, (input_size, input_size))
109 | 
110 |     for joint_num in range(heatmap_resized.shape[2]):
111 |         joint_coord = np.unravel_index(np.argmax(heatmap_resized[:, :, joint_num]), (input_size, input_size))
112 |         joints_2d[joint_num, :] = joint_coord
113 | 
114 |     return joints_2d
115 | 
116 | 
117 | def extract_3d_joints_from_heatmap(joints_2d, x_hm, y_hm, z_hm, input_size, joints_3d):
118 | 
119 |     for joint_num in range(x_hm.shape[2]):
120 |         coord_2d_y = joints_2d[joint_num][0]
121 |         coord_2d_x = joints_2d[joint_num][1]
122 | 
123 |         # x_hm_resized = cv2.resize(x_hm, (input_size, input_size))
124 |         # y_hm_resized = cv2.resize(y_hm, (input_size, input_size))
125 |         # z_hm_resized = cv2.resize(z_hm, (input_size, input_size))
126 |         # joint_x = x_hm_resized[max(int(coord_2d_x), 1), max(int(coord_2d_y), 1), joint_num] * 100
127 |         # joint_y = y_hm_resized[max(int(coord_2d_x), 1), max(int(coord_2d_y), 1), joint_num] * 100
128 |         # joint_z = z_hm_resized[max(int(coord_2d_x), 1), max(int(coord_2d_y), 1), joint_num] * 100
129 | 
130 | 
131 |         joint_x = x_hm[max(int(coord_2d_x/8), 1), max(int(coord_2d_y/8), 1), joint_num] * 10
132 |         joint_y = y_hm[max(int(coord_2d_x/8), 1), max(int(coord_2d_y/8), 1), joint_num] * 10
133 |         joint_z = z_hm[max(int(coord_2d_x/8), 1), max(int(coord_2d_y/8), 1), joint_num] * 10
134 |         joints_3d[joint_num, 0] = joint_x
135 |         joints_3d[joint_num, 1] = joint_y
136 |         joints_3d[joint_num, 2] = joint_z
137 |     joints_3d -= joints_3d[14, :]  # re-center the skeleton on joint 14
138 | 
139 |     return joints_3d
140 | 
141 | def draw_limbs_2d(img, joints_2d, limb_parents):
142 |     for limb_num in range(len(limb_parents)-1):
143 |         x1 = joints_2d[limb_num, 0]
144 |         y1 = joints_2d[limb_num, 1]
145 |         x2 = joints_2d[limb_parents[limb_num], 0]
146 |         y2 = joints_2d[limb_parents[limb_num], 1]
147 |         length = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
148 |         # if length < 10000 and length > 5:
149 |         deg = math.degrees(math.atan2(x1 - x2, y1 - y2))
150 |         polygon = cv2.ellipse2Poly((int((y1 + y2) / 2), int((x1 + x2) / 2)),
151 |                                    (int(length / 2), 3),
152 |                                    int(deg),
153 |                                    0, 360, 1)
154 |         cv2.fillConvexPoly(img, polygon, color=(0,255,0))
155 |     return img
156 | 
157 | def draw_limbs_3d(joints_3d, limb_parents, ax):
158 | 
159 |     for i in range(joints_3d.shape[0]):
160 |         x_pair = [joints_3d[i, 0], joints_3d[limb_parents[i], 0]]
161 |         y_pair = [joints_3d[i, 1], joints_3d[limb_parents[i], 1]]
162 |         z_pair = [joints_3d[i, 2], joints_3d[limb_parents[i], 2]]
163 |         ax.plot(x_pair, y_pair, zs=z_pair, linewidth=3)
164 | 
165 | 
166 | def draw_limb_3d_gl(joints_3d, limb_parents):  # requires the PyOpenGL imports commented out at the top of this file
167 | 
168 |     glLineWidth(2)
169 |     glBegin(GL_LINES)
170 |     glColor3f(1,0,0)
171 |     glVertex3fv((0,0,0))
172 |     glVertex3fv((100,0,0))
173 |     glColor3f(0,1,0)
174 |     glVertex3fv((0,0,0))
175 |     glVertex3fv((0,100,0))
176 |     glColor3f(0,0,1)
177 |     glVertex3fv((0,0,0))
178 |     glVertex3fv((0,0,100))
179 |     glEnd()
180 | 
181 |     glColor3f(1,1,1)
182 |     glBegin(GL_LINES)
183 |     for i in range(joints_3d.shape[0]):
184 |         glVertex3fv((joints_3d[i, 0], joints_3d[i, 1], joints_3d[i, 2]))
185 |         glVertex3fv((joints_3d[limb_parents[i], 0], joints_3d[limb_parents[i], 1], joints_3d[limb_parents[i], 2]))
186 |     glEnd()
187 | 
188 |     # glBegin(GL_TRIANGLES)
189 |     # glVertex3f(0, 100, 0)
190 |     # glVertex3f(100, 0, 50)
191 |     # glVertex3f(0, -100, 100)
192 |     # glEnd()
193 | 
194 | 
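# Editor's note (hedged sketch, not part of the original file): draw_limbs_2d
# expects joints as (row, col) pairs, matching extract_2d_joint_from_heatmap,
# and draws one filled ellipse per limb onto the image in place:
#
# canvas = np.zeros((368, 368, 3), dtype=np.uint8)
# joints = np.array([[100, 100], [200, 150]])   # (y, x) per joint
# limb_parents = [1, 1]                          # joint 0 connects to joint 1
# canvas = draw_limbs_2d(canvas, joints, limb_parents)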
195 | def draw_float_range_img(img):
196 |     tmp_min = np.min(img)
197 |     tmp_max = np.max(img)
198 |     img = cv2.convertScaleAbs(img, None, 255.0 / (tmp_max - tmp_min), -255.0 * tmp_min / (tmp_max - tmp_min))  # alpha/beta map [min, max] -> [0, 255]; the original call dropped the beta shift
199 |     img = cv2.applyColorMap(img, cv2.COLORMAP_JET)
200 |     return img.astype(np.uint8)
201 | 
202 | 
203 | 
204 | 
205 | 
206 | 
207 | 
208 | 
209 | 
210 | 
211 | 
212 | 
213 | 
--------------------------------------------------------------------------------
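A quick numpy check of the alpha/beta pair used in the corrected draw_float_range_img: it maps the value range [min, max] onto [0, 255]. This is a standalone sketch, independent of the repo:

import numpy as np

x = np.array([-2.0, 0.0, 6.0])
alpha = 255.0 / (x.max() - x.min())
beta = -255.0 * x.min() / (x.max() - x.min())
print(np.round(alpha * x + beta))  # [  0.  64. 255.]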