├── README.md
├── classify
│   └── classmain.py
├── config.py
├── models
│   ├── __init__.py
│   ├── __pycache__
│   │   └── __init__.cpython-35.pyc
│   └── nets
│       ├── CPM.py
│       ├── __init__.py
│       ├── __pycache__
│       │   ├── CPM.cpython-35.pyc
│       │   ├── __init__.cpython-35.pyc
│       │   └── cpm_hand.cpython-35.pyc
│       ├── cpm_body.py
│       ├── cpm_body_slim.py
│       ├── cpm_hand.py
│       ├── cpm_hand_slim.py
│       └── cpm_hand_v2.py
├── run_demo_hand_with_tracker.py
├── useClassifyModel.py
└── utils
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-35.pyc
    │   ├── cpm_utils.cpython-35.pyc
    │   ├── tracking_module.cpython-35.pyc
    │   └── utils.cpython-35.pyc
    ├── cpm_utils.py
    ├── create_cpm_tfr_fulljoints.py
    ├── tf_utils.py
    ├── tracking_module.py
    └── utils.py

/README.md:
--------------------------------------------------------------------------------
1 | # HandGestureClassify
2 | TensorFlow-based hand gesture detection and classification
3 |
4 | Blog post: https://blog.csdn.net/yyyerica/article/details/80151473
5 |
6 | The original hand detection code comes from: https://github.com/timctho/convolutional-pose-machines-tensorflow
7 |
8 | The classification code is based on: https://blog.csdn.net/Enchanted_ZhouH/article/details/74116823
9 |
10 |
11 | Usage:
12 |
13 | 1. Hand gesture detection
14 |
15 | Run run_demo_hand_with_tracker.py for real-time hand gesture detection.
16 |
17 | Change DEMO_TYPE in config.py to switch the type of image that is output.
18 |
19 | Uncomment the cv2.imwrite('./storePic/11'+str(i)+'.jpg', local_img.astype(np.uint8),[int(cv2.IMWRITE_JPEG_QUALITY), 90]) statement in run_demo_hand_with_tracker.py to save the cropped images into the project directory; the target directory can be changed as needed.
20 |
21 |
22 | 2. Hand gesture image classification
23 |
24 | classmain.py trains the classifier.
25 |
26 | The gesture dataset used for training lives in classify/handGesturePic; run run_demo_hand_with_tracker.py yourself to save images as the training set.
27 |
28 | Trained models are stored in classify/modelSave.
29 |
30 | Run useClassifyModel.py to verify the classification results.
31 |
32 | Note: please download the hand detection model from https://github.com/timctho/convolutional-pose-machines-tensorflow (TensorFlow version).
33 |
34 | The gesture classification model parameters must be trained and saved with classmain.py yourself.
35 |
--------------------------------------------------------------------------------
/classify/classmain.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from skimage import io, transform
3 | import glob
4 | import os
5 | import tensorflow as tf
6 | import numpy as np
7 | import time
8 |
9 | path = './handGesturePic/'
10 | # Path where the trained model is saved
11 | model_path = './modelSave/model.ckpt'
12 |
13 | # Resize all images to 100x100
14 | w = 100
15 | h = 100
16 | c = 3
17 |
18 |
19 | # Read the images; each subfolder of `path` is one gesture class
20 | def read_img(path):
21 |     cate = [path + '/' + x for x in os.listdir(path) if os.path.isdir(path + '/' + x)]
22 |     imgs = []
23 |     labels = []
24 |     for idx, folder in enumerate(cate):
25 |         print('reading the images:%s' % (folder))
26 |         for im in glob.glob(folder + '/*.jpg'):
27 |             img = io.imread(im)
28 |             img = transform.resize(img, (w, h))
29 |             imgs.append(img)
30 |             labels.append(idx)
31 |     return np.asarray(imgs, np.float32), np.asarray(labels, np.int32)
32 |
33 |
34 | data, label = read_img(path)  # data: 4038 images of shape (100, 100, 3); label: 4038 values in 0..5
35 |
36 | # Shuffle the samples
37 | num_example = data.shape[0]  # 4038
38 | arr = np.arange(num_example)  # [0 1 2 ... 4037]
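# (editor's note, optional) Seeding NumPy before the shuffle below, e.g. np.random.seed(42),
# would make the shuffle -- and therefore the train/validation split -- reproducible across runs.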
39 | np.random.shuffle(arr)  # shuffle arr in place
40 | data = data[arr]
41 | label = label[arr]
42 |
43 | # Split all the data into a training set and a validation set
44 | ratio = 0.8
45 | s = int(num_example * ratio)
46 | x_train = data[:s]
47 | y_train = label[:s]
48 | x_val = data[s:]  # validation set
49 | y_val = label[s:]
50 |
51 | # ----------------- Build the network ----------------------
52 | # Placeholders
53 | x = tf.placeholder(tf.float32, shape=[None, w, h, c], name='x')
54 | y_ = tf.placeholder(tf.int32, shape=[None, ], name='y_')
55 |
56 |
57 | # 100x100x3 -> 100x100x32 -> 50x50x32 -> 50x50x64 -> 25x25x64 -> 25x25x128 -> 12x12x128 -> 12x12x128 -> 6x6x128
58 | def inference(input_tensor, train, regularizer):  # regularizer = tf.contrib.layers.l2_regularizer(0.0001)
59 |
60 |     '''
61 |     tf.nn.conv2d(input, filter, strides (usually [1, 1, 1, 1]), padding, use_cudnn_on_gpu=None, data_format=None, name=None)
62 |     input tensor: [batch, in_height, in_width, in_channels]
63 |     filter/kernel tensor: [filter_height, filter_width (kernel size), in_channels (input channels), out_channels (output channels)]
64 |
65 |     It performs the following operations:
66 |     Flattens the filter to a 2-D matrix of shape [filter_height * filter_width * in_channels, output_channels].
67 |     Extracts filter-sized patches from the input to form a virtual tensor of shape [batch, out_height, out_width, filter_height * filter_width * in_channels].
68 |     For each patch, right-multiplies the filter matrix.
69 |     '''
70 |
71 |     with tf.variable_scope('layer1-conv1'):
72 |         conv1_weights = tf.get_variable("weight", [5, 5, 3, 32], initializer=tf.truncated_normal_initializer(stddev=0.1))
73 |         conv1_biases = tf.get_variable("bias", [32], initializer=tf.constant_initializer(0.0))
74 |         conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
75 |         # With padding='SAME' and stride 1, the output keeps the input's spatial shape
76 |         relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))
77 |
78 |     with tf.name_scope("layer2-pool1"):
79 |         pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
80 |
81 |     with tf.variable_scope("layer3-conv2"):
82 |         conv2_weights = tf.get_variable("weight", [5, 5, 32, 64], initializer=tf.truncated_normal_initializer(stddev=0.1))
83 |         conv2_biases = tf.get_variable("bias", [64], initializer=tf.constant_initializer(0.0))
84 |         conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
85 |         relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
86 |
87 |     with tf.name_scope("layer4-pool2"):
88 |         pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
89 |
90 |     with tf.variable_scope("layer5-conv3"):
91 |         conv3_weights = tf.get_variable("weight", [3, 3, 64, 128], initializer=tf.truncated_normal_initializer(stddev=0.1))
92 |         conv3_biases = tf.get_variable("bias", [128], initializer=tf.constant_initializer(0.0))
93 |         conv3 = tf.nn.conv2d(pool2, conv3_weights, strides=[1, 1, 1, 1], padding='SAME')
94 |         relu3 = tf.nn.relu(tf.nn.bias_add(conv3, conv3_biases))
95 |
96 |     with tf.name_scope("layer6-pool3"):
97 |         pool3 = tf.nn.max_pool(relu3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
98 |
99 |     with tf.variable_scope("layer7-conv4"):
100 |         conv4_weights = tf.get_variable("weight", [3, 3, 128, 128], initializer=tf.truncated_normal_initializer(stddev=0.1))
101 |         conv4_biases = tf.get_variable("bias", [128], initializer=tf.constant_initializer(0.0))
102 |         conv4 = tf.nn.conv2d(pool3, conv4_weights, strides=[1, 1, 1, 1], padding='SAME')
103 |         relu4 = tf.nn.relu(tf.nn.bias_add(conv4, conv4_biases))
104 |
105 |     with tf.name_scope("layer8-pool4"):
106 |         pool4 = tf.nn.max_pool(relu4, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
107 |         nodes = 6 * 6 * 128
108 |         reshaped = tf.reshape(pool4, [-1, nodes])
109 |
110 |     with tf.variable_scope('layer9-fc1'):
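        # (editor's note) After four 2x2 max-pools the 100x100 input is down to 6x6x128,
        # so `reshaped` above holds 6*6*128 = 4608 features; the fully connected head
        # below then maps 4608 -> 1024 -> 512 -> 6 class scores.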
111 |         fc1_weights = tf.get_variable("weight", [nodes, 1024],
112 |                                       initializer=tf.truncated_normal_initializer(stddev=0.1))
113 |         if regularizer != None: tf.add_to_collection('losses', regularizer(fc1_weights))
114 |         # tf.add_to_collection adds the tensor to a named collection in the current graph
115 |         fc1_biases = tf.get_variable("bias", [1024], initializer=tf.constant_initializer(0.1))
116 |
117 |         fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
118 |         if train: fc1 = tf.nn.dropout(fc1, 0.5)
119 |
120 |     with tf.variable_scope('layer10-fc2'):
121 |         fc2_weights = tf.get_variable("weight", [1024, 512],
122 |                                       initializer=tf.truncated_normal_initializer(stddev=0.1))
123 |         if regularizer != None: tf.add_to_collection('losses', regularizer(fc2_weights))
124 |         fc2_biases = tf.get_variable("bias", [512], initializer=tf.constant_initializer(0.1))
125 |
126 |         fc2 = tf.nn.relu(tf.matmul(fc1, fc2_weights) + fc2_biases)
127 |         if train: fc2 = tf.nn.dropout(fc2, 0.5)
128 |
129 |     with tf.variable_scope('layer11-fc3'):
130 |         fc3_weights = tf.get_variable("weight", [512, 6],
131 |                                       initializer=tf.truncated_normal_initializer(stddev=0.1))
132 |         if regularizer != None: tf.add_to_collection('losses', regularizer(fc3_weights))
133 |         fc3_biases = tf.get_variable("bias", [6], initializer=tf.constant_initializer(0.1))
134 |         logit = tf.matmul(fc2, fc3_weights) + fc3_biases
135 |
136 |     return logit
137 |
138 | # --------------------------- End of network ---------------------------
139 | regularizer = tf.contrib.layers.l2_regularizer(0.0001)  # Returns a function that applies L2 regularization; adding a regularization term to the loss is an important way to prevent overfitting
140 | logits = inference(x, False, regularizer)
141 |
142 | # (small trick) Multiply logits by 1 and give the result an explicit name, so that the
143 | # output tensor can later be fetched by name when the saved model is reused
144 | b = tf.constant(value=1, dtype=tf.float32)
145 | logits_eval = tf.multiply(logits, b, name='logits_eval')
146 |
147 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_)
148 | # Average the per-example cross entropy over the batch and add the L2 terms that were
149 | # collected into 'losses' above (the original code collected them but never used them)
150 | loss = tf.reduce_mean(cross_entropy) + tf.add_n(tf.get_collection('losses'))
151 | train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
152 | correct_prediction = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), y_)
153 | # tf.equal returns a `Tensor` of type `bool`; tf.cast casts a tensor to a new type and
154 | # returns a `Tensor` (or `SparseTensor`) with the same shape as its input (only the dtype changes)
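# (editor's illustrative example) For a batch of two with logits rows [0.1, 3.2, 0.0, ...] and
# [2.5, 0.3, 0.1, ...], tf.argmax gives predicted classes [1, 0]; if y_ is [1, 2], tf.equal
# yields [True, False] and the mean computed below is 0.5, i.e. 50% batch accuracy.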
155 | acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
156 |
157 |
158 | # Helper that yields the data in mini-batches
159 | def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
160 |     assert len(inputs) == len(targets)
161 |     if shuffle:
162 |         indices = np.arange(len(inputs))
163 |         np.random.shuffle(indices)
164 |     for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
165 |         if shuffle:
166 |             excerpt = indices[start_idx:start_idx + batch_size]
167 |         else:
168 |             excerpt = slice(start_idx, start_idx + batch_size)
169 |         yield inputs[excerpt], targets[excerpt]
170 |
171 |
172 | # Train and evaluate; n_epoch can be set larger for better results
173 |
174 | n_epoch = 10
175 | batch_size = 64
176 | saver = tf.train.Saver()
177 | sess = tf.Session()
178 | sess.run(tf.global_variables_initializer())
179 | for epoch in range(n_epoch):
180 |     start_time = time.time()
181 |
182 |     # training
183 |     train_loss, train_acc, n_batch = 0, 0, 0
184 |     for x_train_a, y_train_a in minibatches(x_train, y_train, batch_size, shuffle=True):
185 |         _, err, ac = sess.run([train_op, loss, acc], feed_dict={x: x_train_a, y_: y_train_a})
186 |         train_loss += err
187 |         train_acc += ac
188 |         n_batch += 1
189 |
190 |     print("----------------epoch: %d-------------------" % epoch)
191 |     print("   train loss: %f" % (train_loss / n_batch))
192 |     print("   train acc: %f" % (train_acc / n_batch))
193 |
194 |     # validation
195 |     val_loss, val_acc, n_batch = 0, 0, 0
196 |     for x_val_a, y_val_a in minibatches(x_val, y_val, batch_size, shuffle=False):
197 |         err, ac = sess.run([loss, acc], feed_dict={x: x_val_a, y_: y_val_a})
198 |         val_loss += err
199 |         val_acc += ac
200 |         n_batch += 1
201 |
202 |     print("   validation loss: %f" % (val_loss / n_batch))
203 |     print("   validation acc: %f" % (val_acc / n_batch))
204 |     print('\n')
205 |
206 | saver.save(sess, model_path)
207 |
208 | sess.close()
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | class FLAGS(object):
2 |     """ """
3 |     """
4 |     General settings
5 |     """
6 |     input_size = 256
7 |     heatmap_size = 32
8 |     cpm_stages = 3
9 |     joint_gaussian_variance = 1.0
10 |     center_radius = 21
11 |     num_of_joints = 21
12 |     color_channel = 'RGB'
13 |     normalize_img = True
14 |     use_gpu = True
15 |     gpu_id = 0
16 |
17 |
18 |     """
19 |     Demo settings
20 |     """
21 |     # 'MULTI': show multiple stage heatmaps
22 |     # 'SINGLE': show last stage heatmap
23 |     # 'Joint_HM': show last stage heatmap for each joint
24 |     # 'image or video path': show detection on single image or video
25 |     DEMO_TYPE = 'MULTI'
26 |
27 |     model_path = 'cpm_hand'
28 |     cam_id = 0
29 |
30 |     webcam_height = 480
31 |     webcam_width = 640
32 |
33 |     use_kalman = True
34 |     kalman_noise = 0.03
35 |
36 |
37 |     """
38 |     Training settings
39 |     """
40 |     network_def = 'cpm_hand'
41 |     train_img_dir = ''
42 |     val_img_dir = ''
43 |     bg_img_dir = ''
44 |     pretrained_model = 'cpm_hand'
45 |     batch_size = 5
46 |     init_lr = 0.001
47 |     lr_decay_rate = 0.5
48 |     lr_decay_step = 10000
49 |     training_iters = 300000
50 |     verbose_iters = 10
51 |     validation_iters = 1000
52 |     model_save_iters = 5000
53 |     augmentation_config = {'hue_shift_limit': (-5, 5),
54 |                            'sat_shift_limit': (-10, 10),
55 |                            'val_shift_limit': (-15, 15),
56 |                            'translation_limit': (-0.15, 0.15),
57 |                            'scale_limit': (-0.3, 0.5),
58 |                            'rotate_limit': (-90, 90)}
59 |     hnm = True  # Make sure to generate the hnm files first
60 | do_cropping = True 61 | 62 | """ 63 | For Freeze graphs 64 | """ 65 | output_node_names = 'stage_3/mid_conv7/BiasAdd:0' 66 | 67 | 68 | """ 69 | For Drawing 70 | """ 71 | # Default Pose 72 | default_hand = [[259, 335], 73 | [245, 311], 74 | [226, 288], 75 | [206, 270], 76 | [195, 261], 77 | [203, 308], 78 | [165, 290], 79 | [139, 287], 80 | [119, 284], 81 | [199, 328], 82 | [156, 318], 83 | [128, 314], 84 | [104, 318], 85 | [204, 341], 86 | [163, 340], 87 | [133, 347], 88 | [108, 349], 89 | [206, 359], 90 | [176, 368], 91 | [164, 370], 92 | [144, 377]] 93 | 94 | # Limb connections 95 | limbs = [[0, 1], 96 | [1, 2], 97 | [2, 3], 98 | [3, 4], 99 | [0, 5], 100 | [5, 6], 101 | [6, 7], 102 | [7, 8], 103 | [0, 9], 104 | [9, 10], 105 | [10, 11], 106 | [11, 12], 107 | [0, 13], 108 | [13, 14], 109 | [14, 15], 110 | [15, 16], 111 | [0, 17], 112 | [17, 18], 113 | [18, 19], 114 | [19, 20] 115 | ] 116 | 117 | # Finger colors 118 | joint_color_code = [[139, 53, 255], 119 | [0, 56, 255], 120 | [43, 140, 237], 121 | [37, 168, 36], 122 | [147, 147, 0], 123 | [70, 17, 145]] 124 | 125 | # My hand joint order 126 | # FLAGS.limbs = [[0, 1], 127 | # [1, 2], 128 | # [2, 3], 129 | # [3, 20], 130 | # [4, 5], 131 | # [5, 6], 132 | # [6, 7], 133 | # [7, 20], 134 | # [8, 9], 135 | # [9, 10], 136 | # [10, 11], 137 | # [11, 20], 138 | # [12, 13], 139 | # [13, 14], 140 | # [14, 15], 141 | # [15, 20], 142 | # [16, 17], 143 | # [17, 18], 144 | # [18, 19], 145 | # [19, 20] 146 | # ] 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/models/__init__.py -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/models/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /models/nets/CPM.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod, abstractproperty 2 | 3 | class CPM(object): 4 | __metaclass__ = ABCMeta 5 | 6 | @abstractmethod 7 | def __init__(self, input_size, heatmap_size, stages, joints, img_type='RGB'): 8 | pass 9 | 10 | @abstractmethod 11 | def _build_model(self): 12 | pass 13 | 14 | @abstractmethod 15 | def build_loss(self, lr, lr_decay_rate, lr_decay_step, optimizer='Adam'): 16 | pass 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /models/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/models/nets/__init__.py -------------------------------------------------------------------------------- /models/nets/__pycache__/CPM.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/models/nets/__pycache__/CPM.cpython-35.pyc 
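(Editor's note) CPM.py above only fixes the interface that every network in models/nets implements; the concrete graphs live in cpm_hand.py and cpm_body.py below. A minimal sketch of that contract, using a hypothetical subclass name purely for illustration:

import tensorflow as tf
from models.nets.CPM import CPM

class MinimalCPM(CPM):  # hypothetical example, not part of the repository
    def __init__(self, input_size, heatmap_size, stages, joints, img_type='RGB'):
        self.stages = stages
        self.joints = joints
        self.stage_heatmap = []  # one predicted heatmap tensor per stage
        self.input_images = tf.placeholder(tf.float32, (None, input_size, input_size, 3),
                                           name='input_placeholder')
        self._build_model()

    def _build_model(self):
        # The real models build a VGG-style feature extractor here, then append
        # one refinement head per stage to self.stage_heatmap.
        pass

    def build_loss(self, lr, lr_decay_rate, lr_decay_step, optimizer='Adam'):
        # The real models sum per-stage L2 heatmap losses and attach an optimizer here.
        pass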
-------------------------------------------------------------------------------- /models/nets/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/models/nets/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /models/nets/__pycache__/cpm_hand.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/models/nets/__pycache__/cpm_hand.cpython-35.pyc -------------------------------------------------------------------------------- /models/nets/cpm_body.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import pickle 3 | 4 | 5 | class CPM_Model(object): 6 | def __init__(self, stages, joints): 7 | self.stages = stages 8 | self.stage_heatmap = [] 9 | self.stage_loss = [0] * stages 10 | self.total_loss = 0 11 | self.input_image = None 12 | self.center_map = None 13 | self.gt_heatmap = None 14 | self.learning_rate = 0 15 | self.merged_summary = None 16 | self.joints = joints 17 | self.batch_size = 0 18 | 19 | def build_model(self, input_image, center_map, batch_size): 20 | self.batch_size = batch_size 21 | self.input_image = input_image 22 | self.center_map = center_map 23 | with tf.variable_scope('pooled_center_map'): 24 | self.center_map = tf.layers.average_pooling2d(inputs=self.center_map, 25 | pool_size=[9, 9], 26 | strides=[8, 8], 27 | padding='same', 28 | name='center_map') 29 | with tf.variable_scope('sub_stages'): 30 | sub_conv1 = tf.layers.conv2d(inputs=input_image, 31 | filters=64, 32 | kernel_size=[3, 3], 33 | strides=[1, 1], 34 | padding='same', 35 | activation=tf.nn.relu, 36 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 37 | name='sub_conv1') 38 | sub_conv2 = tf.layers.conv2d(inputs=sub_conv1, 39 | filters=64, 40 | kernel_size=[3, 3], 41 | strides=[1, 1], 42 | padding='same', 43 | activation=tf.nn.relu, 44 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 45 | name='sub_conv2') 46 | sub_pool1 = tf.layers.max_pooling2d(inputs=sub_conv2, 47 | pool_size=[2, 2], 48 | strides=2, 49 | padding='same', 50 | name='sub_pool1') 51 | sub_conv3 = tf.layers.conv2d(inputs=sub_pool1, 52 | filters=128, 53 | kernel_size=[3, 3], 54 | strides=[1, 1], 55 | padding='same', 56 | activation=tf.nn.relu, 57 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 58 | name='sub_conv3') 59 | sub_conv4 = tf.layers.conv2d(inputs=sub_conv3, 60 | filters=128, 61 | kernel_size=[3, 3], 62 | strides=[1, 1], 63 | padding='same', 64 | activation=tf.nn.relu, 65 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 66 | name='sub_conv4') 67 | sub_pool2 = tf.layers.max_pooling2d(inputs=sub_conv4, 68 | pool_size=[2, 2], 69 | strides=2, 70 | padding='same', 71 | name='sub_pool2') 72 | sub_conv5 = tf.layers.conv2d(inputs=sub_pool2, 73 | filters=256, 74 | kernel_size=[3, 3], 75 | strides=[1, 1], 76 | padding='same', 77 | activation=tf.nn.relu, 78 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 79 | name='sub_conv5') 80 | sub_conv6 = tf.layers.conv2d(inputs=sub_conv5, 81 | filters=256, 82 | kernel_size=[3, 3], 83 | strides=[1, 1], 84 | padding='same', 85 | activation=tf.nn.relu, 86 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 87 
| name='sub_conv6') 88 | sub_conv7 = tf.layers.conv2d(inputs=sub_conv6, 89 | filters=256, 90 | kernel_size=[3, 3], 91 | strides=[1, 1], 92 | padding='same', 93 | activation=tf.nn.relu, 94 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 95 | name='sub_conv7') 96 | sub_conv8 = tf.layers.conv2d(inputs=sub_conv7, 97 | filters=256, 98 | kernel_size=[3, 3], 99 | strides=[1, 1], 100 | padding='same', 101 | activation=tf.nn.relu, 102 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 103 | name='sub_conv8') 104 | sub_pool3 = tf.layers.max_pooling2d(inputs=sub_conv8, 105 | pool_size=[2, 2], 106 | strides=2, 107 | padding='same', 108 | name='sub_pool3') 109 | sub_conv9 = tf.layers.conv2d(inputs=sub_pool3, 110 | filters=512, 111 | kernel_size=[3, 3], 112 | strides=[1, 1], 113 | padding='same', 114 | activation=tf.nn.relu, 115 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 116 | name='sub_conv9') 117 | sub_conv10 = tf.layers.conv2d(inputs=sub_conv9, 118 | filters=512, 119 | kernel_size=[3, 3], 120 | strides=[1, 1], 121 | padding='same', 122 | activation=tf.nn.relu, 123 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 124 | name='sub_conv10') 125 | sub_conv11 = tf.layers.conv2d(inputs=sub_conv10, 126 | filters=256, 127 | kernel_size=[3, 3], 128 | strides=[1, 1], 129 | padding='same', 130 | activation=tf.nn.relu, 131 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 132 | name='sub_conv11') 133 | sub_conv12 = tf.layers.conv2d(inputs=sub_conv11, 134 | filters=256, 135 | kernel_size=[3, 3], 136 | strides=[1, 1], 137 | padding='same', 138 | activation=tf.nn.relu, 139 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 140 | name='sub_conv12') 141 | sub_conv13 = tf.layers.conv2d(inputs=sub_conv12, 142 | filters=256, 143 | kernel_size=[3, 3], 144 | strides=[1, 1], 145 | padding='same', 146 | activation=tf.nn.relu, 147 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 148 | name='sub_conv13') 149 | sub_conv14 = tf.layers.conv2d(inputs=sub_conv13, 150 | filters=256, 151 | kernel_size=[3, 3], 152 | strides=[1, 1], 153 | padding='same', 154 | activation=tf.nn.relu, 155 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 156 | name='sub_conv14') 157 | 158 | self.sub_stage_img_feature = tf.layers.conv2d(inputs=sub_conv14, 159 | filters=128, 160 | kernel_size=[3, 3], 161 | strides=[1, 1], 162 | padding='same', 163 | activation=tf.nn.relu, 164 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 165 | name='sub_stage_img_feature') 166 | 167 | with tf.variable_scope('stage_1'): 168 | conv1 = tf.layers.conv2d(inputs=self.sub_stage_img_feature, 169 | filters=512, 170 | kernel_size=[1, 1], 171 | strides=[1, 1], 172 | padding='same', 173 | activation=tf.nn.relu, 174 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 175 | name='conv1') 176 | self.stage_heatmap.append(tf.layers.conv2d(inputs=conv1, 177 | filters=self.joints, 178 | kernel_size=[1, 1], 179 | strides=[1, 1], 180 | padding='same', 181 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 182 | name='stage_heatmap')) 183 | for stage in range(2, self.stages + 1): 184 | self._middle_conv(stage) 185 | 186 | def _middle_conv(self, stage): 187 | with tf.variable_scope('stage_' + str(stage)): 188 | self.current_featuremap = tf.concat([self.stage_heatmap[stage - 2], 189 | self.sub_stage_img_feature, 190 | self.center_map, 191 | ], 192 | axis=3) 193 | mid_conv1 = tf.layers.conv2d(inputs=self.current_featuremap, 194 | filters=128, 195 | kernel_size=[7, 7], 
196 | strides=[1, 1], 197 | padding='same', 198 | activation=tf.nn.relu, 199 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 200 | name='mid_conv1') 201 | mid_conv2 = tf.layers.conv2d(inputs=mid_conv1, 202 | filters=128, 203 | kernel_size=[7, 7], 204 | strides=[1, 1], 205 | padding='same', 206 | activation=tf.nn.relu, 207 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 208 | name='mid_conv2') 209 | mid_conv3 = tf.layers.conv2d(inputs=mid_conv2, 210 | filters=128, 211 | kernel_size=[7, 7], 212 | strides=[1, 1], 213 | padding='same', 214 | activation=tf.nn.relu, 215 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 216 | name='mid_conv3') 217 | mid_conv4 = tf.layers.conv2d(inputs=mid_conv3, 218 | filters=128, 219 | kernel_size=[7, 7], 220 | strides=[1, 1], 221 | padding='same', 222 | activation=tf.nn.relu, 223 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 224 | name='mid_conv4') 225 | mid_conv5 = tf.layers.conv2d(inputs=mid_conv4, 226 | filters=128, 227 | kernel_size=[7, 7], 228 | strides=[1, 1], 229 | padding='same', 230 | activation=tf.nn.relu, 231 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 232 | name='mid_conv5') 233 | mid_conv6 = tf.layers.conv2d(inputs=mid_conv5, 234 | filters=128, 235 | kernel_size=[1, 1], 236 | strides=[1, 1], 237 | padding='same', 238 | activation=tf.nn.relu, 239 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 240 | name='mid_conv6') 241 | self.current_heatmap = tf.layers.conv2d(inputs=mid_conv6, 242 | filters=self.joints, 243 | kernel_size=[1, 1], 244 | strides=[1, 1], 245 | padding='same', 246 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 247 | name='mid_conv7') 248 | self.stage_heatmap.append(self.current_heatmap) 249 | 250 | def build_loss(self, gt_heatmap, lr, lr_decay_rate, lr_decay_step): 251 | self.gt_heatmap = gt_heatmap 252 | self.total_loss = 0 253 | self.learning_rate = lr 254 | self.lr_decay_rate = lr_decay_rate 255 | self.lr_decay_step = lr_decay_step 256 | 257 | for stage in range(self.stages): 258 | with tf.variable_scope('stage' + str(stage + 1) + '_loss'): 259 | self.stage_loss[stage] = tf.nn.l2_loss(self.stage_heatmap[stage] - self.gt_heatmap, 260 | name='l2_loss') / self.batch_size 261 | tf.summary.scalar('stage' + str(stage + 1) + '_loss', self.stage_loss[stage]) 262 | 263 | with tf.variable_scope('total_loss'): 264 | for stage in range(self.stages): 265 | self.total_loss += self.stage_loss[stage] 266 | tf.summary.scalar('total loss', self.total_loss) 267 | 268 | with tf.variable_scope('train'): 269 | self.global_step = tf.contrib.framework.get_or_create_global_step() 270 | 271 | self.lr = tf.train.exponential_decay(self.learning_rate, 272 | global_step=self.global_step, 273 | decay_rate=self.lr_decay_rate, 274 | decay_steps=self.lr_decay_step) 275 | tf.summary.scalar('learning rate', self.lr) 276 | 277 | self.train_op = tf.contrib.layers.optimize_loss(loss=self.total_loss, 278 | global_step=self.global_step, 279 | learning_rate=self.lr, 280 | optimizer='Adam') 281 | self.merged_summary = tf.summary.merge_all() 282 | 283 | def load_weights_from_file(self, weight_file_path, sess, finetune=True): 284 | weights = pickle.load(open(weight_file_path, 'rb'), encoding='latin1') 285 | 286 | with tf.variable_scope('', reuse=True): 287 | ## Pre stage conv 288 | # conv1 289 | for layer in range(1, 3): 290 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/kernel') 291 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/bias') 
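                # (editor's note) The pickled weight file uses Caffe-style layer names such as
                # 'conv1_1' / 'conv1_1_b', while the TF graph names its variables
                # 'sub_stages/sub_convN/kernel' and '.../bias'; each loop in this method pairs
                # the two naming schemes and copies the arrays across with tf.assign.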
292 | 293 | loaded_kernel = weights['conv1_' + str(layer)] 294 | loaded_bias = weights['conv1_' + str(layer) + '_b'] 295 | 296 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 297 | sess.run(tf.assign(conv_bias, loaded_bias)) 298 | 299 | # conv2 300 | for layer in range(1, 3): 301 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/kernel') 302 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/bias') 303 | 304 | loaded_kernel = weights['conv2_' + str(layer)] 305 | loaded_bias = weights['conv2_' + str(layer) + '_b'] 306 | 307 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 308 | sess.run(tf.assign(conv_bias, loaded_bias)) 309 | 310 | # conv3 311 | for layer in range(1, 5): 312 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/kernel') 313 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/bias') 314 | 315 | loaded_kernel = weights['conv3_' + str(layer)] 316 | loaded_bias = weights['conv3_' + str(layer) + '_b'] 317 | 318 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 319 | sess.run(tf.assign(conv_bias, loaded_bias)) 320 | 321 | # conv4 322 | for layer in range(1, 3): 323 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/kernel') 324 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/bias') 325 | 326 | loaded_kernel = weights['conv4_' + str(layer)] 327 | loaded_bias = weights['conv4_' + str(layer) + '_b'] 328 | 329 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 330 | sess.run(tf.assign(conv_bias, loaded_bias)) 331 | 332 | # conv4_CPM 333 | for layer in range(1, 5): 334 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 10) + '/kernel') 335 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 10) + '/bias') 336 | 337 | loaded_kernel = weights['conv4_' + str(2 + layer) + '_CPM'] 338 | loaded_bias = weights['conv4_' + str(2 + layer) + '_CPM_b'] 339 | 340 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 341 | sess.run(tf.assign(conv_bias, loaded_bias)) 342 | 343 | # conv5_3_CPM 344 | conv_kernel = tf.get_variable('sub_stages/sub_stage_img_feature/kernel') 345 | conv_bias = tf.get_variable('sub_stages/sub_stage_img_feature/bias') 346 | 347 | loaded_kernel = weights['conv4_7_CPM'] 348 | loaded_bias = weights['conv4_7_CPM_b'] 349 | 350 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 351 | sess.run(tf.assign(conv_bias, loaded_bias)) 352 | 353 | ## stage 1 354 | conv_kernel = tf.get_variable('stage_1/conv1/kernel') 355 | conv_bias = tf.get_variable('stage_1/conv1/bias') 356 | 357 | loaded_kernel = weights['conv5_1_CPM'] 358 | loaded_bias = weights['conv5_1_CPM_b'] 359 | 360 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 361 | sess.run(tf.assign(conv_bias, loaded_bias)) 362 | 363 | if finetune != True: 364 | conv_kernel = tf.get_variable('stage_1/stage_heatmap/kernel') 365 | conv_bias = tf.get_variable('stage_1/stage_heatmap/bias') 366 | 367 | loaded_kernel = weights['conv5_2_CPM'] 368 | loaded_bias = weights['conv5_2_CPM_b'] 369 | 370 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 371 | sess.run(tf.assign(conv_bias, loaded_bias)) 372 | 373 | ## stage 2 and behind 374 | for stage in range(2, self.stages + 1): 375 | for layer in range(1, 8): 376 | conv_kernel = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/kernel') 377 | conv_bias = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/bias') 378 | 379 | loaded_kernel = weights['Mconv' + str(layer) + '_stage' + str(stage)] 380 | 
loaded_bias = weights['Mconv' + str(layer) + '_stage' + str(stage) + '_b'] 381 | 382 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 383 | sess.run(tf.assign(conv_bias, loaded_bias)) 384 | -------------------------------------------------------------------------------- /models/nets/cpm_body_slim.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import tensorflow as tf 3 | import tensorflow.contrib.slim as slim 4 | 5 | 6 | class CPM_Model(object): 7 | def __init__(self, stages, joints): 8 | self.stages = stages 9 | self.stage_heatmap = [] 10 | self.stage_loss = [0] * stages 11 | self.total_loss = 0 12 | self.input_image = None 13 | self.center_map = None 14 | self.gt_heatmap = None 15 | self.learning_rate = 0 16 | self.merged_summary = None 17 | self.joints = joints 18 | self.batch_size = 0 19 | 20 | def build_model(self, input_image, center_map, batch_size): 21 | self.batch_size = batch_size 22 | self.input_image = input_image 23 | self.center_map = center_map 24 | with tf.variable_scope('pooled_center_map'): 25 | self.center_map = slim.avg_pool2d(self.center_map, 26 | [9, 9], stride=8, 27 | padding='SAME', 28 | scope='center_map') 29 | with slim.arg_scope([slim.conv2d], 30 | padding='SAME', 31 | activation_fn=tf.nn.relu, 32 | weights_initializer=tf.contrib.layers.xavier_initializer()): 33 | with tf.variable_scope('sub_stages'): 34 | net = slim.conv2d(input_image, 64, [3, 3], scope='sub_conv1') 35 | net = slim.conv2d(net, 64, [3, 3], scope='sub_conv2') 36 | net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool1') 37 | net = slim.conv2d(net, 128, [3, 3], scope='sub_conv3') 38 | net = slim.conv2d(net, 128, [3, 3], scope='sub_conv4') 39 | net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool2') 40 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv5') 41 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv6') 42 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv7') 43 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv8') 44 | net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool3') 45 | net = slim.conv2d(net, 512, [3, 3], scope='sub_conv9') 46 | net = slim.conv2d(net, 512, [3, 3], scope='sub_conv10') 47 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv11') 48 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv12') 49 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv13') 50 | net = slim.conv2d(net, 256, [3, 3], scope='sub_conv14') 51 | self.sub_stage_img_feature = slim.conv2d(net, 128, [3, 3], 52 | scope='sub_stage_img_feature') 53 | 54 | with tf.variable_scope('stage_1'): 55 | conv1 = slim.conv2d(self.sub_stage_img_feature, 512, [1, 1], 56 | scope='conv1') 57 | self.stage_heatmap.append(slim.conv2d(conv1, self.joints, [1, 1], 58 | scope='stage_heatmap')) 59 | 60 | for stage in range(2, self.stages+1): 61 | self._middle_conv(stage) 62 | 63 | def _middle_conv(self, stage): 64 | with tf.variable_scope('stage_' + str(stage)): 65 | self.current_featuremap = tf.concat([self.stage_heatmap[stage-2], 66 | self.sub_stage_img_feature, 67 | self.center_map], 68 | axis=3) 69 | with slim.arg_scope([slim.conv2d], 70 | padding='SAME', 71 | activation_fn=tf.nn.relu, 72 | weights_initializer=tf.contrib.layers.xavier_initializer()): 73 | mid_net = slim.conv2d(self.current_featuremap, 128, [7, 7], scope='mid_conv1') 74 | mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv2') 75 | mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv3') 76 | mid_net = slim.conv2d(mid_net, 128, [7, 7], 
scope='mid_conv4')
77 |                 mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv5')
78 |                 mid_net = slim.conv2d(mid_net, 128, [1, 1], scope='mid_conv6')
79 |                 self.current_heatmap = slim.conv2d(mid_net, self.joints, [1, 1],
80 |                                                    scope='mid_conv7')
81 |             self.stage_heatmap.append(self.current_heatmap)
82 |
83 |     def build_loss(self, gt_heatmap, lr, lr_decay_rate, lr_decay_step):
84 |         self.gt_heatmap = gt_heatmap
85 |         self.total_loss = 0
86 |         self.learning_rate = lr
87 |         self.lr_decay_rate = lr_decay_rate
88 |         self.lr_decay_step = lr_decay_step
89 |
90 |         for stage in range(self.stages):
91 |             with tf.variable_scope('stage' + str(stage+1) + '_loss'):
92 |                 self.stage_loss[stage] = tf.nn.l2_loss(self.stage_heatmap[stage] - self.gt_heatmap,
93 |                                                        name='l2_loss') / self.batch_size
94 |             tf.summary.scalar('stage' + str(stage+1) + '_loss', self.stage_loss[stage])
95 |
96 |         with tf.variable_scope('total_loss'):
97 |             for stage in range(self.stages):
98 |                 self.total_loss += self.stage_loss[stage]
99 |             tf.summary.scalar('total loss', self.total_loss)
100 |
101 |         with tf.variable_scope('train'):
102 |             self.global_step = tf.contrib.framework.get_or_create_global_step()
103 |
104 |             self.lr = tf.train.exponential_decay(self.learning_rate,
105 |                                                  global_step=self.global_step,
106 |                                                  decay_rate=self.lr_decay_rate,
107 |                                                  decay_steps=self.lr_decay_step)
108 |             tf.summary.scalar('learning rate', self.lr)
109 |
110 |             self.train_op = tf.contrib.layers.optimize_loss(loss=self.total_loss,
111 |                                                             global_step=self.global_step,
112 |                                                             learning_rate=self.lr,
113 |                                                             optimizer='Adam')
114 |         self.merged_summary = tf.summary.merge_all()
115 |
116 |     def load_weights_from_file(self, weight_file_path, sess, finetune=True):
117 |         weights = pickle.load(open(weight_file_path, 'rb'), encoding='latin1')
118 |
119 |         with tf.variable_scope('', reuse=True):
120 |             ## Pre stage conv
121 |             # for layer in range(1, 15):
122 |             #     conv_weights = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/weights')
123 |             #     conv_biases = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/biases')
124 |             #
125 |             #     loaded_weights = weights['sub_conv' + str(layer)]
126 |             #     loaded_biases = weights['sub_conv' + str(layer)]
127 |             #
128 |             #     sess.run(tf.assign(conv_weights, loaded_weights))
129 |             #     sess.run(tf.assign(conv_biases, loaded_biases))
130 |
131 |             # conv1
132 |             for layer in range(1, 3):
133 |                 conv_weights = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/weights')
134 |                 conv_biases = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/biases')
135 |
136 |                 loaded_weights = weights['conv1_' + str(layer)]
137 |                 loaded_biases = weights['conv1_' + str(layer) + '_b']
138 |
139 |                 sess.run(tf.assign(conv_weights, loaded_weights))
140 |                 sess.run(tf.assign(conv_biases, loaded_biases))
141 |
142 |             # conv2
143 |             for layer in range(1, 3):
144 |                 conv_weights = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/weights')
145 |                 conv_biases = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/biases')
146 |
147 |                 loaded_weights = weights['conv2_' + str(layer)]
148 |                 loaded_biases = weights['conv2_' + str(layer) + '_b']
149 |
150 |                 sess.run(tf.assign(conv_weights, loaded_weights))
151 |                 sess.run(tf.assign(conv_biases, loaded_biases))
152 |
153 |             # conv3
154 |             for layer in range(1, 5):
155 |                 conv_weights = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/weights')
156 |                 conv_biases = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/biases')
157 |
158 |                 loaded_weights = weights['conv3_' + str(layer)]
159 |                 loaded_biases = 
weights['conv3_' + str(layer) + '_b'] 160 | 161 | sess.run(tf.assign(conv_weights, loaded_weights)) 162 | sess.run(tf.assign(conv_biases, loaded_biases)) 163 | 164 | # conv4 165 | for layer in range(1, 3): 166 | conv_weights = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/weights') 167 | conv_biases = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/biases') 168 | 169 | loaded_weights = weights['conv4_' + str(layer)] 170 | loaded_biases = weights['conv4_' + str(layer) + '_b'] 171 | 172 | sess.run(tf.assign(conv_weights, loaded_weights)) 173 | sess.run(tf.assign(conv_biases, loaded_biases)) 174 | 175 | # conv4_CPM 176 | for layer in range(1, 5): 177 | conv_weights = tf.get_variable('sub_stages/sub_conv' + str(layer + 10) + '/weights') 178 | conv_biases = tf.get_variable('sub_stages/sub_conv' + str(layer + 10) + '/biases') 179 | 180 | loaded_weights = weights['conv4_' + str(2 + layer) + '_CPM'] 181 | loaded_biases = weights['conv4_' + str(2 + layer) + '_CPM_b'] 182 | 183 | sess.run(tf.assign(conv_weights, loaded_weights)) 184 | sess.run(tf.assign(conv_biases, loaded_biases)) 185 | 186 | # conv5_3_CPM 187 | conv_weights = tf.get_variable('sub_stages/sub_stage_img_feature/weights') 188 | conv_biases = tf.get_variable('sub_stages/sub_stage_img_feature/biases') 189 | 190 | loaded_weights = weights['conv4_7_CPM'] 191 | loaded_biases = weights['conv4_7_CPM_b'] 192 | 193 | sess.run(tf.assign(conv_weights, loaded_weights)) 194 | sess.run(tf.assign(conv_biases, loaded_biases)) 195 | 196 | ## stage 1 197 | conv_weights = tf.get_variable('stage_1/conv1/weights') 198 | conv_biases = tf.get_variable('stage_1/conv1/biases') 199 | 200 | loaded_weights = weights['conv5_1_CPM'] 201 | loaded_biases = weights['conv5_1_CPM_b'] 202 | 203 | sess.run(tf.assign(conv_weights, loaded_weights)) 204 | sess.run(tf.assign(conv_biases, loaded_biases)) 205 | 206 | if finetune != True: 207 | conv_weights = tf.get_variable('stage_1/stage_heatmap/weights') 208 | conv_biases = tf.get_variable('stage_1/stage_heatmap/biases') 209 | 210 | loaded_weights = weights['conv5_2_CPM'] 211 | loaded_biases = weights['conv5_2_CPM_b'] 212 | 213 | sess.run(tf.assign(conv_weights, loaded_weights)) 214 | sess.run(tf.assign(conv_biases, loaded_biases)) 215 | 216 | ## stage 2 and behind 217 | for stage in range(2, self.stages + 1): 218 | for layer in range(1, 8): 219 | conv_weights = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/weights') 220 | conv_biases = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/biases') 221 | 222 | loaded_weights = weights['Mconv' + str(layer) + '_stage' + str(stage)] 223 | loaded_biases = weights['Mconv' + str(layer) + '_stage' + str(stage) + '_b'] 224 | 225 | sess.run(tf.assign(conv_weights, loaded_weights)) 226 | sess.run(tf.assign(conv_biases, loaded_biases)) 227 | -------------------------------------------------------------------------------- /models/nets/cpm_hand.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import tensorflow as tf 4 | from models.nets.CPM import CPM 5 | 6 | 7 | class CPM_Model(CPM): 8 | def __init__(self, input_size, heatmap_size, stages, joints, img_type='RGB', is_training=True): 9 | self.stages = stages 10 | self.stage_heatmap = [] 11 | self.stage_loss = [0 for _ in range(stages)] 12 | self.total_loss = 0 13 | self.input_image = None 14 | self.center_map = None 15 | self.gt_heatmap = None 16 | self.init_lr = 0 17 | self.merged_summary = None 18 | self.joints = 
joints 19 | self.batch_size = 0 20 | self.inference_type = 'Train' 21 | 22 | if img_type == 'RGB': 23 | self.input_images = tf.placeholder(dtype=tf.float32, 24 | shape=(None, input_size, input_size, 3), 25 | name='input_placeholder') 26 | elif img_type == 'GRAY': 27 | self.input_images = tf.placeholder(dtype=tf.float32, 28 | shape=(None, input_size, input_size, 1), 29 | name='input_placeholder') 30 | self.cmap_placeholder = tf.placeholder(dtype=tf.float32, 31 | shape=(None, input_size, input_size, 1), 32 | name='cmap_placeholder') 33 | self.gt_hmap_placeholder = tf.placeholder(dtype=tf.float32, 34 | shape=(None, heatmap_size, heatmap_size, joints + 1), 35 | name='gt_hmap_placeholder') 36 | self._build_model() 37 | 38 | def _build_model(self): 39 | with tf.variable_scope('pooled_center_map'): 40 | self.center_map = tf.layers.average_pooling2d(inputs=self.cmap_placeholder, 41 | pool_size=[9, 9], 42 | strides=[8, 8], 43 | padding='same', 44 | name='center_map') 45 | with tf.variable_scope('sub_stages'): 46 | sub_conv1 = tf.layers.conv2d(inputs=self.input_images, 47 | filters=64, 48 | kernel_size=[3, 3], 49 | strides=[1, 1], 50 | padding='same', 51 | activation=tf.nn.relu, 52 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 53 | name='sub_conv1') 54 | 55 | sub_conv2 = tf.layers.conv2d(inputs=sub_conv1, 56 | filters=64, 57 | kernel_size=[3, 3], 58 | strides=[1, 1], 59 | padding='same', 60 | activation=tf.nn.relu, 61 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 62 | name='sub_conv2') 63 | sub_pool1 = tf.layers.max_pooling2d(inputs=sub_conv2, 64 | pool_size=[2, 2], 65 | strides=2, 66 | padding='valid', 67 | name='sub_pool1') 68 | sub_conv3 = tf.layers.conv2d(inputs=sub_pool1, 69 | filters=128, 70 | kernel_size=[3, 3], 71 | strides=[1, 1], 72 | padding='same', 73 | activation=tf.nn.relu, 74 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 75 | name='sub_conv3') 76 | sub_conv4 = tf.layers.conv2d(inputs=sub_conv3, 77 | filters=128, 78 | kernel_size=[3, 3], 79 | strides=[1, 1], 80 | padding='same', 81 | activation=tf.nn.relu, 82 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 83 | name='sub_conv4') 84 | sub_pool2 = tf.layers.max_pooling2d(inputs=sub_conv4, 85 | pool_size=[2, 2], 86 | strides=2, 87 | padding='valid', 88 | name='sub_pool2') 89 | sub_conv5 = tf.layers.conv2d(inputs=sub_pool2, 90 | filters=256, 91 | kernel_size=[3, 3], 92 | strides=[1, 1], 93 | padding='same', 94 | activation=tf.nn.relu, 95 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 96 | name='sub_conv5') 97 | sub_conv6 = tf.layers.conv2d(inputs=sub_conv5, 98 | filters=256, 99 | kernel_size=[3, 3], 100 | strides=[1, 1], 101 | padding='same', 102 | activation=tf.nn.relu, 103 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 104 | name='sub_conv6') 105 | sub_conv7 = tf.layers.conv2d(inputs=sub_conv6, 106 | filters=256, 107 | kernel_size=[3, 3], 108 | strides=[1, 1], 109 | padding='same', 110 | activation=tf.nn.relu, 111 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 112 | name='sub_conv7') 113 | sub_conv8 = tf.layers.conv2d(inputs=sub_conv7, 114 | filters=256, 115 | kernel_size=[3, 3], 116 | strides=[1, 1], 117 | padding='same', 118 | activation=tf.nn.relu, 119 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 120 | name='sub_conv8') 121 | sub_pool3 = tf.layers.max_pooling2d(inputs=sub_conv8, 122 | pool_size=[2, 2], 123 | strides=2, 124 | padding='valid', 125 | name='sub_pool3') 126 | sub_conv9 = tf.layers.conv2d(inputs=sub_pool3, 127 
| filters=512, 128 | kernel_size=[3, 3], 129 | strides=[1, 1], 130 | padding='same', 131 | activation=tf.nn.relu, 132 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 133 | name='sub_conv9') 134 | sub_conv10 = tf.layers.conv2d(inputs=sub_conv9, 135 | filters=512, 136 | kernel_size=[3, 3], 137 | strides=[1, 1], 138 | padding='same', 139 | activation=tf.nn.relu, 140 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 141 | name='sub_conv10') 142 | sub_conv11 = tf.layers.conv2d(inputs=sub_conv10, 143 | filters=512, 144 | kernel_size=[3, 3], 145 | strides=[1, 1], 146 | padding='same', 147 | activation=tf.nn.relu, 148 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 149 | name='sub_conv11') 150 | sub_conv12 = tf.layers.conv2d(inputs=sub_conv11, 151 | filters=512, 152 | kernel_size=[3, 3], 153 | strides=[1, 1], 154 | padding='same', 155 | activation=tf.nn.relu, 156 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 157 | name='sub_conv12') 158 | sub_conv13 = tf.layers.conv2d(inputs=sub_conv12, 159 | filters=512, 160 | kernel_size=[3, 3], 161 | strides=[1, 1], 162 | padding='same', 163 | activation=tf.nn.relu, 164 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 165 | name='sub_conv13') 166 | sub_conv14 = tf.layers.conv2d(inputs=sub_conv13, 167 | filters=512, 168 | kernel_size=[3, 3], 169 | strides=[1, 1], 170 | padding='same', 171 | activation=tf.nn.relu, 172 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 173 | name='sub_conv14') 174 | self.sub_stage_img_feature = tf.layers.conv2d(inputs=sub_conv14, 175 | filters=128, 176 | kernel_size=[3, 3], 177 | strides=[1, 1], 178 | padding='same', 179 | activation=tf.nn.relu, 180 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 181 | name='sub_stage_img_feature') 182 | 183 | with tf.variable_scope('stage_1'): 184 | conv1 = tf.layers.conv2d(inputs=self.sub_stage_img_feature, 185 | filters=512, 186 | kernel_size=[1, 1], 187 | strides=[1, 1], 188 | padding='valid', 189 | activation=tf.nn.relu, 190 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 191 | name='conv1') 192 | self.stage_heatmap.append(tf.layers.conv2d(inputs=conv1, 193 | filters=self.joints+1, 194 | kernel_size=[1, 1], 195 | strides=[1, 1], 196 | padding='valid', 197 | activation=None, 198 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 199 | name='stage_heatmap')) 200 | for stage in range(2, self.stages + 1): 201 | self._middle_conv(stage) 202 | 203 | def _middle_conv(self, stage): 204 | with tf.variable_scope('stage_' + str(stage)): 205 | self.current_featuremap = tf.concat([self.stage_heatmap[stage - 2], 206 | self.sub_stage_img_feature, 207 | # self.center_map], 208 | ], 209 | axis=3) 210 | mid_conv1 = tf.layers.conv2d(inputs=self.current_featuremap, 211 | filters=128, 212 | kernel_size=[7, 7], 213 | strides=[1, 1], 214 | padding='same', 215 | activation=tf.nn.relu, 216 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 217 | name='mid_conv1') 218 | mid_conv2 = tf.layers.conv2d(inputs=mid_conv1, 219 | filters=128, 220 | kernel_size=[7, 7], 221 | strides=[1, 1], 222 | padding='same', 223 | activation=tf.nn.relu, 224 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 225 | name='mid_conv2') 226 | mid_conv3 = tf.layers.conv2d(inputs=mid_conv2, 227 | filters=128, 228 | kernel_size=[7, 7], 229 | strides=[1, 1], 230 | padding='same', 231 | activation=tf.nn.relu, 232 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 233 | name='mid_conv3') 234 | mid_conv4 = 
tf.layers.conv2d(inputs=mid_conv3, 235 | filters=128, 236 | kernel_size=[7, 7], 237 | strides=[1, 1], 238 | padding='same', 239 | activation=tf.nn.relu, 240 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 241 | name='mid_conv4') 242 | mid_conv5 = tf.layers.conv2d(inputs=mid_conv4, 243 | filters=128, 244 | kernel_size=[7, 7], 245 | strides=[1, 1], 246 | padding='same', 247 | activation=tf.nn.relu, 248 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 249 | name='mid_conv5') 250 | mid_conv6 = tf.layers.conv2d(inputs=mid_conv5, 251 | filters=128, 252 | kernel_size=[1, 1], 253 | strides=[1, 1], 254 | padding='valid', 255 | activation=tf.nn.relu, 256 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 257 | name='mid_conv6') 258 | self.current_heatmap = tf.layers.conv2d(inputs=mid_conv6, 259 | filters=self.joints+1, 260 | kernel_size=[1, 1], 261 | strides=[1, 1], 262 | padding='valid', 263 | activation=None, 264 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 265 | name='mid_conv7') 266 | self.stage_heatmap.append(self.current_heatmap) 267 | 268 | def build_loss(self, lr, lr_decay_rate, lr_decay_step, optimizer='Adam'): 269 | self.total_loss = 0 270 | self.total_loss_eval = 0 271 | self.init_lr = lr 272 | self.lr_decay_rate = lr_decay_rate 273 | self.lr_decay_step = lr_decay_step 274 | self.optimizer = optimizer 275 | self.batch_size = tf.cast(tf.shape(self.input_images)[0], dtype=tf.float32) 276 | 277 | 278 | for stage in range(self.stages): 279 | with tf.variable_scope('stage' + str(stage + 1) + '_loss'): 280 | self.stage_loss[stage] = tf.nn.l2_loss(self.stage_heatmap[stage] - self.gt_hmap_placeholder, 281 | name='l2_loss') / self.batch_size 282 | tf.summary.scalar('stage' + str(stage + 1) + '_loss', self.stage_loss[stage]) 283 | 284 | with tf.variable_scope('total_loss'): 285 | for stage in range(self.stages): 286 | self.total_loss += self.stage_loss[stage] 287 | tf.summary.scalar('total loss train', self.total_loss) 288 | 289 | with tf.variable_scope('total_loss_eval'): 290 | for stage in range(self.stages): 291 | self.total_loss_eval += self.stage_loss[stage] 292 | tf.summary.scalar('total loss eval', self.total_loss) 293 | 294 | with tf.variable_scope('train'): 295 | self.global_step = tf.contrib.framework.get_or_create_global_step() 296 | 297 | self.cur_lr = tf.train.exponential_decay(self.init_lr, 298 | global_step=self.global_step, 299 | decay_rate=self.lr_decay_rate, 300 | decay_steps=self.lr_decay_step) 301 | tf.summary.scalar('global learning rate', self.cur_lr) 302 | 303 | self.train_op = tf.contrib.layers.optimize_loss(loss=self.total_loss, 304 | global_step=self.global_step, 305 | learning_rate=self.cur_lr, 306 | optimizer=self.optimizer) 307 | 308 | def load_weights_from_file(self, weight_file_path, sess, finetune=True): 309 | # weight_file_object = open(weight_file_path, 'rb') 310 | weights = pickle.load(open(weight_file_path, 'rb'))#, encoding='latin1') 311 | 312 | with tf.variable_scope('', reuse=True): 313 | ## Pre stage conv 314 | # conv1 315 | for layer in range(1, 3): 316 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/kernel') 317 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/bias') 318 | 319 | loaded_kernel = weights['conv1_' + str(layer)] 320 | loaded_bias = weights['conv1_' + str(layer) + '_b'] 321 | 322 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 323 | sess.run(tf.assign(conv_bias, loaded_bias)) 324 | 325 | # conv2 326 | for layer in range(1, 3): 327 | conv_kernel = 
tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/kernel') 328 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/bias') 329 | 330 | loaded_kernel = weights['conv2_' + str(layer)] 331 | loaded_bias = weights['conv2_' + str(layer) + '_b'] 332 | 333 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 334 | sess.run(tf.assign(conv_bias, loaded_bias)) 335 | 336 | # conv3 337 | for layer in range(1, 5): 338 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/kernel') 339 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/bias') 340 | 341 | loaded_kernel = weights['conv3_' + str(layer)] 342 | loaded_bias = weights['conv3_' + str(layer) + '_b'] 343 | 344 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 345 | sess.run(tf.assign(conv_bias, loaded_bias)) 346 | 347 | # conv4 348 | for layer in range(1, 5): 349 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/kernel') 350 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/bias') 351 | 352 | loaded_kernel = weights['conv4_' + str(layer)] 353 | loaded_bias = weights['conv4_' + str(layer) + '_b'] 354 | 355 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 356 | sess.run(tf.assign(conv_bias, loaded_bias)) 357 | 358 | # conv5 359 | for layer in range(1, 3): 360 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 12) + '/kernel') 361 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 12) + '/bias') 362 | 363 | loaded_kernel = weights['conv5_' + str(layer)] 364 | loaded_bias = weights['conv5_' + str(layer) + '_b'] 365 | 366 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 367 | sess.run(tf.assign(conv_bias, loaded_bias)) 368 | 369 | # conv5_3_CPM 370 | conv_kernel = tf.get_variable('sub_stages/sub_stage_img_feature/kernel') 371 | conv_bias = tf.get_variable('sub_stages/sub_stage_img_feature/bias') 372 | 373 | loaded_kernel = weights['conv5_3_CPM'] 374 | loaded_bias = weights['conv5_3_CPM_b'] 375 | 376 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 377 | sess.run(tf.assign(conv_bias, loaded_bias)) 378 | 379 | ## stage 1 380 | conv_kernel = tf.get_variable('stage_1/conv1/kernel') 381 | conv_bias = tf.get_variable('stage_1/conv1/bias') 382 | 383 | loaded_kernel = weights['conv6_1_CPM'] 384 | loaded_bias = weights['conv6_1_CPM_b'] 385 | 386 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 387 | sess.run(tf.assign(conv_bias, loaded_bias)) 388 | 389 | if finetune != True: 390 | conv_kernel = tf.get_variable('stage_1/stage_heatmap/kernel') 391 | conv_bias = tf.get_variable('stage_1/stage_heatmap/bias') 392 | 393 | loaded_kernel = weights['conv6_2_CPM'] 394 | loaded_bias = weights['conv6_2_CPM_b'] 395 | 396 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 397 | sess.run(tf.assign(conv_bias, loaded_bias)) 398 | 399 | ## Stage 2 and behind 400 | for stage in range(2, self.stages + 1): 401 | for layer in range(1, 8): 402 | conv_kernel = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/kernel') 403 | conv_bias = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/bias') 404 | 405 | loaded_kernel = weights['Mconv' + str(layer) + '_stage' + str(stage)] 406 | loaded_bias = weights['Mconv' + str(layer) + '_stage' + str(stage) + '_b'] 407 | 408 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 409 | sess.run(tf.assign(conv_bias, loaded_bias)) 410 | -------------------------------------------------------------------------------- /models/nets/cpm_hand_slim.py: 
--------------------------------------------------------------------------------
1 | import pickle
2 | import tensorflow as tf
3 | import tensorflow.contrib.slim as slim
4 |
5 |
6 | class CPM_Model(object):
7 |     def __init__(self, stages, joints):
8 |         self.stages = stages
9 |         self.stage_heatmap = []
10 |         self.stage_loss = [0] * stages
11 |         self.total_loss = 0
12 |         self.input_image = None
13 |         self.center_map = None
14 |         self.gt_heatmap = None
15 |         self.learning_rate = 0
16 |         self.merged_summary = None
17 |         self.joints = joints
18 |         self.batch_size = 0
19 |
20 |     def build_model(self, input_image, center_map, batch_size):
21 |         self.batch_size = batch_size
22 |         self.input_image = input_image
23 |         self.center_map = center_map
24 |         with tf.variable_scope('pooled_center_map'):
25 |             # the center map is a Gaussian template that gathers the response
26 |             self.center_map = slim.avg_pool2d(self.center_map,
27 |                                               [9, 9], stride=8,
28 |                                               padding='SAME',
29 |                                               scope='center_map')
30 |
31 |         with slim.arg_scope([slim.conv2d],
32 |                             padding='SAME',
33 |                             activation_fn=tf.nn.relu,
34 |                             weights_initializer=tf.contrib.layers.xavier_initializer()):
35 |             with tf.variable_scope('sub_stages'):
36 |                 net = slim.conv2d(input_image, 64, [3, 3], scope='sub_conv1')
37 |                 net = slim.conv2d(net, 64, [3, 3], scope='sub_conv2')
38 |                 net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool1')
39 |                 net = slim.conv2d(net, 128, [3, 3], scope='sub_conv3')
40 |                 net = slim.conv2d(net, 128, [3, 3], scope='sub_conv4')
41 |                 net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool2')
42 |                 net = slim.conv2d(net, 256, [3, 3], scope='sub_conv5')
43 |                 net = slim.conv2d(net, 256, [3, 3], scope='sub_conv6')
44 |                 net = slim.conv2d(net, 256, [3, 3], scope='sub_conv7')
45 |                 net = slim.conv2d(net, 256, [3, 3], scope='sub_conv8')
46 |                 net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool3')
47 |                 net = slim.conv2d(net, 512, [3, 3], scope='sub_conv9')
48 |                 net = slim.conv2d(net, 512, [3, 3], scope='sub_conv10')
49 |                 net = slim.conv2d(net, 512, [3, 3], scope='sub_conv11')
50 |                 net = slim.conv2d(net, 512, [3, 3], scope='sub_conv12')
51 |                 net = slim.conv2d(net, 512, [3, 3], scope='sub_conv13')
52 |                 net = slim.conv2d(net, 512, [3, 3], scope='sub_conv14')
53 |
54 |                 self.sub_stage_img_feature = slim.conv2d(net, 128, [3, 3],
55 |                                                          scope='sub_stage_img_feature')
56 |
57 |             with tf.variable_scope('stage_1'):
58 |                 conv1 = slim.conv2d(self.sub_stage_img_feature, 512, [1, 1],
59 |                                     scope='conv1')
60 |                 self.stage_heatmap.append(slim.conv2d(conv1, self.joints, [1, 1],
61 |                                                       scope='stage_heatmap'))
62 |
63 |         for stage in range(2, self.stages + 1):
64 |             self._middle_conv(stage)
65 |
66 |     def _middle_conv(self, stage):
67 |         with tf.variable_scope('stage_' + str(stage)):
68 |             self.current_featuremap = tf.concat([self.stage_heatmap[stage-2],
69 |                                                  self.sub_stage_img_feature,
70 |                                                  # self.center_map,
71 |                                                  ],
72 |                                                 axis=3)
73 |             with slim.arg_scope([slim.conv2d],
74 |                                 padding='SAME',
75 |                                 activation_fn=tf.nn.relu,
76 |                                 weights_initializer=tf.contrib.layers.xavier_initializer()):
77 |                 mid_net = slim.conv2d(self.current_featuremap, 128, [7, 7], scope='mid_conv1')
78 |                 mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv2')
79 |                 mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv3')
80 |                 mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv4')
81 |                 mid_net = slim.conv2d(mid_net, 128, [7, 7], scope='mid_conv5')
82 |                 mid_net = slim.conv2d(mid_net, 128, [1, 1], scope='mid_conv6')
83 |                 self.current_heatmap = slim.conv2d(mid_net, 
self.joints, [1, 1], 84 | scope='mid_conv7') 85 | self.stage_heatmap.append(self.current_heatmap) 86 | 87 | def build_loss(self, gt_heatmap, lr, lr_decay_rate, lr_decay_step): 88 | self.gt_heatmap = gt_heatmap 89 | self.total_loss = 0 90 | self.learning_rate = lr 91 | self.lr_decay_rate = lr_decay_rate 92 | self.lr_decay_step = lr_decay_step 93 | 94 | for stage in range(self.stages): 95 | with tf.variable_scope('stage' + str(stage + 1) + '_loss'): 96 | self.stage_loss[stage] = tf.nn.l2_loss(self.stage_heatmap[stage] - self.gt_heatmap, 97 | name='l2_loss') / self.batch_size 98 | tf.summary.scalar('stage' + str(stage + 1) + '_loss', self.stage_loss[stage]) 99 | 100 | with tf.variable_scope('total_loss'): 101 | for stage in range(self.stages): 102 | self.total_loss += self.stage_loss[stage] 103 | tf.summary.scalar('total loss', self.total_loss) 104 | 105 | with tf.variable_scope('train'): 106 | self.global_step = tf.contrib.framework.get_or_create_global_step() 107 | 108 | self.lr = tf.train.exponential_decay(self.learning_rate, 109 | global_step=self.global_step, 110 | decay_rate=self.lr_decay_rate, 111 | decay_steps=self.lr_decay_step) 112 | tf.summary.scalar('learning rate', self.lr) 113 | 114 | self.train_op = tf.contrib.layers.optimize_loss(loss=self.total_loss, 115 | global_step=self.global_step, 116 | learning_rate=self.lr, 117 | optimizer='Adam') 118 | self.merged_summary = tf.summary.merge_all() 119 | 120 | def load_weights_from_file(self, weight_file_path, sess, finetune=True): 121 | # weight_file_object = open(weight_file_path, 'rb') 122 | weights = pickle.load(open(weight_file_path, 'rb'), encoding='latin1') 123 | 124 | with tf.variable_scope('', reuse=True): 125 | ## Pre stage conv 126 | # conv1 127 | for layer in range(1, 3): 128 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/kernel') 129 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer) + '/bias') 130 | 131 | loaded_kernel = weights['conv1_' + str(layer)] 132 | loaded_bias = weights['conv1_' + str(layer) + '_b'] 133 | 134 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 135 | sess.run(tf.assign(conv_bias, loaded_bias)) 136 | 137 | # conv2 138 | for layer in range(1, 3): 139 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/kernel') 140 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 2) + '/bias') 141 | 142 | loaded_kernel = weights['conv2_' + str(layer)] 143 | loaded_bias = weights['conv2_' + str(layer) + '_b'] 144 | 145 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 146 | sess.run(tf.assign(conv_bias, loaded_bias)) 147 | 148 | # conv3 149 | for layer in range(1, 5): 150 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/kernel') 151 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 4) + '/bias') 152 | 153 | loaded_kernel = weights['conv3_' + str(layer)] 154 | loaded_bias = weights['conv3_' + str(layer) + '_b'] 155 | 156 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 157 | sess.run(tf.assign(conv_bias, loaded_bias)) 158 | 159 | # conv4 160 | for layer in range(1, 5): 161 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/kernel') 162 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 8) + '/bias') 163 | 164 | loaded_kernel = weights['conv4_' + str(layer)] 165 | loaded_bias = weights['conv4_' + str(layer) + '_b'] 166 | 167 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 168 | sess.run(tf.assign(conv_bias, loaded_bias)) 169 | 170 | # conv5 171 | for layer in 
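build_loss above is the CPM intermediate-supervision scheme: every stage's heatmap is penalized against the same ground truth, and the per-stage L2 terms are summed into total_loss. A condensed sketch of just that reduction (tensor shapes assumed to match the model above):

```python
import tensorflow as tf


def cpm_total_loss(stage_heatmaps, gt_heatmap, batch_size):
    # one L2 term per stage, all regressed to the same ground-truth heatmap
    stage_losses = [tf.nn.l2_loss(hm - gt_heatmap) / batch_size
                    for hm in stage_heatmaps]
    return tf.add_n(stage_losses)
```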
range(1, 3): 172 | conv_kernel = tf.get_variable('sub_stages/sub_conv' + str(layer + 12) + '/kernel') 173 | conv_bias = tf.get_variable('sub_stages/sub_conv' + str(layer + 12) + '/bias') 174 | 175 | loaded_kernel = weights['conv5_' + str(layer)] 176 | loaded_bias = weights['conv5_' + str(layer) + '_b'] 177 | 178 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 179 | sess.run(tf.assign(conv_bias, loaded_bias)) 180 | 181 | # conv5_3_CPM 182 | conv_kernel = tf.get_variable('sub_stages/sub_stage_img_feature/kernel') 183 | conv_bias = tf.get_variable('sub_stages/sub_stage_img_feature/bias') 184 | 185 | loaded_kernel = weights['conv5_3_CPM'] 186 | loaded_bias = weights['conv5_3_CPM_b'] 187 | 188 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 189 | sess.run(tf.assign(conv_bias, loaded_bias)) 190 | 191 | ## stage 1 192 | conv_kernel = tf.get_variable('stage_1/conv1/kernel') 193 | conv_bias = tf.get_variable('stage_1/conv1/bias') 194 | 195 | loaded_kernel = weights['conv6_1_CPM'] 196 | loaded_bias = weights['conv6_1_CPM_b'] 197 | 198 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 199 | sess.run(tf.assign(conv_bias, loaded_bias)) 200 | 201 | if finetune != True: 202 | conv_kernel = tf.get_variable('stage_1/stage_heatmap/kernel') 203 | conv_bias = tf.get_variable('stage_1/stage_heatmap/bias') 204 | 205 | loaded_kernel = weights['conv6_2_CPM'] 206 | loaded_bias = weights['conv6_2_CPM_b'] 207 | 208 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 209 | sess.run(tf.assign(conv_bias, loaded_bias)) 210 | 211 | ## stage 2 and behind 212 | for stage in range(2, self.stages + 1): 213 | for layer in range(1, 8): 214 | conv_kernel = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/kernel') 215 | conv_bias = tf.get_variable('stage_' + str(stage) + '/mid_conv' + str(layer) + '/bias') 216 | 217 | loaded_kernel = weights['Mconv' + str(layer) + '_stage' + str(stage)] 218 | loaded_bias = weights['Mconv' + str(layer) + '_stage' + str(stage) + '_b'] 219 | 220 | sess.run(tf.assign(conv_kernel, loaded_kernel)) 221 | sess.run(tf.assign(conv_bias, loaded_bias)) 222 | -------------------------------------------------------------------------------- /models/nets/cpm_hand_v2.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.nets.CPM import CPM 3 | 4 | 5 | 6 | class CPM_Model(CPM): 7 | def __init__(self, input_size, heatmap_size, stages, joints, img_type='RGB', is_training=True): 8 | self.stages = stages 9 | self.stage_heatmap = [] 10 | self.stage_loss = [0 for _ in range(stages)] 11 | self.total_loss = 0 12 | self.input_image = None 13 | self.center_map = None 14 | self.gt_heatmap = None 15 | self.init_lr = 0 16 | self.merged_summary = None 17 | self.joints = joints 18 | self.batch_size = 0 19 | self.inference_type = 'Train' 20 | 21 | if img_type == 'RGB': 22 | self.input_images = tf.placeholder(dtype=tf.float32, 23 | shape=(None, input_size, input_size, 3), 24 | name='input_placeholder') 25 | elif img_type == 'GRAY': 26 | self.input_images = tf.placeholder(dtype=tf.float32, 27 | shape=(None, input_size, input_size, 1), 28 | name='input_placeholder') 29 | # self.cmap_placeholder = tf.placeholder(dtype=tf.float32, 30 | # shape=(None, input_size, input_size, 1), 31 | # name='cmap_placeholder') 32 | self.gt_hmap_placeholder = tf.placeholder(dtype=tf.float32, 33 | shape=(None, heatmap_size, heatmap_size, joints + 1), 34 | name='gt_hmap_placeholder') 35 | self._build_model() 36 | 37 | def _build_model(self): 38 | # with 
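gt_hmap_placeholder above is allocated with joints + 1 channels: the extra channel is a background map, which this repo builds as one minus the pixelwise maximum over all joint maps (see make_gaussian_batch and create_cpm_tfr_fulljoints.py later in this dump). The construction in NumPy:

```python
import numpy as np


def add_background_channel(joint_heatmaps):
    # joint_heatmaps: (H, W, num_joints) with responses in [0, 1]
    background = 1.0 - np.amax(joint_heatmaps, axis=2, keepdims=True)
    return np.concatenate([joint_heatmaps, background], axis=2)
```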
tf.variable_scope('pooled_center_map'): 39 | # self.center_map = tf.layers.average_pooling2d(inputs=self.cmap_placeholder, 40 | # pool_size=[9, 9], 41 | # strides=[8, 8], 42 | # padding='same', 43 | # name='center_map') 44 | with tf.variable_scope('sub_stages'): 45 | sub_conv1 = tf.layers.conv2d(inputs=self.input_images, 46 | filters=64, 47 | kernel_size=[3, 3], 48 | strides=[1, 1], 49 | padding='same', 50 | activation=tf.nn.relu, 51 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 52 | name='sub_conv1') 53 | sub_conv2 = tf.layers.conv2d(inputs=sub_conv1, 54 | filters=64, 55 | kernel_size=[3, 3], 56 | strides=[1, 1], 57 | padding='same', 58 | activation=tf.nn.relu, 59 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 60 | name='sub_conv2') 61 | sub_pool1 = tf.layers.max_pooling2d(inputs=sub_conv2, 62 | pool_size=[2, 2], 63 | strides=2, 64 | padding='valid', 65 | name='sub_pool1') 66 | sub_conv3 = tf.layers.conv2d(inputs=sub_pool1, 67 | filters=128, 68 | kernel_size=[3, 3], 69 | strides=[1, 1], 70 | padding='same', 71 | activation=tf.nn.relu, 72 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 73 | name='sub_conv3') 74 | sub_conv4 = tf.layers.conv2d(inputs=sub_conv3, 75 | filters=128, 76 | kernel_size=[3, 3], 77 | strides=[1, 1], 78 | padding='same', 79 | activation=tf.nn.relu, 80 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 81 | name='sub_conv4') 82 | # sub_pool2 = tf.layers.max_pooling2d(inputs=sub_conv4, 83 | # pool_size=[2, 2], 84 | # strides=2, 85 | # padding='valid', 86 | # name='sub_pool2') 87 | sub_conv5 = tf.layers.conv2d(inputs=sub_conv4, 88 | filters=256, 89 | kernel_size=[3, 3], 90 | strides=[1, 1], 91 | padding='same', 92 | activation=tf.nn.relu, 93 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 94 | name='sub_conv5') 95 | sub_conv6 = tf.layers.conv2d(inputs=sub_conv5, 96 | filters=256, 97 | kernel_size=[3, 3], 98 | strides=[1, 1], 99 | padding='same', 100 | activation=tf.nn.relu, 101 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 102 | name='sub_conv6') 103 | sub_conv7 = tf.layers.conv2d(inputs=sub_conv6, 104 | filters=256, 105 | kernel_size=[3, 3], 106 | strides=[1, 1], 107 | padding='same', 108 | activation=tf.nn.relu, 109 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 110 | name='sub_conv7') 111 | sub_conv8 = tf.layers.conv2d(inputs=sub_conv7, 112 | filters=256, 113 | kernel_size=[3, 3], 114 | strides=[1, 1], 115 | padding='same', 116 | activation=tf.nn.relu, 117 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 118 | name='sub_conv8') 119 | sub_pool3 = tf.layers.max_pooling2d(inputs=sub_conv8, 120 | pool_size=[2, 2], 121 | strides=2, 122 | padding='valid', 123 | name='sub_pool3') 124 | sub_conv9 = tf.layers.conv2d(inputs=sub_pool3, 125 | filters=512, 126 | kernel_size=[3, 3], 127 | strides=[1, 1], 128 | padding='same', 129 | activation=tf.nn.relu, 130 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 131 | name='sub_conv9') 132 | sub_conv10 = tf.layers.conv2d(inputs=sub_conv9, 133 | filters=512, 134 | kernel_size=[3, 3], 135 | strides=[1, 1], 136 | padding='same', 137 | activation=tf.nn.relu, 138 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 139 | name='sub_conv10') 140 | sub_conv11 = tf.layers.conv2d(inputs=sub_conv10, 141 | filters=512, 142 | kernel_size=[3, 3], 143 | strides=[1, 1], 144 | padding='same', 145 | activation=tf.nn.relu, 146 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 147 | name='sub_conv11') 148 | sub_conv12 = 
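Note that sub_pool2 is commented out in this v2 backbone, so only sub_pool1 and sub_pool3 downsample and the stage heatmaps come out at input_size // 4 rather than the // 8 of the slim variant. A toy shape check (the 128 below is an assumed example input size, not a value from config.py):

```python
def v2_feature_size(input_size):
    size = input_size
    for _ in ('sub_pool1', 'sub_pool3'):  # the two active 2x2 stride-2 pools
        size //= 2
    return size


assert v2_feature_size(128) == 32
```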
tf.layers.conv2d(inputs=sub_conv11, 149 | filters=512, 150 | kernel_size=[3, 3], 151 | strides=[1, 1], 152 | padding='same', 153 | activation=tf.nn.relu, 154 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 155 | name='sub_conv12') 156 | sub_conv13 = tf.layers.conv2d(inputs=sub_conv12, 157 | filters=512, 158 | kernel_size=[3, 3], 159 | strides=[1, 1], 160 | padding='same', 161 | activation=tf.nn.relu, 162 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 163 | name='sub_conv13') 164 | sub_conv14 = tf.layers.conv2d(inputs=sub_conv13, 165 | filters=512, 166 | kernel_size=[3, 3], 167 | strides=[1, 1], 168 | padding='same', 169 | activation=tf.nn.relu, 170 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 171 | name='sub_conv14') 172 | self.sub_stage_img_feature = tf.layers.conv2d(inputs=sub_conv14, 173 | filters=128, 174 | kernel_size=[3, 3], 175 | strides=[1, 1], 176 | padding='same', 177 | activation=tf.nn.relu, 178 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 179 | name='sub_stage_img_feature') 180 | 181 | with tf.variable_scope('stage_1'): 182 | conv1 = tf.layers.conv2d(inputs=self.sub_stage_img_feature, 183 | filters=512, 184 | kernel_size=[1, 1], 185 | strides=[1, 1], 186 | padding='valid', 187 | activation=tf.nn.relu, 188 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 189 | name='conv1') 190 | self.stage_heatmap.append(tf.layers.conv2d(inputs=conv1, 191 | filters=self.joints+1, 192 | kernel_size=[1, 1], 193 | strides=[1, 1], 194 | padding='valid', 195 | activation=None, 196 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 197 | name='stage_heatmap')) 198 | for stage in range(2, self.stages + 1): 199 | self._middle_conv(stage) 200 | 201 | def _middle_conv(self, stage): 202 | with tf.variable_scope('stage_' + str(stage)): 203 | self.current_featuremap = tf.concat([self.stage_heatmap[stage - 2], 204 | self.sub_stage_img_feature, 205 | # self.center_map], 206 | ], 207 | axis=3) 208 | mid_conv1 = tf.layers.conv2d(inputs=self.current_featuremap, 209 | filters=128, 210 | kernel_size=[7, 7], 211 | strides=[1, 1], 212 | padding='same', 213 | activation=tf.nn.relu, 214 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 215 | name='mid_conv1') 216 | mid_conv2 = tf.layers.conv2d(inputs=mid_conv1, 217 | filters=128, 218 | kernel_size=[7, 7], 219 | strides=[1, 1], 220 | padding='same', 221 | activation=tf.nn.relu, 222 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 223 | name='mid_conv2') 224 | mid_conv3 = tf.layers.conv2d(inputs=mid_conv2, 225 | filters=128, 226 | kernel_size=[7, 7], 227 | strides=[1, 1], 228 | padding='same', 229 | activation=tf.nn.relu, 230 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 231 | name='mid_conv3') 232 | mid_conv4 = tf.layers.conv2d(inputs=mid_conv3, 233 | filters=128, 234 | kernel_size=[7, 7], 235 | strides=[1, 1], 236 | padding='same', 237 | activation=tf.nn.relu, 238 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 239 | name='mid_conv4') 240 | mid_conv5 = tf.layers.conv2d(inputs=mid_conv4, 241 | filters=128, 242 | kernel_size=[7, 7], 243 | strides=[1, 1], 244 | padding='same', 245 | activation=tf.nn.relu, 246 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 247 | name='mid_conv5') 248 | mid_conv6 = tf.layers.conv2d(inputs=mid_conv5, 249 | filters=128, 250 | kernel_size=[1, 1], 251 | strides=[1, 1], 252 | padding='valid', 253 | activation=tf.nn.relu, 254 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 255 | 
name='mid_conv6') 256 | self.current_heatmap = tf.layers.conv2d(inputs=mid_conv6, 257 | filters=self.joints+1, 258 | kernel_size=[1, 1], 259 | strides=[1, 1], 260 | padding='valid', 261 | activation=None, 262 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 263 | name='mid_conv7') 264 | self.stage_heatmap.append(self.current_heatmap) 265 | 266 | def build_loss(self, lr, lr_decay_rate, lr_decay_step, optimizer='Adam'): 267 | self.total_loss = 0 268 | self.init_lr = lr 269 | self.lr_decay_rate = lr_decay_rate 270 | self.lr_decay_step = lr_decay_step 271 | self.optimizer = optimizer 272 | self.batch_size = tf.cast(tf.shape(self.input_images)[0], dtype=tf.float32) 273 | 274 | 275 | for stage in range(self.stages): 276 | with tf.variable_scope('stage' + str(stage + 1) + '_loss'): 277 | self.stage_loss[stage] = tf.nn.l2_loss(self.stage_heatmap[stage] - self.gt_hmap_placeholder, 278 | name='l2_loss') / self.batch_size 279 | tf.summary.scalar('stage' + str(stage + 1) + '_loss', self.stage_loss[stage]) 280 | 281 | with tf.variable_scope('total_loss'): 282 | for stage in range(self.stages): 283 | self.total_loss += self.stage_loss[stage] 284 | tf.summary.scalar('total loss'.format(self.inference_type), self.total_loss) 285 | 286 | with tf.variable_scope('train'): 287 | self.global_step = tf.contrib.framework.get_or_create_global_step() 288 | 289 | self.cur_lr = tf.train.exponential_decay(self.init_lr, 290 | global_step=self.global_step, 291 | decay_rate=self.lr_decay_rate, 292 | decay_steps=self.lr_decay_step) 293 | tf.summary.scalar('global learning rate', self.cur_lr) 294 | 295 | self.train_op = tf.contrib.layers.optimize_loss(loss=self.total_loss, 296 | global_step=self.global_step, 297 | learning_rate=self.cur_lr, 298 | optimizer=self.optimizer) 299 | -------------------------------------------------------------------------------- /run_demo_hand_with_tracker.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import math 3 | import os 4 | import time 5 | 6 | import cv2 7 | import numpy as np 8 | import tensorflow as tf 9 | from config import FLAGS 10 | from utils import cpm_utils, tracking_module, utils 11 | 12 | cpm_model = importlib.import_module('models.nets.' 
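run_demo_hand_with_tracker.py picks the network implementation at runtime from the FLAGS.network_def string, so switching between cpm_hand, cpm_hand_slim and cpm_hand_v2 is purely a config change. The pattern in isolation ('cpm_hand_v2' is just an example value):

```python
import importlib


def load_model_class(network_def='cpm_hand_v2'):
    module = importlib.import_module('models.nets.' + network_def)
    return module.CPM_Model  # each net module exposes a CPM_Model class
```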
+ FLAGS.network_def) 13 | 14 | joint_detections = np.zeros(shape=(21, 2)) 15 | 16 | 17 | def main(argv): 18 | global joint_detections 19 | os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu_id) 20 | 21 | """ Initial tracker 22 | """ 23 | tracker = tracking_module.SelfTracker([FLAGS.webcam_height, FLAGS.webcam_width], FLAGS.input_size) 24 | 25 | """ Build network graph 26 | """ 27 | model = cpm_model.CPM_Model(input_size=FLAGS.input_size, 28 | heatmap_size=FLAGS.heatmap_size, 29 | stages=FLAGS.cpm_stages, 30 | joints=FLAGS.num_of_joints, 31 | img_type=FLAGS.color_channel, 32 | is_training=False) 33 | saver = tf.train.Saver() 34 | 35 | """ Get output node 36 | """ 37 | output_node = tf.get_default_graph().get_tensor_by_name(name=FLAGS.output_node_names) 38 | 39 | device_count = {'GPU': 1} if FLAGS.use_gpu else {'GPU': 0} 40 | sess_config = tf.ConfigProto(device_count=device_count) 41 | sess_config.gpu_options.per_process_gpu_memory_fraction = 0.2 42 | sess_config.gpu_options.allow_growth = True 43 | sess_config.allow_soft_placement = True 44 | with tf.Session(config=sess_config) as sess: 45 | 46 | model_path_suffix = os.path.join(FLAGS.network_def, 47 | 'input_{}_output_{}'.format(FLAGS.input_size, FLAGS.heatmap_size), 48 | 'joints_{}'.format(FLAGS.num_of_joints), 49 | 'stages_{}'.format(FLAGS.cpm_stages), 50 | 'init_{}_rate_{}_step_{}'.format(FLAGS.init_lr, FLAGS.lr_decay_rate, 51 | FLAGS.lr_decay_step) 52 | ) 53 | model_save_dir = os.path.join('models', 54 | 'weights', 55 | model_path_suffix) 56 | print('Load model from [{}]'.format(os.path.join(model_save_dir, FLAGS.model_path))) 57 | if FLAGS.model_path.endswith('pkl'): 58 | model.load_weights_from_file(FLAGS.model_path, sess, False) 59 | else: 60 | saver.restore(sess, 'models/weights/cpm_hand') 61 | 62 | # Check weights 63 | for variable in tf.global_variables(): 64 | with tf.variable_scope('', reuse=True): 65 | var = tf.get_variable(variable.name.split(':0')[0]) 66 | print(variable.name, np.mean(sess.run(var))) 67 | 68 | # Create webcam instance 69 | if FLAGS.DEMO_TYPE in ['MULTI', 'SINGLE', 'Joint_HM']: 70 | cam = cv2.VideoCapture(FLAGS.cam_id) 71 | 72 | # Create kalman filters 73 | if FLAGS.use_kalman: 74 | kalman_filter_array = [cv2.KalmanFilter(4, 2) for _ in range(FLAGS.num_of_joints)] 75 | for _, joint_kalman_filter in enumerate(kalman_filter_array): 76 | joint_kalman_filter.transitionMatrix = np.array( 77 | [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]], 78 | np.float32) 79 | joint_kalman_filter.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32) 80 | joint_kalman_filter.processNoiseCov = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]], 81 | np.float32) * FLAGS.kalman_noise 82 | else: 83 | kalman_filter_array = None 84 | 85 | if FLAGS.DEMO_TYPE.endswith(('png', 'jpg')): 86 | test_img = cpm_utils.read_image(FLAGS.DEMO_TYPE, [], FLAGS.input_size, 'IMAGE') 87 | test_img_resize = cv2.resize(test_img, (FLAGS.input_size, FLAGS.input_size)) 88 | 89 | test_img_input = normalize_and_centralize_img(test_img_resize) 90 | 91 | t1 = time.time() 92 | predict_heatmap, stage_heatmap_np = sess.run([model.current_heatmap, 93 | output_node, 94 | ], 95 | feed_dict={model.input_images: test_img_input} 96 | ) 97 | print('fps: %.2f' % (1 / (time.time() - t1))) 98 | 99 | correct_and_draw_hand(test_img, 100 | cv2.resize(stage_heatmap_np[0], (FLAGS.input_size, FLAGS.input_size)), 101 | kalman_filter_array, tracker, tracker.input_crop_ratio, test_img) 102 | 103 | # Show visualized image 104 | # demo_img = 
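Each joint above gets its own constant-velocity Kalman filter: a four-dimensional state (x, y, vx, vy) with only (x, y) measured, and process noise scaled by FLAGS.kalman_noise. The same construction as a standalone helper (the 3e-5 default is an assumed, illustrative value, not the config default):

```python
import cv2
import numpy as np


def make_joint_filter(kalman_noise=3e-5):
    kf = cv2.KalmanFilter(4, 2)  # state (x, y, vx, vy), measurement (x, y)
    kf.transitionMatrix = np.array([[1, 0, 1, 0], [0, 1, 0, 1],
                                    [0, 0, 1, 0], [0, 0, 0, 1]], np.float32)
    kf.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32)
    kf.processNoiseCov = np.eye(4, dtype=np.float32) * kalman_noise
    return kf
```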
visualize_result(test_img, stage_heatmap_np, kalman_filter_array) 105 | cv2.imshow('demo_img', test_img.astype(np.uint8)) 106 | cv2.waitKey(0) 107 | 108 | elif FLAGS.DEMO_TYPE in ['SINGLE', 'MULTI']: 109 | i = 0 110 | while True: 111 | # Prepare input image 112 | _, full_img = cam.read() 113 | 114 | test_img = tracker.tracking_by_joints(full_img, joint_detections=joint_detections) 115 | crop_full_scale = tracker.input_crop_ratio 116 | test_img_copy = test_img.copy() 117 | 118 | # White balance 119 | test_img_wb = utils.img_white_balance(test_img, 5) 120 | test_img_input = normalize_and_centralize_img(test_img_wb) 121 | 122 | # Inference 123 | t1 = time.time() 124 | stage_heatmap_np = sess.run([output_node], 125 | feed_dict={model.input_images: test_img_input}) 126 | print('FPS: %.2f' % (1 / (time.time() - t1))) 127 | 128 | local_img = visualize_result(full_img, stage_heatmap_np, kalman_filter_array, tracker, crop_full_scale, 129 | test_img_copy) 130 | 131 | cv2.imshow('local_img', local_img.astype(np.uint8)) # cropped hand image (used as training data) 132 | # cv2.imwrite('./storePic/11'+str(i)+'.jpg', local_img.astype(np.uint8), [int(cv2.IMWRITE_JPEG_QUALITY), 90]) 133 | i += 1 134 | cv2.imshow('global_img', full_img.astype(np.uint8)) # full frame with the single-hand bounding box 135 | 136 | if cv2.waitKey(1) == ord('q'): break 137 | 138 | elif FLAGS.DEMO_TYPE == 'Joint_HM': 139 | while True: 140 | # Prepare input image 141 | test_img = cpm_utils.read_image([], cam, FLAGS.input_size, 'WEBCAM') 142 | test_img_resize = cv2.resize(test_img, (FLAGS.input_size, FLAGS.input_size)) 143 | 144 | test_img_input = normalize_and_centralize_img(test_img_resize) 145 | 146 | # Inference 147 | t1 = time.time() 148 | stage_heatmap_np = sess.run([output_node], 149 | feed_dict={model.input_images: test_img_input}) 150 | print('FPS: %.2f' % (1 / (time.time() - t1))) 151 | 152 | demo_stage_heatmap = stage_heatmap_np[len(stage_heatmap_np) - 1][0, :, :, 153 | 0:FLAGS.num_of_joints].reshape( 154 | (FLAGS.heatmap_size, FLAGS.heatmap_size, FLAGS.num_of_joints)) 155 | demo_stage_heatmap = cv2.resize(demo_stage_heatmap, (FLAGS.input_size, FLAGS.input_size)) 156 | 157 | vertical_imgs = [] 158 | tmp_img = None 159 | joint_coord_set = np.zeros((FLAGS.num_of_joints, 2)) 160 | 161 | for joint_num in range(FLAGS.num_of_joints): 162 | # Stack four heatmaps per column 163 | if (joint_num % 4) == 0 and joint_num != 0: 164 | vertical_imgs.append(tmp_img) 165 | tmp_img = None 166 | 167 | demo_stage_heatmap[:, :, joint_num] *= (255 / np.max(demo_stage_heatmap[:, :, joint_num])) 168 | 169 | # Plot color joints 170 | if np.min(demo_stage_heatmap[:, :, joint_num]) > -50: 171 | joint_coord = np.unravel_index(np.argmax(demo_stage_heatmap[:, :, joint_num]), 172 | (FLAGS.input_size, FLAGS.input_size)) 173 | joint_coord_set[joint_num, :] = joint_coord 174 | color_code_num = (joint_num // 4) 175 | 176 | if joint_num in [0, 4, 8, 12, 16]: 177 | joint_color = list( 178 | map(lambda x: x + 35 * (joint_num % 4), FLAGS.joint_color_code[color_code_num])) 179 | cv2.circle(test_img, center=(joint_coord[1], joint_coord[0]), radius=3, color=joint_color, 180 | thickness=-1) 181 | else: 182 | joint_color = list( 183 | map(lambda x: x + 35 * (joint_num % 4), FLAGS.joint_color_code[color_code_num])) 184 | cv2.circle(test_img, center=(joint_coord[1], joint_coord[0]), radius=3, color=joint_color, 185 | thickness=-1) 186 | 187 | # Put text 188 | tmp = demo_stage_heatmap[:, :, joint_num].astype(np.uint8) 189 | tmp = cv2.putText(tmp, 'Min:' + str(np.min(demo_stage_heatmap[:, :, joint_num])), 190 | org=(5, 20), 
fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.3, color=150) 191 | tmp = cv2.putText(tmp, 'Mean:' + str(np.mean(demo_stage_heatmap[:, :, joint_num])), 192 | org=(5, 30), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.3, color=150) 193 | tmp_img = np.concatenate((tmp_img, tmp), axis=0) \ 194 | if tmp_img is not None else tmp 195 | 196 | # Plot FLAGS.limbs 197 | for limb_num in range(len(FLAGS.limbs)): 198 | if np.min(demo_stage_heatmap[:, :, FLAGS.limbs[limb_num][0]]) > -2000 and np.min( 199 | demo_stage_heatmap[:, :, FLAGS.limbs[limb_num][1]]) > -2000: 200 | x1 = joint_coord_set[FLAGS.limbs[limb_num][0], 0] 201 | y1 = joint_coord_set[FLAGS.limbs[limb_num][0], 1] 202 | x2 = joint_coord_set[FLAGS.limbs[limb_num][1], 0] 203 | y2 = joint_coord_set[FLAGS.limbs[limb_num][1], 1] 204 | length = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5 205 | if length < 10000 and length > 5: 206 | deg = math.degrees(math.atan2(x1 - x2, y1 - y2)) 207 | polygon = cv2.ellipse2Poly((int((y1 + y2) / 2), int((x1 + x2) / 2)), 208 | (int(length / 2), 3), 209 | int(deg), 210 | 0, 360, 1) 211 | color_code_num = limb_num // 4 212 | limb_color = list( 213 | map(lambda x: x + 35 * (limb_num % 4), FLAGS.joint_color_code[color_code_num])) 214 | 215 | cv2.fillConvexPoly(test_img, polygon, color=limb_color) 216 | 217 | if tmp_img is not None: 218 | tmp_img = np.lib.pad(tmp_img, ((0, vertical_imgs[0].shape[0] - tmp_img.shape[0]), (0, 0)), 219 | 'constant', constant_values=(0, 0)) 220 | vertical_imgs.append(tmp_img) 221 | 222 | # Concat horizontally 223 | output_img = None 224 | for col in range(len(vertical_imgs)): 225 | output_img = np.concatenate((output_img, vertical_imgs[col]), axis=1) if output_img is not None else \ 226 | vertical_imgs[col] 227 | 228 | output_img = output_img.astype(np.uint8) 229 | output_img = cv2.applyColorMap(output_img, cv2.COLORMAP_JET) 230 | test_img = cv2.resize(test_img, (300, 300), cv2.INTER_LANCZOS4) 231 | cv2.imshow('hm', output_img) 232 | cv2.moveWindow('hm', 2000, 200) 233 | cv2.imshow('rgb', test_img) 234 | cv2.moveWindow('rgb', 2000, 750) 235 | if cv2.waitKey(1) == ord('q'): break 236 | 237 | 238 | def normalize_and_centralize_img(img): 239 | if FLAGS.color_channel == 'GRAY': 240 | img = np.dot(img[..., :3], [0.299, 0.587, 0.114]).reshape((FLAGS.input_size, FLAGS.input_size, 1)) 241 | 242 | if FLAGS.normalize_img: 243 | test_img_input = img / 256.0 - 0.5 244 | test_img_input = np.expand_dims(test_img_input, axis=0) 245 | else: 246 | test_img_input = img - 128.0 247 | test_img_input = np.expand_dims(test_img_input, axis=0) 248 | return test_img_input 249 | 250 | 251 | def visualize_result(test_img, stage_heatmap_np, kalman_filter_array, tracker, crop_full_scale, crop_img): 252 | demo_stage_heatmaps = [] 253 | if FLAGS.DEMO_TYPE == 'MULTI': 254 | for stage in range(len(stage_heatmap_np)): 255 | demo_stage_heatmap = stage_heatmap_np[stage][0, :, :, 0:FLAGS.num_of_joints].reshape( 256 | (FLAGS.heatmap_size, FLAGS.heatmap_size, FLAGS.num_of_joints)) 257 | demo_stage_heatmap = cv2.resize(demo_stage_heatmap, (FLAGS.input_size, FLAGS.input_size)) 258 | demo_stage_heatmap = np.amax(demo_stage_heatmap, axis=2) 259 | demo_stage_heatmap = np.reshape(demo_stage_heatmap, (FLAGS.input_size, FLAGS.input_size, 1)) 260 | demo_stage_heatmap = np.repeat(demo_stage_heatmap, 3, axis=2) 261 | demo_stage_heatmap *= 255 262 | demo_stage_heatmaps.append(demo_stage_heatmap) 263 | 264 | last_heatmap = stage_heatmap_np[len(stage_heatmap_np) - 1][0, :, :, 0:FLAGS.num_of_joints].reshape( 265 | (FLAGS.heatmap_size, 
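normalize_and_centralize_img above has two modes: scale pixels to roughly [-0.5, 0.5], or just subtract 128; both then prepend a batch dimension. The same arithmetic without the FLAGS dependency, for reference:

```python
import numpy as np


def normalize(img, normalize_img=True):
    img = img.astype(np.float32)
    out = img / 256.0 - 0.5 if normalize_img else img - 128.0
    return np.expand_dims(out, axis=0)  # add the batch dimension
```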
FLAGS.heatmap_size, FLAGS.num_of_joints)) 266 | last_heatmap = cv2.resize(last_heatmap, (FLAGS.input_size, FLAGS.input_size)) 267 | else: 268 | last_heatmap = stage_heatmap_np[len(stage_heatmap_np) - 1][0, :, :, 0:FLAGS.num_of_joints].reshape( 269 | (FLAGS.heatmap_size, FLAGS.heatmap_size, FLAGS.num_of_joints)) 270 | last_heatmap = cv2.resize(last_heatmap, (FLAGS.input_size, FLAGS.input_size)) 271 | 272 | correct_and_draw_hand(test_img, last_heatmap, kalman_filter_array, tracker, crop_full_scale, crop_img) 273 | 274 | if FLAGS.DEMO_TYPE == 'MULTI': 275 | if len(demo_stage_heatmaps) > 3: 276 | upper_img = np.concatenate((demo_stage_heatmaps[0], demo_stage_heatmaps[1], demo_stage_heatmaps[2]), axis=1) 277 | lower_img = np.concatenate( 278 | (demo_stage_heatmaps[3], demo_stage_heatmaps[len(stage_heatmap_np) - 1], crop_img), 279 | axis=1) 280 | demo_img = np.concatenate((upper_img, lower_img), axis=0) 281 | return demo_img 282 | else: 283 | # return np.concatenate((demo_stage_heatmaps[0], demo_stage_heatmaps[len(stage_heatmap_np) - 1], crop_img), 284 | # axis=1) 285 | 286 | return demo_stage_heatmaps[0] 287 | # np.concatenate merges the arrays 288 | 289 | else: 290 | return crop_img 291 | 292 | 293 | def correct_and_draw_hand(full_img, stage_heatmap_np, kalman_filter_array, tracker, crop_full_scale, crop_img): 294 | global joint_detections 295 | joint_coord_set = np.zeros((FLAGS.num_of_joints, 2)) 296 | local_joint_coord_set = np.zeros((FLAGS.num_of_joints, 2)) 297 | 298 | mean_response_val = 0.0 299 | 300 | # Plot joint colors 301 | if kalman_filter_array is not None: 302 | for joint_num in range(FLAGS.num_of_joints): 303 | tmp_heatmap = stage_heatmap_np[:, :, joint_num] 304 | joint_coord = np.unravel_index(np.argmax(tmp_heatmap), 305 | (FLAGS.input_size, FLAGS.input_size)) 306 | mean_response_val += tmp_heatmap[joint_coord[0], joint_coord[1]] 307 | joint_coord = np.array(joint_coord).reshape((2, 1)).astype(np.float32) 308 | kalman_filter_array[joint_num].correct(joint_coord) 309 | kalman_pred = kalman_filter_array[joint_num].predict() 310 | correct_coord = np.array([kalman_pred[0], kalman_pred[1]]).reshape((2)) 311 | local_joint_coord_set[joint_num, :] = correct_coord 312 | 313 | # Resize back 314 | correct_coord /= crop_full_scale 315 | 316 | # Subtract padding border 317 | correct_coord[0] -= (tracker.pad_boundary[0] / crop_full_scale) 318 | correct_coord[1] -= (tracker.pad_boundary[2] / crop_full_scale) 319 | correct_coord[0] += tracker.bbox[0] 320 | correct_coord[1] += tracker.bbox[2] 321 | joint_coord_set[joint_num, :] = correct_coord 322 | 323 | else: 324 | for joint_num in range(FLAGS.num_of_joints): 325 | tmp_heatmap = stage_heatmap_np[:, :, joint_num] 326 | joint_coord = np.unravel_index(np.argmax(tmp_heatmap), 327 | (FLAGS.input_size, FLAGS.input_size)) 328 | mean_response_val += tmp_heatmap[joint_coord[0], joint_coord[1]] 329 | joint_coord = np.array(joint_coord).astype(np.float32) 330 | 331 | local_joint_coord_set[joint_num, :] = joint_coord 332 | 333 | # Resize back 334 | joint_coord /= crop_full_scale 335 | 336 | # Subtract padding border 337 | joint_coord[0] -= (tracker.pad_boundary[2] / crop_full_scale) 338 | joint_coord[1] -= (tracker.pad_boundary[0] / crop_full_scale) 339 | joint_coord[0] += tracker.bbox[0] 340 | joint_coord[1] += tracker.bbox[2] 341 | joint_coord_set[joint_num, :] = joint_coord 342 | 343 | draw_hand(full_img, joint_coord_set, tracker.loss_track) 344 | draw_hand(crop_img, local_joint_coord_set, tracker.loss_track) 345 | joint_detections = joint_coord_set 346 | 347 
| if mean_response_val >= 1: 348 | tracker.loss_track = False 349 | else: 350 | tracker.loss_track = True 351 | 352 | cv2.putText(full_img, 'Response: {:<.3f}'.format(mean_response_val), 353 | org=(20, 20), fontFace=cv2.FONT_HERSHEY_PLAIN, fontScale=1, color=(255, 0, 0)) 354 | 355 | 356 | def draw_hand(full_img, joint_coords, is_loss_track): 357 | if is_loss_track: 358 | joint_coords = FLAGS.default_hand 359 | 360 | # Plot joints 361 | for joint_num in range(FLAGS.num_of_joints): 362 | color_code_num = (joint_num // 4) 363 | if joint_num in [0, 4, 8, 12, 16]: 364 | joint_color = list(map(lambda x: x + 35 * (joint_num % 4), FLAGS.joint_color_code[color_code_num])) 365 | cv2.circle(full_img, center=(int(joint_coords[joint_num][1]), int(joint_coords[joint_num][0])), radius=3, 366 | color=joint_color, thickness=-1) 367 | else: 368 | joint_color = list(map(lambda x: x + 35 * (joint_num % 4), FLAGS.joint_color_code[color_code_num])) 369 | cv2.circle(full_img, center=(int(joint_coords[joint_num][1]), int(joint_coords[joint_num][0])), radius=3, 370 | color=joint_color, thickness=-1) 371 | 372 | # Plot limbs 373 | for limb_num in range(len(FLAGS.limbs)): 374 | x1 = int(joint_coords[int(FLAGS.limbs[limb_num][0])][0]) 375 | y1 = int(joint_coords[int(FLAGS.limbs[limb_num][0])][1]) 376 | x2 = int(joint_coords[int(FLAGS.limbs[limb_num][1])][0]) 377 | y2 = int(joint_coords[int(FLAGS.limbs[limb_num][1])][1]) 378 | length = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5 379 | if length < 150 and length > 5: 380 | deg = math.degrees(math.atan2(x1 - x2, y1 - y2)) 381 | polygon = cv2.ellipse2Poly((int((y1 + y2) / 2), int((x1 + x2) / 2)), 382 | (int(length / 2), 3), 383 | int(deg), 384 | 0, 360, 1) 385 | color_code_num = limb_num // 4 386 | limb_color = list(map(lambda x: x + 35 * (limb_num % 4), FLAGS.joint_color_code[color_code_num])) 387 | cv2.fillConvexPoly(full_img, polygon, color=limb_color) 388 | 389 | 390 | if __name__ == '__main__': 391 | tf.app.run() 392 | -------------------------------------------------------------------------------- /useClassifyModel.py: -------------------------------------------------------------------------------- 1 | from skimage import io, transform 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | 6 | path1 = "./storePic/5.jpg" 7 | path2 = "./storePic/7.jpg" 8 | path3 = "./storePic/102304.jpg" 9 | path4 = "./storePic/12034.jpg" 10 | path5 = "./storePic/ok.jpg" 11 | path6 = "./storePic/2.jpg" 12 | 13 | gesture_dict = {0:'5',1:'7',2:'12034',3:'ok',4:'102304',5:'2'} 14 | 15 | w=100 16 | h=100 17 | c=3 18 | 19 | 20 | def read_one_image(path): 21 | img = io.imread(path) 22 | img = transform.resize(img,(w,h)) 23 | return np.asarray(img) 24 | 25 | with tf.Session() as sess: 26 | data = [] 27 | data1 = read_one_image(path1) 28 | data2 = read_one_image(path2) 29 | data3 = read_one_image(path3) 30 | data4 = read_one_image(path4) 31 | data5 = read_one_image(path5) 32 | data6 = read_one_image(path6) 33 | data.append(data1) 34 | data.append(data2) 35 | data.append(data3) 36 | data.append(data4) 37 | data.append(data5) 38 | data.append(data6) 39 | saver = tf.train.import_meta_graph('./classify/modelSave/model.ckpt.meta') 40 | saver.restore(sess,tf.train.latest_checkpoint('./classify/modelSave/')) 41 | 42 | graph = tf.get_default_graph() 43 | x = graph.get_tensor_by_name("x:0") 44 | feed_dict = {x:data} 45 | 46 | logits = graph.get_tensor_by_name("logits_eval:0") 47 | 48 | classification_result = sess.run(logits,feed_dict) 49 | 50 | # Print the prediction matrix 51 | print(classification_result) 52 | # Print the index of the max value in each row of the prediction matrix 53 | print(tf.argmax(classification_result, 1).eval()) 54 | # Map each index to its gesture class via the lookup dict 55 | output = [] 56 | output = tf.argmax(classification_result, 1).eval() 57 | for i in range(len(output)): 58 | print("Gesture", i + 1, "prediction: " + gesture_dict[output[i]]) 59 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/utils/__init__.py -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/cpm_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/utils/__pycache__/cpm_utils.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/tracking_module.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/utils/__pycache__/tracking_module.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yyyerica/HandGestureClassify/9cc5185a38f4ed48680f8d4d948929c01dbf6d2b/utils/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /utils/cpm_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import cv2 4 | 5 | 6 | M_PI = 3.14159 7 | 8 | 9 | # Compute gaussian kernel for input image 10 | def gaussian_img(img_height, img_width, c_x, c_y, variance): 11 | gaussian_map = np.zeros((img_height, img_width)) 12 | for x_p in range(img_width): 13 | for y_p in range(img_height): 14 | dist_sq = (x_p - c_x) * (x_p - c_x) + \ 15 | (y_p - c_y) * (y_p - c_y) 16 | exponent = dist_sq / 2.0 / variance / variance 17 | gaussian_map[y_p, x_p] = np.exp(-exponent) 18 | return gaussian_map 19 | 20 | 21 | def read_image(file, cam, boxsize, type): 22 | # from file 23 | if type == 'IMAGE': 24 | oriImg = cv2.imread(file) 25 | # from webcam 26 | elif type == 'WEBCAM': 27 | _, oriImg = cam.read() 28 | # from video 29 | elif type == 'VIDEO': 30 | oriImg = cv2.cvtColor(file, cv2.COLOR_BGR2RGB) 31 | 32 | if oriImg is None: 33 | print('oriImg is None') 34 | return None 35 | 36 | scale = boxsize / (oriImg.shape[0] * 1.0) 37 | imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LANCZOS4) 38 | 39 | output_img = np.ones((boxsize, boxsize, 3)) * 128 40 | 41 | img_h = imageToTest.shape[0] 42 | img_w = imageToTest.shape[1] 43 | if img_w < boxsize: 44 | offset = img_w % 2 45 | # center the original image 46 | output_img[:, int(boxsize / 2 - math.floor(img_w / 2)):int( 47 | boxsize / 2 + math.floor(img_w / 2) + offset), 
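gaussian_img above fills the map with two nested Python loops. A vectorized NumPy equivalent of the same exp(-d^2 / (2 * variance^2)) kernel, typically much faster for large maps:

```python
import numpy as np


def gaussian_img_fast(img_height, img_width, c_x, c_y, variance):
    ys, xs = np.mgrid[0:img_height, 0:img_width]
    dist_sq = (xs - c_x) ** 2 + (ys - c_y) ** 2
    return np.exp(-dist_sq / (2.0 * variance ** 2))
```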
:] = imageToTest 48 | else: 49 | # crop the center of the origin image 50 | output_img = imageToTest[:, 51 | int(img_w / 2 - boxsize / 2):int(img_w / 2 + boxsize / 2), :] 52 | return output_img 53 | 54 | 55 | def make_gaussian(size, fwhm=3, center=None): 56 | """ Make a square gaussian kernel. 57 | size is the length of a side of the square 58 | fwhm is full-width-half-maximum, which 59 | can be thought of as an effective radius. 60 | """ 61 | 62 | x = np.arange(0, size, 1, float) 63 | y = x[:, np.newaxis] 64 | 65 | if center is None: 66 | x0 = y0 = size // 2 67 | else: 68 | x0 = center[0] 69 | y0 = center[1] 70 | 71 | return np.exp(-((x - x0) ** 2 + (y - y0) ** 2) / 2.0 / fwhm / fwhm) 72 | 73 | 74 | def make_gaussian_batch(heatmaps, size, fwhm): 75 | """ Make a square gaussian kernel. 76 | size is the length of a side of the square 77 | fwhm is full-width-half-maximum, which 78 | can be thought of as an effective radius. 79 | """ 80 | stride = heatmaps.shape[1] // size 81 | 82 | batch_datum = np.zeros(shape=(heatmaps.shape[0], size, size, heatmaps.shape[3])) 83 | 84 | for data_num in range(heatmaps.shape[0]): 85 | for joint_num in range(heatmaps.shape[3] - 1): 86 | heatmap = heatmaps[data_num, :, :, joint_num] 87 | center = np.unravel_index(np.argmax(heatmap), (heatmap.shape[0], heatmap.shape[1])) 88 | 89 | x = np.arange(0, size, 1, float) 90 | y = x[:, np.newaxis] 91 | 92 | if center is None: 93 | x0 = y0 = size * stride // 2 94 | else: 95 | x0 = center[1] 96 | y0 = center[0] 97 | 98 | batch_datum[data_num, :, :, joint_num] = np.exp( 99 | -((x * stride - x0) ** 2 + (y * stride - y0) ** 2) / 2.0 / fwhm / fwhm) 100 | batch_datum[data_num, :, :, heatmaps.shape[3] - 1] = np.ones((size, size)) - np.amax( 101 | batch_datum[data_num, :, :, 0:heatmaps.shape[3] - 1], axis=2) 102 | 103 | return batch_datum 104 | 105 | 106 | def make_heatmaps_from_joints(input_size, heatmap_size, gaussian_variance, batch_joints): 107 | # Generate ground-truth heatmaps from ground-truth 2d joints 108 | scale_factor = input_size // heatmap_size 109 | batch_gt_heatmap_np = [] 110 | for i in range(batch_joints.shape[0]): 111 | gt_heatmap_np = [] 112 | invert_heatmap_np = np.ones(shape=(heatmap_size, heatmap_size)) 113 | for j in range(batch_joints.shape[1]): 114 | cur_joint_heatmap = make_gaussian(heatmap_size, 115 | gaussian_variance, 116 | center=(batch_joints[i][j] // scale_factor)) 117 | gt_heatmap_np.append(cur_joint_heatmap) 118 | invert_heatmap_np -= cur_joint_heatmap 119 | gt_heatmap_np.append(invert_heatmap_np) 120 | batch_gt_heatmap_np.append(gt_heatmap_np) 121 | batch_gt_heatmap_np = np.asarray(batch_gt_heatmap_np) 122 | batch_gt_heatmap_np = np.transpose(batch_gt_heatmap_np, (0, 2, 3, 1)) 123 | 124 | return batch_gt_heatmap_np 125 | 126 | 127 | def make_heatmaps_from_joints_openpose(input_size, heatmap_size, gaussian_variance, batch_joints): 128 | joint_map = [4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13, 20, 19, 18, 17, 0] 129 | # Generate ground-truth heatmaps from ground-truth 2d joints 130 | scale_factor = input_size // heatmap_size 131 | batch_gt_heatmap_np = [] 132 | for i in range(batch_joints.shape[0]): 133 | gt_heatmap_np = [] 134 | invert_heatmap_np = np.ones(shape=(heatmap_size, heatmap_size)) 135 | for j in range(batch_joints.shape[1]): 136 | cur_joint_heatmap = make_gaussian(heatmap_size, 137 | gaussian_variance, 138 | center=(batch_joints[i][joint_map[j]] // scale_factor)) 139 | gt_heatmap_np.append(cur_joint_heatmap) 140 | invert_heatmap_np -= cur_joint_heatmap 141 | 
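Throughout this repo a joint location is decoded as the argmax of its response map (make_gaussian_batch above, correct_and_draw_hand in the demo script). As a one-line helper:

```python
import numpy as np


def decode_joint(heatmap):
    # heatmap: (H, W) response map for one joint -> (row, col) of the peak
    return np.unravel_index(np.argmax(heatmap), heatmap.shape)
```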
gt_heatmap_np.append(invert_heatmap_np) 142 | batch_gt_heatmap_np.append(gt_heatmap_np) 143 | batch_gt_heatmap_np = np.asarray(batch_gt_heatmap_np) 144 | batch_gt_heatmap_np = np.transpose(batch_gt_heatmap_np, (0, 2, 3, 1)) 145 | 146 | return batch_gt_heatmap_np 147 | 148 | 149 | def rad2Deg(rad): 150 | return rad * (180 / M_PI) 151 | 152 | 153 | def deg2Rad(deg): 154 | return deg * (M_PI / 180) 155 | 156 | 157 | def warpMatrix(sw, sh, theta, phi, gamma, scale, fovy): 158 | st = math.sin(deg2Rad(theta)) 159 | ct = math.cos(deg2Rad(theta)) 160 | sp = math.sin(deg2Rad(phi)) 161 | cp = math.cos(deg2Rad(phi)) 162 | sg = math.sin(deg2Rad(gamma)) 163 | cg = math.cos(deg2Rad(gamma)) 164 | 165 | halfFovy = fovy * 0.5 166 | d = math.hypot(sw, sh) 167 | sideLength = scale * d / math.cos(deg2Rad(halfFovy)) 168 | h = d / (2.0 * math.sin(deg2Rad(halfFovy))) 169 | n = h - (d / 2.0) 170 | f = h + (d / 2.0) 171 | 172 | Rtheta = np.identity(4) 173 | Rphi = np.identity(4) 174 | Rgamma = np.identity(4) 175 | 176 | T = np.identity(4) 177 | P = np.zeros((4, 4)) 178 | 179 | Rtheta[0, 0] = Rtheta[1, 1] = ct 180 | Rtheta[0, 1] = -st 181 | Rtheta[1, 0] = st 182 | 183 | Rphi[1, 1] = Rphi[2, 2] = cp 184 | Rphi[1, 2] = -sp 185 | Rphi[2, 1] = sp 186 | 187 | Rgamma[0, 0] = cg 188 | Rgamma[2, 2] = cg 189 | Rgamma[0, 2] = sg 190 | Rgamma[2, 0] = sg 191 | 192 | T[2, 3] = -h 193 | 194 | P[0, 0] = P[1, 1] = 1.0 / math.tan(deg2Rad(halfFovy)) 195 | P[2, 2] = -(f + n) / (f - n) 196 | P[2, 3] = -(2.0 * f * n) / (f - n) 197 | P[3, 2] = -1.0 198 | 199 | F = np.matmul(Rtheta, Rgamma) 200 | F = np.matmul(Rphi, F) 201 | F = np.matmul(T, F) 202 | F = np.matmul(P, F) 203 | 204 | ptsIn = np.zeros(12) 205 | ptsOut = np.zeros(12) 206 | halfW = sw / 2 207 | halfH = sh / 2 208 | 209 | ptsIn[0] = -halfW 210 | ptsIn[1] = halfH 211 | ptsIn[3] = halfW 212 | ptsIn[4] = halfH 213 | ptsIn[6] = halfW 214 | ptsIn[7] = -halfH 215 | ptsIn[9] = -halfW 216 | ptsIn[10] = -halfH 217 | ptsIn[2] = ptsIn[5] = ptsIn[8] = ptsIn[11] = 0 218 | 219 | ptsInMat = np.array([[ptsIn[0], ptsIn[1], ptsIn[2]], [ptsIn[3], ptsIn[4], ptsIn[5]], [ptsIn[6], ptsIn[7], ptsIn[8]], 220 | [ptsIn[9], ptsIn[10], ptsIn[11]]], dtype=np.float32) 221 | ptsOutMat = np.array( 222 | [[ptsOut[0], ptsOut[1], ptsOut[2]], [ptsOut[3], ptsOut[4], ptsOut[5]], [ptsOut[6], ptsOut[7], ptsOut[8]], 223 | [ptsOut[9], ptsOut[10], ptsOut[11]]], dtype=np.float32) 224 | ptsInMat = np.array([ptsInMat]) 225 | ptsOutMat = cv2.perspectiveTransform(ptsInMat, F) 226 | 227 | ptsInPt2f = np.array([[0, 0], [0, 0], [0, 0], [0, 0]], dtype=np.float32) 228 | ptsOutPt2f = np.array([[0, 0], [0, 0], [0, 0], [0, 0]], dtype=np.float32) 229 | 230 | i = 0 231 | 232 | while i < 4: 233 | ptsInPt2f[i][0] = ptsIn[i * 3 + 0] + halfW 234 | ptsInPt2f[i][1] = ptsIn[i * 3 + 1] + halfH 235 | ptsOutPt2f[i][0] = (ptsOutMat[0][i][0] + 1) * sideLength * 0.5 236 | ptsOutPt2f[i][1] = (ptsOutMat[0][i][1] + 1) * sideLength * 0.5 237 | i = i + 1 238 | 239 | M = cv2.getPerspectiveTransform(ptsInPt2f, ptsOutPt2f) 240 | return M 241 | 242 | 243 | def warpImage(src, theta, phi, gamma, scale, fovy): 244 | halfFovy = fovy * 0.5 245 | d = math.hypot(src.shape[1], src.shape[0]) 246 | sideLength = scale * d / math.cos(deg2Rad(halfFovy)) 247 | sideLength = np.int32(sideLength) 248 | 249 | M = warpMatrix(src.shape[1], src.shape[0], theta, phi, gamma, scale, fovy) 250 | dst = cv2.warpPerspective(src, M, (sideLength, sideLength)) 251 | mid_x = mid_y = dst.shape[0] // 2 252 | target_x = target_y = src.shape[0] // 2 253 | offset = (target_x % 2) 254 | 255 | 
if len(dst.shape) == 3: 256 | dst = dst[mid_y - target_y:mid_y + target_y + offset, 257 | mid_x - target_x:mid_x + target_x + offset, 258 | :] 259 | else: 260 | dst = dst[mid_y - target_y:mid_y + target_y + offset, 261 | mid_x - target_x:mid_x + target_x + offset] 262 | 263 | return dst 264 | -------------------------------------------------------------------------------- /utils/create_cpm_tfr_fulljoints.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | import time 4 | 5 | import cv2 6 | import numpy as np 7 | import tensorflow as tf 8 | import utils 9 | 10 | tfr_file = 'cpm_sample_dataset.tfrecords' 11 | dataset_dir = '' 12 | 13 | SHOW_INFO = False 14 | box_size = 64 15 | num_of_joints = 21 16 | gaussian_radius = 2 17 | 18 | 19 | def _bytes_feature(value): 20 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 21 | 22 | 23 | def _int64_feature(value): 24 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 25 | 26 | 27 | def _float64_feature(value): 28 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 29 | 30 | 31 | # Create writer 32 | tfr_writer = tf.python_io.TFRecordWriter(tfr_file) 33 | 34 | img_count = 0 35 | t1 = time.time() 36 | # Loop each dir 37 | for person_dir in os.listdir(dataset_dir): 38 | if not os.path.isdir(dataset_dir + person_dir): continue 39 | 40 | gt_file = dataset_dir + person_dir + '/labels.txt' 41 | gt_content = open(gt_file, 'rb').readlines() 42 | 43 | for idx, line in enumerate(gt_content): 44 | line = line.split() 45 | 46 | # Check if it is a valid img file 47 | if not line[0].endswith(('jpg', 'png')): 48 | continue 49 | cur_img_path = dataset_dir + person_dir + '/imgs/' + line[0] 50 | cur_img = cv2.imread(cur_img_path) 51 | 52 | # Read in bbox and joints coords 53 | tmp = [float(x) for x in line[1:5]] 54 | cur_hand_bbox = [min([tmp[0], tmp[2]]), 55 | min([tmp[1], tmp[3]]), 56 | max([tmp[0], tmp[2]]), 57 | max([tmp[1], tmp[3]]) 58 | ] 59 | if cur_hand_bbox[0] < 0: cur_hand_bbox[0] = 0 60 | if cur_hand_bbox[1] < 0: cur_hand_bbox[1] = 0 61 | if cur_hand_bbox[2] > cur_img.shape[1]: cur_hand_bbox[2] = cur_img.shape[1] 62 | if cur_hand_bbox[3] > cur_img.shape[0]: cur_hand_bbox[3] = cur_img.shape[0] 63 | 64 | cur_hand_joints_x = [float(i) for i in line[9:49:2]] 65 | cur_hand_joints_x.append(float(line[7])) 66 | cur_hand_joints_y = [float(i) for i in line[10:49:2]] 67 | cur_hand_joints_y.append(float(line[8])) 68 | 69 | # Crop image and adjust joint coords 70 | cur_img = cur_img[int(float(cur_hand_bbox[1])):int(float(cur_hand_bbox[3])), 71 | int(float(cur_hand_bbox[0])):int(float(cur_hand_bbox[2])), 72 | :] 73 | cur_hand_joints_x = [x - cur_hand_bbox[0] for x in cur_hand_joints_x] 74 | cur_hand_joints_y = [x - cur_hand_bbox[1] for x in cur_hand_joints_y] 75 | 76 | # # Display joints 77 | # for i in range(len(cur_hand_joints_x)): 78 | # cv2.circle(cur_img, center=(int(cur_hand_joints_x[i]), int(cur_hand_joints_y[i])),radius=3, color=(255,0,0), thickness=-1) 79 | # cv2.imshow('', cur_img) 80 | # cv2.waitKey(500) 81 | # cv2.imshow('', cur_img) 82 | # cv2.waitKey(1) 83 | 84 | output_image = np.ones(shape=(box_size, box_size, 3)) * 128 85 | output_heatmaps = np.zeros((box_size, box_size, num_of_joints)) 86 | 87 | # Resize and pad image to fit output image size 88 | if cur_img.shape[0] > cur_img.shape[1]: 89 | scale = box_size / (cur_img.shape[0] * 1.0) 90 | 91 | # Relocalize points 92 | cur_hand_joints_x = map(lambda x: x * scale, 
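A portability note on the map() assignments that begin here: under Python 3 map() returns an iterator, so the later np.asarray(cur_hand_joints_x) produces a 0-d object array and the per-joint indexing fails; list comprehensions keep the original Python 2 behaviour. A minimal demonstration of the pitfall:

```python
import numpy as np

scale = 0.5
joints = [10.0, 20.0, 30.0]
as_map = np.asarray(map(lambda x: x * scale, joints))   # Py3: 0-d object array
as_list = np.asarray([x * scale for x in joints])       # shape (3,), as intended
print(as_map.shape, as_list.shape)                      # prints: () (3,)
```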
cur_hand_joints_x) 93 | cur_hand_joints_y = map(lambda x: x * scale, cur_hand_joints_y) 94 | 95 | # Resize image 96 | image = cv2.resize(cur_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LANCZOS4) 97 | offset = image.shape[1] % 2 98 | 99 | output_image[:, int(box_size / 2 - math.floor(image.shape[1] / 2)): int( 100 | box_size / 2 + math.floor(image.shape[1] / 2) + offset), :] = image 101 | cur_hand_joints_x = map(lambda x: x + (box_size / 2 - math.floor(image.shape[1] / 2)), 102 | cur_hand_joints_x) 103 | 104 | cur_hand_joints_x = np.asarray(cur_hand_joints_x) 105 | cur_hand_joints_y = np.asarray(cur_hand_joints_y) 106 | 107 | if SHOW_INFO: 108 | hmap = np.zeros((box_size, box_size)) 109 | # Plot joints 110 | for i in range(num_of_joints): 111 | cv2.circle(output_image, (int(cur_hand_joints_x[i]), int(cur_hand_joints_y[i])), 3, (0, 255, 0), 2) 112 | 113 | # Generate joint gaussian map 114 | part_heatmap = utils.make_gaussian(output_image.shape[0], gaussian_radius, 115 | [cur_hand_joints_x[i], cur_hand_joints_y[i]]) 116 | hmap += part_heatmap * 50 117 | else: 118 | for i in range(num_of_joints): 119 | output_heatmaps[:, :, i] = utils.make_gaussian(box_size, gaussian_radius, 120 | [cur_hand_joints_x[i], cur_hand_joints_y[i]]) 121 | 122 | else: 123 | scale = box_size / (cur_img.shape[1] * 1.0) 124 | 125 | # Relocalize points 126 | cur_hand_joints_x = map(lambda x: x * scale, cur_hand_joints_x) 127 | cur_hand_joints_y = map(lambda x: x * scale, cur_hand_joints_y) 128 | 129 | # Resize image 130 | image = cv2.resize(cur_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LANCZOS4) 131 | offset = image.shape[0] % 2 132 | 133 | output_image[int(box_size / 2 - math.floor(image.shape[0] / 2)): int( 134 | box_size / 2 + math.floor(image.shape[0] / 2) + offset), :, :] = image 135 | cur_hand_joints_y = map(lambda x: x + (box_size / 2 - math.floor(image.shape[0] / 2)), 136 | cur_hand_joints_y) 137 | 138 | cur_hand_joints_x = np.asarray(cur_hand_joints_x) 139 | cur_hand_joints_y = np.asarray(cur_hand_joints_y) 140 | 141 | if SHOW_INFO: 142 | hmap = np.zeros((box_size, box_size)) 143 | # Plot joints 144 | for i in range(num_of_joints): 145 | cv2.circle(output_image, (int(cur_hand_joints_x[i]), int(cur_hand_joints_y[i])), 3, (0, 255, 0), 2) 146 | 147 | # Generate joint gaussian map 148 | part_heatmap = utils.make_gaussian(output_image.shape[0], gaussian_radius, 149 | [cur_hand_joints_x[i], cur_hand_joints_y[i]]) 150 | hmap += part_heatmap * 50 151 | else: 152 | for i in range(num_of_joints): 153 | output_heatmaps[:, :, i] = utils.make_gaussian(box_size, gaussian_radius, 154 | [cur_hand_joints_x[i], cur_hand_joints_y[i]]) 155 | if SHOW_INFO: 156 | cv2.imshow('', hmap.astype(np.uint8)) 157 | cv2.imshow('i', output_image.astype(np.uint8)) 158 | cv2.waitKey(0) 159 | 160 | # Create background map 161 | output_background_map = np.ones((box_size, box_size)) - np.amax(output_heatmaps, axis=2) 162 | output_heatmaps = np.concatenate((output_heatmaps, output_background_map.reshape((box_size, box_size, 1))), 163 | axis=2) 164 | # cv2.imshow('', (output_background_map*255).astype(np.uint8)) 165 | # cv2.imshow('h', (np.amax(output_heatmaps[:, :, 0:21], axis=2)*255).astype(np.uint8)) 166 | # cv2.waitKey(1000) 167 | 168 | 169 | coords_set = np.concatenate((np.reshape(cur_hand_joints_x, (num_of_joints, 1)), 170 | np.reshape(cur_hand_joints_y, (num_of_joints, 1))), 171 | axis=1) 172 | 173 | output_image_raw = output_image.astype(np.uint8).tostring() 174 | output_heatmaps_raw = 
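Each record written here holds only two features: the raw uint8 image bytes and the flattened float heatmaps. The reader in utils/tf_utils.py parses them with a matching spec; that spec with this script's constants (box_size = 64, 21 joints plus one background channel):

```python
import tensorflow as tf

features = {
    'image': tf.FixedLenFeature([], tf.string),
    'heatmaps': tf.FixedLenFeature([64 * 64 * (21 + 1)], tf.float32),
}
# used as: tf.parse_single_example(serialized_example, features=features)
```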
output_heatmaps.flatten().tolist() 175 | output_coords_raw = coords_set.flatten().tolist() 176 | 177 | raw_sample = tf.train.Example(features=tf.train.Features(feature={ 178 | 'image': _bytes_feature(output_image_raw), 179 | 'heatmaps': _float64_feature(output_heatmaps_raw) 180 | })) 181 | 182 | tfr_writer.write(raw_sample.SerializeToString()) 183 | 184 | img_count += 1 185 | if img_count % 50 == 0: 186 | print('Processed %d images, took %f seconds' % (img_count, time.time() - t1)) 187 | t1 = time.time() 188 | 189 | tfr_writer.close() 190 | -------------------------------------------------------------------------------- /utils/tf_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import utils.cpm_utils as cpm_utils 3 | 4 | 5 | def read_and_decode_cpm(tfr_queue, img_size, num_joints, center_radius): 6 | tfr_reader = tf.TFRecordReader() 7 | _, serialized_example = tfr_reader.read(tfr_queue) 8 | 9 | queue_images = [] 10 | queue_center_maps = [] 11 | queue_labels = [] 12 | queue_orig_images = [] 13 | 14 | for i in range(2): 15 | features = tf.parse_single_example(serialized_example, 16 | features={ 17 | 'image': tf.FixedLenFeature([], tf.string), 18 | 'heatmaps': tf.FixedLenFeature( 19 | [int(img_size * img_size * (num_joints + 1))], tf.float32) 20 | }) 21 | 22 | # img_size = 128 23 | # center_radius = 11 24 | img = tf.decode_raw(features['image'], tf.uint8) 25 | img = tf.reshape(img, [img_size, img_size, 3]) 26 | img = tf.cast(img, tf.float32) 27 | 28 | img = img[..., ::-1] 29 | img = tf.image.random_contrast(img, 0.7, 1) 30 | img = tf.image.random_brightness(img, max_delta=0.9) 31 | img = tf.image.random_hue(img, 0.05) 32 | img = tf.image.random_saturation(img, 0.7, 1.1) 33 | img = img[..., ::-1] 34 | 35 | # heatmap = tf.decode_raw(features['heatmaps'], tf.float32) 36 | heatmap = tf.reshape(features['heatmaps'], [img_size, img_size, (num_joints + 1)]) 37 | 38 | # create centermap 39 | center_map = tf.constant((cpm_utils.make_gaussian(img_size, center_radius, 40 | [int(img_size / 2), int(img_size / 2)])).reshape( 41 | (img_size, img_size, 1)), name='center_map') 42 | center_map = tf.cast(center_map, tf.float32) 43 | 44 | # merge img + centermap + heatmap 45 | merged_img_heatmap = tf.concat([img, center_map, heatmap], axis=2) 46 | 47 | # subtract mean before pad 48 | mean_volume = tf.concat((128 * tf.ones(shape=(img_size, img_size, 3)), 49 | tf.zeros(shape=(img_size, img_size, (num_joints + 1))), 50 | tf.ones(shape=(img_size, img_size, 1))), axis=2) 51 | 52 | merged_img_heatmap -= mean_volume 53 | 54 | # preprocessing 55 | preprocessed_merged_img_c_heatmap, _, _ = preprocess(merged_img_heatmap, 56 | label=None, 57 | crop_off_ratio=0.05, 58 | rotation_angle=0.8, 59 | has_bbox=False, 60 | do_flip_lr=True, 61 | do_flip_ud=False, 62 | low_sat=None, 63 | high_sat=None, 64 | max_bright_delta=None, 65 | max_hue_delta=None) 66 | 67 | padded_img_size = img_size # * (1 + tf.random_uniform([], minval=0.0, maxval=0.3)) 68 | padded_img_size = tf.cast(padded_img_size, tf.int32) 69 | 70 | # resize pad 71 | preprocessed_merged_img_c_heatmap = tf.image.resize_image_with_crop_or_pad(preprocessed_merged_img_c_heatmap, 72 | padded_img_size, padded_img_size) 73 | preprocessed_merged_img_c_heatmap += tf.concat((128 * tf.ones(shape=(padded_img_size, padded_img_size, 3)), 74 | tf.zeros( 75 | shape=(padded_img_size, padded_img_size, (num_joints + 1))), 76 | tf.ones(shape=(padded_img_size, padded_img_size, 1))), axis=2) 77 | 
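The pair of img[..., ::-1] flips above brackets the tf.image colour jitter, presumably converting cv2-style BGR frames to the RGB these ops expect and back again (an assumption; the stored images do come from cv2.imread). The pattern in isolation:

```python
import tensorflow as tf


def jitter_bgr(img_bgr):
    img = img_bgr[..., ::-1]                        # BGR -> RGB
    img = tf.image.random_hue(img, max_delta=0.05)
    img = tf.image.random_saturation(img, 0.7, 1.1)
    return img[..., ::-1]                           # RGB -> BGR
```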
preprocessed_merged_img_c_heatmap = tf.image.resize_images(preprocessed_merged_img_c_heatmap, 78 | size=[img_size, img_size]) 79 | 80 | with tf.control_dependencies([preprocessed_merged_img_c_heatmap]): 81 | # preprocessed_img = tf.slice(preprocessed_merged_img_c_heatmap, [0,0,0], [368,368,3]) 82 | # preprocessed_center_maps = tf.slice(preprocessed_merged_img_c_heatmap, [0,0,3], [368,368,1]) 83 | # preprocessed_heatmaps = tf.slice(preprocessed_merged_img_c_heatmap, [0,0,4], [368,368,13]) 84 | 85 | preprocessed_img, preprocessed_center_maps, preprocessed_heatmaps = tf.split( 86 | preprocessed_merged_img_c_heatmap, [3, 1, (num_joints + 1)], axis=2) 87 | 88 | # Normalize image value 89 | preprocessed_img /= 256 90 | preprocessed_img -= 0.5 91 | 92 | queue_images.append(preprocessed_img) 93 | queue_center_maps.append(preprocessed_center_maps) 94 | queue_labels.append(preprocessed_heatmaps) 95 | queue_orig_images.append(img) 96 | 97 | return queue_images, queue_center_maps, queue_labels, queue_orig_images 98 | # return preprocessed_img, preprocessed_center_maps, preprocessed_heatmaps, img 99 | 100 | 101 | def read_batch_cpm(tfr_path, img_size, hmap_size, num_joints, center_radius, batch_size=16, num_epochs=None): 102 | """Read batch images as the input to the network 103 | 104 | tfr_path: path to tfrecord file 105 | num_epochs: None=iteratively read forever 106 | other number=iterate whole tfr_file how many times 107 | """ 108 | 109 | with tf.name_scope('Batch_Inputs'): 110 | tfr_queue = tf.train.string_input_producer(tfr_path, num_epochs=num_epochs, shuffle=True) 111 | 112 | # images, centers, labels, image_orig = read_and_decode_cpm(tfr_queue, img_size, num_joints, center_radius) 113 | 114 | data_list = [read_and_decode_cpm(tfr_queue, img_size, num_joints, center_radius) for _ in 115 | range(2 * len(tfr_path))] 116 | 117 | batch_images, batch_centers, batch_labels, batch_images_orig = tf.train.shuffle_batch_join(data_list, 118 | batch_size=batch_size, 119 | capacity=100 + 6 * batch_size, 120 | min_after_dequeue=100, 121 | enqueue_many=True, 122 | name='batch_data_read') 123 | 124 | # batch_labels = tf.image.resize_bilinear(batch_labels, size=tf.constant((hmap_size,hmap_size), name='shape')) 125 | 126 | return batch_images, batch_centers, batch_labels, batch_images_orig 127 | 128 | 129 | def rotate_points(orig_points, angle, w, h): 130 | """Return rotated points 131 | 132 | Args: 133 | orig_points: 'Tensor' with shape [N,2], each entry is point (x,y) 134 | angle: rotate radians 135 | 136 | Returns: 137 | 'Tensor' with shape [N,2], with rotated points 138 | """ 139 | 140 | # rotation 141 | rotate_mat = tf.stack([[tf.cos(angle) / w, tf.sin(angle) / h], 142 | [-tf.sin(angle) / w, tf.cos(angle) / h]]) 143 | 144 | # shift coord 145 | orig_points = tf.subtract(orig_points, 0.5) 146 | 147 | orig_points = tf.stack([orig_points[:, 0] * w, 148 | orig_points[:, 1] * h], axis=1) 149 | print(orig_points) 150 | rotated_points = tf.matmul(orig_points, rotate_mat) + 0.5 151 | 152 | return rotated_points 153 | 154 | 155 | def preprocess(image, 156 | label, 157 | has_bbox=True, 158 | rotation_angle=1.5, 159 | crop_off_ratio=0.2, 160 | do_flip_lr=True, 161 | do_flip_ud=True, 162 | max_hue_delta=0.15, 163 | low_sat=0.5, 164 | high_sat=2.0, 165 | max_bright_delta=0.3): 166 | """Do some processes for input image 167 | 168 | Args: 169 | image: A 'Tensor' of RGB image 170 | label: vector of floats with even length (be pair of (x,y)) 171 | has_bbox: if 'True', Assume first 4 numbers of 'label' are [top-left, 
129 | def rotate_points(orig_points, angle, w, h):
130 |     """Return rotated points
131 | 
132 |     Args:
133 |         orig_points: 'Tensor' with shape [N, 2], each entry a normalized (x, y) point
134 |         angle: rotation angle in radians; w, h: image width and height in pixels
135 | 
136 |     Returns:
137 |         'Tensor' with shape [N, 2] holding the rotated, re-normalized points
138 |     """
139 | 
140 |     # rotation matrix (folds the pixel-space scaling back into normalized coords)
141 |     rotate_mat = tf.stack([[tf.cos(angle) / w, tf.sin(angle) / h],
142 |                            [-tf.sin(angle) / w, tf.cos(angle) / h]])
143 | 
144 |     # shift coords so the rotation is about the image center
145 |     orig_points = tf.subtract(orig_points, 0.5)
146 | 
147 |     orig_points = tf.stack([orig_points[:, 0] * w,
148 |                             orig_points[:, 1] * h], axis=1)
149 | 
150 |     rotated_points = tf.matmul(orig_points, rotate_mat) + 0.5
151 | 
152 |     return rotated_points
153 | 
154 | 
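# Editor's note (hedged sketch, not part of the original file): the coordinate
# convention above is easy to misread, so this is the same math in plain numpy.
# Points normalized to [0, 1] are rotated in pixel space about the image
# center, then mapped back to normalized coordinates.
#
# import numpy as np
#
# def rotate_points_np(points, angle, w, h):
#     c, s = np.cos(angle), np.sin(angle)
#     px = (points - 0.5) * [w, h]          # pixel offsets from the image center
#     rot = np.array([[c / w, s / h],
#                     [-s / w, c / h]])
#     return px @ rot + 0.5                 # back to normalized coordinates
#
# rotate_points_np(np.array([[0.25, 0.25]]), np.pi / 2, w=640, h=480)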
155 | def preprocess(image,
156 |                label,
157 |                has_bbox=True,
158 |                rotation_angle=1.5,
159 |                crop_off_ratio=0.2,
160 |                do_flip_lr=True,
161 |                do_flip_ud=True,
162 |                max_hue_delta=0.15,
163 |                low_sat=0.5,
164 |                high_sat=2.0,
165 |                max_bright_delta=0.3):
166 |     """Apply random color jitter, cropping, rotation and flips to an input image
167 | 
168 |     Args:
169 |         image: a 'Tensor' holding an RGB image
170 |         label: vector of floats with even length (pairs of (x, y) coords)
171 |         has_bbox: if 'True', assume the first 4 numbers of 'label' are the [top-left, bot-right] coords
172 |         rotation_angle: maximum allowed rotation, in radians
173 |         crop_off_ratio: maximum cropping offset of the top-left corner;
174 |             1 - crop_off_ratio is the maximum cropping offset of the bot-right corner
175 |         do_flip_lr: if 'True', flip the image left-right with probability 0.5
176 |         do_flip_ud: if 'True', flip the image upside down with probability 0.5
177 |         max_hue_delta: maximum random hue adjustment
178 |         low_sat: lower bound for random saturation
179 |         high_sat: upper bound for random saturation
180 |         max_bright_delta: maximum random brightness adjustment
181 | 
182 |     Returns:
183 |         image: processed image 'Tensor'
184 |         new_bbox: 'Tensor' of processed bbox coords if 'has_bbox' == True
185 |         total_points: 'Tensor' of processed points coords
186 |     """
187 | 
188 |     new_bbox = []
189 |     total_points = []
190 | 
191 |     # [height, width, channel] of input image
192 |     img_shape_list = image.get_shape().as_list()
193 | 
194 |     if max_hue_delta is not None:
195 |         # random hue
196 |         image = tf.image.random_hue(image, max_delta=max_hue_delta)
197 | 
198 |     if low_sat is not None and high_sat is not None:
199 |         # random saturation
200 |         image = tf.image.random_saturation(image, lower=low_sat, upper=high_sat)
201 | 
202 |     if max_bright_delta is not None:
203 |         # random brightness
204 |         image = tf.image.random_brightness(image, max_delta=max_bright_delta)
205 | 
206 |     if label is not None:
207 |         total_points = tf.stack([label[i] for i in range(label.shape[0])])
208 | 
209 |     # crop image
210 |     new_top_left_x = crop_off_ratio * tf.random_uniform([], minval=-1.0, maxval=1.0)
211 |     off_w_ratio = tf.cond(tf.less(new_top_left_x, 0), lambda: tf.zeros([]), lambda: new_top_left_x)
212 | 
213 |     new_top_left_y = crop_off_ratio * tf.random_uniform([], minval=-1.0, maxval=1.0)
214 |     off_h_ratio = tf.cond(tf.less(new_top_left_y, 0), lambda: tf.zeros([]), lambda: new_top_left_y)
215 | 
216 |     new_bot_right_x = crop_off_ratio * tf.random_uniform([], minval=-1.0, maxval=1.0)
217 |     tar_w_ratio = tf.cond(tf.less(new_bot_right_x, 0), lambda: tf.ones([]) - off_w_ratio,
218 |                           lambda: 1 - new_bot_right_x - off_w_ratio)
219 | 
220 |     new_bot_right_y = crop_off_ratio * tf.random_uniform([], minval=-1.0, maxval=1.0)
221 |     tar_h_ratio = tf.cond(tf.less(new_bot_right_y, 0), lambda: tf.ones([]) - off_h_ratio,
222 |                           lambda: 1 - new_bot_right_y - off_h_ratio)
223 | 
224 |     pad_image_height = (1 - new_top_left_y - new_bot_right_y) * img_shape_list[0]
225 |     pad_image_width = (1 - new_top_left_x - new_bot_right_x) * img_shape_list[1]
226 |     cropped_image = tf.image.crop_to_bounding_box(image,
227 |                                                   offset_width=tf.cast(off_w_ratio * img_shape_list[1], tf.int32),
228 |                                                   offset_height=tf.cast(off_h_ratio * img_shape_list[0], tf.int32),
229 |                                                   target_height=tf.cast(tar_h_ratio * img_shape_list[0], tf.int32),
230 |                                                   target_width=tf.cast(tar_w_ratio * img_shape_list[1], tf.int32))
231 | 
232 |     image = tf.image.pad_to_bounding_box(cropped_image,
233 |                                          offset_width=tf.cast((off_w_ratio - new_top_left_x) * img_shape_list[1],
234 |                                                               tf.int32),
235 |                                          offset_height=tf.cast((off_h_ratio - new_top_left_y) * img_shape_list[0],
236 |                                                                tf.int32),
237 |                                          target_height=tf.cast(pad_image_height, tf.int32),
238 |                                          target_width=tf.cast(pad_image_width, tf.int32))
239 | 
240 |     # random rotation angle
241 |     angle = rotation_angle * tf.random_uniform([])
242 | 
243 |     # rotate image
244 |     image = tf.contrib.image.rotate(image, -angle, interpolation='BILINEAR')
245 | 
246 |     if label is not None:
247 |         if has_bbox:
248 |             # include the 4 bbox corner points
249 |             bbox_points = tf.stack([[total_points[0][0], total_points[0][1]],
250 |                                     [total_points[1][0], total_points[0][1]],
251 |                                     [total_points[0][0], total_points[1][1]],
252 |                                     [total_points[1][0], total_points[1][1]]], axis=0)
253 |             if label.shape[0] == 4:
254 |                 total_points = bbox_points
255 |             else:
256 |                 total_points = tf.concat([bbox_points, total_points[2:]], axis=0)
257 | 
258 |         # rotate points
259 |         total_points = rotate_points(total_points, angle, pad_image_width, pad_image_height)
260 | 
261 |         if has_bbox:
262 |             # new bbox [top_left, bot_right]
263 |             new_bbox = tf.stack([[total_points[2][0], total_points[0][1]],
264 |                                  [total_points[1][0], total_points[3][1]]], axis=0)
265 |             total_points = tf.concat([new_bbox, total_points[4:]], axis=0)
266 | 
267 |     if label is not None:
268 |         # adjust points' coords for the cropped image
269 |         total_points = tf.reshape(total_points[:], shape=[-1, 2])
270 |         total_points = tf.stack([(total_points[:, 0] - new_top_left_x) / (1 - new_top_left_x - new_bot_right_x),
271 |                                  (total_points[:, 1] - new_top_left_y) / (1 - new_top_left_y - new_bot_right_y)],
272 |                                 axis=1)
273 | 
274 |     if label is not None:
275 |         # flip left-right with 50% chance
276 |         def flip_lr():
277 |             i = tf.image.flip_left_right(image)
278 |             l = tf.stack([1 - total_points[:, 0],
279 |                           total_points[:, 1]], axis=1)
280 |             return i, l
281 | 
282 |         def no_flip_lr():
283 |             i = image
284 |             l = total_points
285 |             return i, l
286 | 
287 |         if do_flip_lr:
288 |             image, total_points = tf.cond(tf.greater(tf.random_uniform([]), 0.5), flip_lr, no_flip_lr)
289 | 
290 |         # flip upside down with 50% chance
291 |         def flip_ud():
292 |             i = tf.image.flip_up_down(image)
293 |             l = tf.stack([total_points[:, 0],
294 |                           1 - total_points[:, 1]], axis=1)
295 |             return i, l
296 | 
297 |         def no_flip_ud():
298 |             i = image
299 |             l = total_points
300 |             return i, l
301 | 
302 |         if do_flip_ud:
303 |             image, total_points = tf.cond(tf.greater(tf.random_uniform([]), 0.5), flip_ud, no_flip_ud)
304 | 
305 |         if has_bbox:  # repack bbox as [center_x, center_y, width, height]
306 |             new_bbox = tf.stack([(total_points[0, 0] + total_points[1, 0]) / 2,
307 |                                  (total_points[0, 1] + total_points[1, 1]) / 2,
308 |                                  tf.abs(total_points[1, 0] - total_points[0, 0]),
309 |                                  tf.abs(total_points[1, 1] - total_points[0, 1])], axis=0)
310 | 
311 |         total_points = tf.reshape(total_points, shape=[-1, ])
312 | 
313 |     else:
314 |         # flip left-right with 50% chance
315 |         def flip_lr():
316 |             i = tf.image.flip_left_right(image)
317 |             return i
318 | 
319 |         def no_flip_lr():
320 |             i = image
321 |             return i
322 | 
323 |         if do_flip_lr:
324 |             image = tf.cond(tf.greater(tf.random_uniform([]), 0.5), flip_lr, no_flip_lr)
325 | 
326 |         # flip upside down with 50% chance
327 |         def flip_ud():
328 |             i = tf.image.flip_up_down(image)
329 |             return i
330 | 
331 |         def no_flip_ud():
332 |             i = image
333 |             return i
334 | 
335 |         if do_flip_ud:
336 |             image = tf.cond(tf.greater(tf.random_uniform([]), 0.5), flip_ud, no_flip_ud)
337 | 
338 |     return image, new_bbox, total_points
339 | 
--------------------------------------------------------------------------------
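A hedged sketch of how preprocess is invoked: read_and_decode_cpm above calls it on the merged image/center-map/heatmap volume with label=None and the color-jitter arguments disabled. The dummy zero tensor below is a placeholder for illustration only, not something the repo builds:

import tensorflow as tf
from utils.tf_utils import preprocess

dummy = tf.zeros([128, 128, 3])  # placeholder image tensor
aug_img, _, _ = preprocess(dummy, label=None, has_bbox=False,
                           rotation_angle=0.8, crop_off_ratio=0.05,
                           do_flip_lr=True, do_flip_ud=False,
                           max_hue_delta=None, low_sat=None,
                           high_sat=None, max_bright_delta=None)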
/utils/tracking_module.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | 
4 | 
5 | class SelfTracker(object):
6 |     def __init__(self, img_shape, model_input_size):
7 |         self.img_shape = img_shape
8 |         self.loss_track = False  # set to True when tracking is lost
9 |         self.prev_bbox = [0, 0, img_shape[0], img_shape[1]]
10 |         self.init_center = [img_shape[0]//2, img_shape[1]//2]
11 |         self.cur_center = [img_shape[0]//2, img_shape[1]//2]
12 |         self._default_crop_size = 368
13 |         self.bbox = [0, 0, 0, 0]
14 |         self.pad_boundary = [0, 0, 0, 0]
15 |         self.prev_crop_h = self._default_crop_size
16 |         self.prev_crop_w = self._default_crop_size
17 |         self.alpha = 0.2  # smoothing factor for the crop-size moving average
18 |         self.input_crop_ratio = 1.0
19 |         self.input_size = float(model_input_size)
20 | 
21 |     def tracking_by_joints(self, full_img, joint_detections=None):
22 |         if self.loss_track or joint_detections is None:
23 |             cropped_img = self._crop_image(full_img, self.init_center, (self._default_crop_size, self._default_crop_size))
24 |             self.input_crop_ratio = self.input_size / max(cropped_img.shape[0], cropped_img.shape[1])
25 |             resize_img = self._resize_image(cropped_img, self.input_size)
26 |             return self._pad_image(resize_img, max(resize_img.shape[0], resize_img.shape[1]))
27 |         else:
28 |             self.cur_center = np.mean(joint_detections, axis=0, dtype=np.int)
29 |             crop_h = np.max(joint_detections[:, 0]) - np.min(joint_detections[:, 0])
30 |             crop_w = np.max(joint_detections[:, 1]) - np.min(joint_detections[:, 1])
31 |             crop_h = max(int(crop_h), 96)
32 |             crop_w = max(int(crop_w), 96)
33 |             crop_h *= 2.0
34 |             crop_w *= 2.0
35 |             self.prev_crop_h = self.alpha * crop_h + (1-self.alpha) * self.prev_crop_h
36 |             self.prev_crop_w = self.alpha * crop_w + (1-self.alpha) * self.prev_crop_w
37 | 
38 |             cropped_img = self._crop_image(full_img, self.cur_center, (int(self.prev_crop_h), int(self.prev_crop_w)))
39 |             self.input_crop_ratio = self.input_size / max(cropped_img.shape[0], cropped_img.shape[1])
40 |             resize_img = self._resize_image(cropped_img, self.input_size)
41 | 
42 |             pad_size = max(resize_img.shape[0], resize_img.shape[1])
43 |             return self._pad_image(resize_img, pad_size)
44 | 
45 |     def _resize_image(self, cropped_img, size):
46 |         h, w, _ = cropped_img.shape
47 |         if h > w:
48 |             scale = size / h
49 |             return cv2.resize(cropped_img, None, fx=scale, fy=scale)
50 |         else:
51 |             scale = size / w
52 |             return cv2.resize(cropped_img, None, fx=scale, fy=scale)
53 | 
54 |     def _crop_image(self, full_img, center, size):
55 |         h_offset = size[0] % 2
56 |         w_offset = size[1] % 2
57 |         self.bbox = [max(0, center[0]-size[0]//2), min(self.img_shape[0], center[0]+size[0]//2+h_offset),
58 |                      max(0, center[1]-size[1]//2), min(self.img_shape[1], center[1]+size[1]//2+w_offset)]
59 |         return full_img[self.bbox[0]:self.bbox[1], self.bbox[2]:self.bbox[3], :]
60 | 
61 | 
62 |     def _pad_image(self, img, size):
63 |         h, w, _ = img.shape
64 |         if size < h or size < w:
65 |             raise ValueError('Pad size cannot be smaller than the original image size')
66 | 
67 |         pad_h_offset = (size - h) % 2
68 |         pad_w_offset = (size - w) % 2
69 |         self.pad_boundary = [(size-h)//2+pad_h_offset, (size-h)//2, (size-w)//2+pad_w_offset, (size-w)//2]
70 |         return cv2.copyMakeBorder(img, top=self.pad_boundary[0],
71 |                                   bottom=self.pad_boundary[1],
72 |                                   left=self.pad_boundary[2],
73 |                                   right=self.pad_boundary[3], borderType=cv2.BORDER_CONSTANT, value=(128, 128, 128))
74 | 
--------------------------------------------------------------------------------
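A hedged sketch of feeding webcam frames through SelfTracker. The joint detections would normally come from the CPM network (as in run_demo_hand_with_tracker.py); passing None here makes the tracker fall back to its fixed center crop. The camera index 0 and input size 368 are illustrative:

import cv2
from utils.tracking_module import SelfTracker

cam = cv2.VideoCapture(0)
ok, frame = cam.read()
if ok:
    tracker = SelfTracker(frame.shape[:2], model_input_size=368)
    crop = tracker.tracking_by_joints(frame, joint_detections=None)
    print(crop.shape)  # square crop, padded to the larger resized side
cam.release()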
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import math
4 | import matplotlib.pyplot as plt
5 | from mpl_toolkits.mplot3d import Axes3D
6 | # from OpenGL.GL import *
7 | # from OpenGL.GLU import *
8 | 
9 | 
10 | 
11 | def read_square_image(file, cam, boxsize, type):
12 |     # from file
13 |     if type == 'IMAGE':
14 |         oriImg = cv2.imread(file)
15 |     # from webcam
16 |     elif type == 'WEBCAM':
17 |         _, oriImg = cam.read()
18 | 
19 |     scale = boxsize / (oriImg.shape[0] * 1.0)
20 |     imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LANCZOS4)
21 | 
22 |     output_img = np.ones((boxsize, boxsize, 3)) * 128
23 | 
24 |     if imageToTest.shape[1] < boxsize:
25 |         offset = imageToTest.shape[1] % 2
26 |         output_img[:, int(boxsize/2-math.ceil(imageToTest.shape[1]/2)):int(boxsize/2+math.ceil(imageToTest.shape[1]/2)+offset), :] = imageToTest
27 |     else:
28 |         output_img = imageToTest[:, int(imageToTest.shape[1]/2-boxsize/2):int(imageToTest.shape[1]/2+boxsize/2), :]
29 |     return output_img
30 | 
31 | 
32 | def resize_pad_img(img, scale, output_size):
33 |     resized_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
34 |     pad_h = (output_size - resized_img.shape[0]) // 2
35 |     pad_w = (output_size - resized_img.shape[1]) // 2
36 |     pad_h_offset = (output_size - resized_img.shape[0]) % 2
37 |     pad_w_offset = (output_size - resized_img.shape[1]) % 2
38 |     resized_pad_img = np.pad(resized_img, ((pad_w, pad_w+pad_w_offset), (pad_h, pad_h+pad_h_offset), (0, 0)),
39 |                              mode='constant', constant_values=128)
40 | 
41 |     return resized_pad_img
42 | 
43 | 
44 | def img_white_balance(img, white_ratio):
45 |     for channel in range(img.shape[2]):
46 |         channel_max = np.percentile(img[:, :, channel], 100-white_ratio)
47 |         channel_min = np.percentile(img[:, :, channel], white_ratio)
48 |         img[:, :, channel] = (channel_max-channel_min) * (img[:, :, channel] / 255.0)
49 |     return img
50 | 
51 | 
52 | def img_white_balance_with_bg(img, bg, white_ratio):
53 |     for channel in range(img.shape[2]):
54 |         channel_max = np.percentile(bg[:, :, channel], 100-white_ratio)
55 |         channel_min = np.percentile(bg[:, :, channel], white_ratio)
56 |         img[:, :, channel] = (channel_max-channel_min) * (img[:, :, channel] / 255.0)
57 |     return img
58 | 
59 | 
60 | def draw_predicted_heatmap(heatmap, input_size):
61 |     heatmap_resized = cv2.resize(heatmap, (input_size, input_size))
62 | 
63 |     output_img = None
64 |     tmp_concat_img = None
65 |     h_count = 0
66 |     for joint_num in range(heatmap_resized.shape[2]):
67 |         if h_count < 4:
68 |             tmp_concat_img = np.concatenate((tmp_concat_img, heatmap_resized[:, :, joint_num]), axis=1) \
69 |                 if tmp_concat_img is not None else heatmap_resized[:, :, joint_num]
70 |             h_count += 1
71 |         else:
72 |             output_img = np.concatenate((output_img, tmp_concat_img), axis=0) if output_img is not None else tmp_concat_img
73 |             tmp_concat_img = heatmap_resized[:, :, joint_num]  # start the new row with the current joint (previously this channel was dropped)
74 |             h_count = 1
75 |     # last row img
76 |     if h_count != 0:
77 |         while h_count < 4:
78 |             tmp_concat_img = np.concatenate((tmp_concat_img, np.zeros(shape=(input_size, input_size), dtype=np.float32)), axis=1)
79 |             h_count += 1
80 |         output_img = np.concatenate((output_img, tmp_concat_img), axis=0) if output_img is not None else tmp_concat_img
81 | 
82 |     # adjust heatmap color
83 |     output_img = output_img.astype(np.uint8)
84 |     output_img = cv2.applyColorMap(output_img, cv2.COLORMAP_JET)
85 |     return output_img
86 | 
87 | 
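# Editor's note (hedged sketch, not part of the original file): with the
# row-boundary fix above, every channel of a dummy 9-joint heatmap volume
# lands in the 4-wide grid, giving 3 rows of 64px tiles, i.e. a 192x256x3
# color image:
#
# dummy_hm = np.random.rand(32, 32, 9).astype(np.float32) * 255
# grid = draw_predicted_heatmap(dummy_hm, input_size=64)
# print(grid.shape)   # (192, 256, 3)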
88 | def draw_stages_heatmaps(stage_heatmap_list, orig_img_size):
89 | 
90 |     output_img = None
91 |     nStages = len(stage_heatmap_list)
92 |     nJoints = stage_heatmap_list[0].shape[3]
93 |     for stage in range(nStages):
94 |         cur_heatmap = np.squeeze(stage_heatmap_list[stage][0, :, :, 0:nJoints-1])  # was stage_heatmap_list[0], which drew the first stage repeatedly
95 |         cur_heatmap = cv2.resize(cur_heatmap, (orig_img_size, orig_img_size))
96 | 
97 |         channel_max = np.percentile(cur_heatmap, 99)
98 |         channel_min = np.percentile(cur_heatmap, 1)
99 |         cur_heatmap = 255.0 / (channel_max - channel_min) * (cur_heatmap - channel_min)
100 |         cur_heatmap = np.clip(cur_heatmap, 0, 255)
101 | 
102 |         cur_heatmap = np.repeat(np.expand_dims(np.amax(cur_heatmap, axis=2), axis=2), 3, axis=2)
103 |         output_img = np.concatenate((output_img, cur_heatmap), axis=1) if output_img is not None else cur_heatmap
104 |     return output_img.astype(np.uint8)
105 | 
106 | 
107 | def extract_2d_joint_from_heatmap(heatmap, input_size, joints_2d):
108 |     heatmap_resized = cv2.resize(heatmap, (input_size, input_size))
109 | 
110 |     for joint_num in range(heatmap_resized.shape[2]):
111 |         joint_coord = np.unravel_index(np.argmax(heatmap_resized[:, :, joint_num]), (input_size, input_size))
112 |         joints_2d[joint_num, :] = joint_coord
113 | 
114 |     return joints_2d
115 | 
116 | 
117 | def extract_3d_joints_from_heatmap(joints_2d, x_hm, y_hm, z_hm, input_size, joints_3d):
118 | 
119 |     for joint_num in range(x_hm.shape[2]):
120 |         coord_2d_y = joints_2d[joint_num][0]
121 |         coord_2d_x = joints_2d[joint_num][1]
122 | 
123 |         # x_hm_resized = cv2.resize(x_hm, (input_size, input_size))
124 |         # y_hm_resized = cv2.resize(y_hm, (input_size, input_size))
125 |         # z_hm_resized = cv2.resize(z_hm, (input_size, input_size))
126 |         # joint_x = x_hm_resized[max(int(coord_2d_x), 1), max(int(coord_2d_y), 1), joint_num] * 100
127 |         # joint_y = y_hm_resized[max(int(coord_2d_x), 1), max(int(coord_2d_y), 1), joint_num] * 100
128 |         # joint_z = z_hm_resized[max(int(coord_2d_x), 1), max(int(coord_2d_y), 1), joint_num] * 100
129 | 
130 | 
131 |         joint_x = x_hm[max(int(coord_2d_x/8), 1), max(int(coord_2d_y/8), 1), joint_num] * 10
132 |         joint_y = y_hm[max(int(coord_2d_x/8), 1), max(int(coord_2d_y/8), 1), joint_num] * 10
133 |         joint_z = z_hm[max(int(coord_2d_x/8), 1), max(int(coord_2d_y/8), 1), joint_num] * 10
134 |         joints_3d[joint_num, 0] = joint_x
135 |         joints_3d[joint_num, 1] = joint_y
136 |         joints_3d[joint_num, 2] = joint_z
137 |     joints_3d -= joints_3d[14, :]  # re-center the skeleton on joint 14
138 | 
139 |     return joints_3d
140 | 
141 | def draw_limbs_2d(img, joints_2d, limb_parents):
142 |     for limb_num in range(len(limb_parents)-1):
143 |         x1 = joints_2d[limb_num, 0]
144 |         y1 = joints_2d[limb_num, 1]
145 |         x2 = joints_2d[limb_parents[limb_num], 0]
146 |         y2 = joints_2d[limb_parents[limb_num], 1]
147 |         length = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
148 |         # if length < 10000 and length > 5:
149 |         deg = math.degrees(math.atan2(x1 - x2, y1 - y2))
150 |         polygon = cv2.ellipse2Poly((int((y1 + y2) / 2), int((x1 + x2) / 2)),
151 |                                    (int(length / 2), 3),
152 |                                    int(deg),
153 |                                    0, 360, 1)
154 |         cv2.fillConvexPoly(img, polygon, color=(0,255,0))
155 |     return img
156 | 
157 | def draw_limbs_3d(joints_3d, limb_parents, ax):
158 | 
159 |     for i in range(joints_3d.shape[0]):
160 |         x_pair = [joints_3d[i, 0], joints_3d[limb_parents[i], 0]]
161 |         y_pair = [joints_3d[i, 1], joints_3d[limb_parents[i], 1]]
162 |         z_pair = [joints_3d[i, 2], joints_3d[limb_parents[i], 2]]
163 |         ax.plot(x_pair, y_pair, zs=z_pair, linewidth=3)
164 | 
165 | 
166 | def draw_limb_3d_gl(joints_3d, limb_parents):  # requires the PyOpenGL imports commented out at the top of this file
167 | 
168 |     glLineWidth(2)
169 |     glBegin(GL_LINES)
170 |     glColor3f(1,0,0)
171 |     glVertex3fv((0,0,0))
172 |     glVertex3fv((100,0,0))
173 |     glColor3f(0,1,0)
174 |     glVertex3fv((0,0,0))
175 |     glVertex3fv((0,100,0))
176 |     glColor3f(0,0,1)
177 |     glVertex3fv((0,0,0))
178 |     glVertex3fv((0,0,100))
179 |     glEnd()
180 | 
181 |     glColor3f(1,1,1)
182 |     glBegin(GL_LINES)
183 |     for i in range(joints_3d.shape[0]):
184 |         glVertex3fv((joints_3d[i, 0], joints_3d[i, 1], joints_3d[i, 2]))
185 |         glVertex3fv((joints_3d[limb_parents[i], 0], joints_3d[limb_parents[i], 1], joints_3d[limb_parents[i], 2]))
186 |     glEnd()
187 | 
188 |     # glBegin(GL_TRIANGLES)
189 |     # glVertex3f(0, 100, 0)
190 |     # glVertex3f(100, 0, 50)
191 |     # glVertex3f(0, -100, 100)
192 |     # glEnd()
193 | 
194 | 
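# Editor's note (hedged sketch, not part of the original file): draw_limbs_2d
# expects joints as (row, col) pairs, matching extract_2d_joint_from_heatmap,
# and draws one filled ellipse per limb onto the image in place:
#
# canvas = np.zeros((368, 368, 3), dtype=np.uint8)
# joints = np.array([[100, 100], [200, 150]])   # (y, x) per joint
# limb_parents = [1, 1]                          # joint 0 connects to joint 1
# canvas = draw_limbs_2d(canvas, joints, limb_parents)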
195 | def draw_float_range_img(img):
196 |     tmp_min = np.min(img)
197 |     tmp_max = np.max(img)
198 |     img = cv2.convertScaleAbs(img, None, 255.0 / (tmp_max - tmp_min), -255.0 * tmp_min / (tmp_max - tmp_min))  # alpha/beta map [min, max] -> [0, 255]; the original call dropped the beta shift
199 |     img = cv2.applyColorMap(img, cv2.COLORMAP_JET)
200 |     return img.astype(np.uint8)
201 | 
202 | 
203 | 
204 | 
205 | 
206 | 
207 | 
208 | 
209 | 
210 | 
211 | 
212 | 
213 | 
--------------------------------------------------------------------------------
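A quick numpy check of the alpha/beta pair used in the corrected draw_float_range_img: it maps the value range [min, max] onto [0, 255]. This is a standalone sketch, independent of the repo:

import numpy as np

x = np.array([-2.0, 0.0, 6.0])
alpha = 255.0 / (x.max() - x.min())
beta = -255.0 * x.min() / (x.max() - x.min())
print(np.round(alpha * x + beta))  # [  0.  64. 255.]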