├── .gitignore
├── README.md
├── clear.sh
├── cpm.py
├── custom_ops.py
├── labels
│   └── python
│       └── delete_points.py
├── read_data.py
├── script
│   ├── check.sh
│   ├── clear_log.sh
│   ├── init_dir.sh
│   └── run.sh
├── test.py
└── train.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# python
*.py[cod]
*.so
*.egg
*.egg-info
dist
build

data/*
tmp
test
params/*
log
backup
nohup.out
#clear.sh
*.backup
labels/txt/
labels/python/set_zeros.py


#!data/python
#!params
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# pose_estimation

Steps to run this project:

* run `./script/init_dir.sh` to create the necessary directories
* place your training data in the `data` directory
* place your labels in the `labels/txt` directory
* modify the class `Config()` in `train.py`
* modify the data path and label path passed to the reader object in `train.py`

Run your model with the following command:

```
python train.py
```
or

```
./script/run.sh
```
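
## Label format

`read_data.py` splits each annotation line on spaces and commas and expects
`points_num * 2 + 2` fields: a filename, the point coordinates, and a final
`begin` integer. With the default `points_num = 15`, a line looks like this
(the filename and field names are illustrative):

```
example.png x1,y1,x2,y2,...,x15,y15,begin
```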
--------------------------------------------------------------------------------
/clear.sh:
--------------------------------------------------------------------------------
#!/bin/bash

bash ./script/clear_log.sh
rm ./params/*
rm nohup.out
--------------------------------------------------------------------------------
/cpm.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8
import tensorflow as tf
from datetime import datetime
import os
import numpy as np
import read_data

class CPM:
    def __init__(self, config):
        self.global_step = tf.get_variable("global_step", initializer=0,
                dtype=tf.int32, trainable=False)
        self.wd = config.wd
        self.stddev = config.stddev
        self.batch_size = config.batch_size
        self.use_fp16 = config.use_fp16
        self.points_num = config.points_num
        self.fm_channel = config.fm_channel
        self.moving_average_decay = config.moving_average_decay
        self.params_dir = config.params_dir

        self.fm_height = config.fm_height
        self.fm_width = config.fm_width

        self.images = tf.placeholder(
                dtype = tf.float32,
                shape = (self.batch_size, config.img_height, config.img_width, 1)
                )
        self.labels = tf.placeholder(
                dtype = tf.float32,
                shape = (self.batch_size, config.fm_height, config.fm_width, self.points_num))
        self.coords = tf.placeholder(
                dtype = tf.float32,
                shape = (self.batch_size, self.points_num * 2))


    def build_fc(self, is_train):
        fc_is_train = is_train

        with tf.name_scope("original_images") as scope:
            self._image_summary(self.images, 1)
        out_fc = self.cnn_fc(self.images, fc_is_train, 'fc')
        self.add_to_euclidean_loss(self.batch_size, out_fc, self.coords, 'fcn')

        return out_fc

    def cnn_fc(self, input_, is_train, name):
        trainable = is_train
        is_BN = True

        with tf.variable_scope(name) as scope:
            conv1 = self.conv_layer(input_, 5, 96,
                    'conv1', is_BN, trainable)
            pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1],
                    strides=[1, 2, 2, 1],
                    padding="SAME", name="pool1")

            conv2 = self.conv_layer(pool1, 5, 256,
                    'conv2', is_BN, trainable)
            pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1],
                    strides=[1, 2, 2, 1],
                    padding="SAME", name="pool2")

            conv3 = self.conv_layer(pool2, 5, 384,
                    'conv3', is_BN, trainable)
            pool3 = tf.nn.max_pool(conv3, ksize=[1, 3, 3, 1],
                    strides=[1, 2, 2, 1],
                    padding="SAME", name="pool3")

            conv4 = self.conv_layer(pool3, 3, 384,
                    'conv4', is_BN, trainable)

            conv5 = self.conv_layer(conv4, 3, 256,
                    'conv5', is_BN, trainable)
            conv6 = self.conv_layer(conv5, 3, 256,
                    'conv6', is_BN, trainable)
            conv7 = self.conv_layer(conv6, 3, 256,
                    'conv7', is_BN, trainable)
            conv8 = self.conv_layer(conv7, 3, 128,
                    'conv8', is_BN, trainable)
            conv9 = self.conv_layer(conv8, 1, 128,
                    'conv9', is_BN, trainable)
            if is_train:
                conv9 = tf.nn.dropout(conv9, 0.5)
            fc1 = self.fc_layer(conv9, 128, 'fc1', is_BN, trainable)
            if is_train:
                fc1 = tf.nn.dropout(fc1, 0.5)
            fc2 = self.final_fc_layer(fc1, self.points_num * 2,
                    'fc2', trainable)

        return fc2


    def final_fc_block(self, input_, is_train, name):
        trainable = is_train

        with tf.variable_scope(name) as scope:
            final_fc = self.final_fc_layer(input_,
                    self.points_num * 2, 'final_fc', trainable)

        return final_fc

    def loss(self):
        return tf.add_n(tf.get_collection('losses'), name = "total_loss")

    def add_to_euclidean_loss(self, batch_size, predicts, labels, name):
        flatten_labels = tf.reshape(labels, [batch_size, -1])
        flatten_predicts = tf.reshape(predicts, [batch_size, -1])

        with tf.name_scope(name) as scope:
            euclidean_loss = tf.sqrt(tf.reduce_sum(
                tf.square(tf.subtract(flatten_predicts, flatten_labels)), 1))
            euclidean_loss_mean = tf.reduce_mean(euclidean_loss,
                    name='euclidean_loss_mean')

        tf.add_to_collection("losses", euclidean_loss_mean)

    def train_op(self, total_loss, global_step):
        self._loss_summary(total_loss)

        optimizer = tf.train.AdamOptimizer()
        grads = optimizer.compute_gradients(total_loss)

        apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)

        variable_averages = tf.train.ExponentialMovingAverage(
                self.moving_average_decay, global_step)
        variable_averages_op = variable_averages.apply(tf.trainable_variables())

        with tf.control_dependencies([apply_gradient_op, variable_averages_op]):
            train_op = tf.no_op(name = "train")

        return train_op

    def save(self, sess, saver, filename, global_step):
        path = saver.save(sess, self.params_dir + filename, global_step=global_step)
        print "Save params at " + path

    def restore(self, sess, saver, filename):
        print "Restore from previous model: ", self.params_dir + filename
        saver.restore(sess, self.params_dir + filename)
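
    # All loss terms accumulate in the "losses" graph collection: the
    # euclidean coordinate loss from add_to_euclidean_loss plus one weight
    # decay term per layer, and loss() returns their sum via tf.add_n.
    # train_op then minimizes that total with Adam and keeps exponential
    # moving averages of all trainable variables.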

    def fc_layer(self, bottom, out_num, name, is_BN, trainable):
        flatten_bottom = tf.reshape(bottom, [self.batch_size, -1])
        with tf.variable_scope(name) as scope:
            weights = self._variable_with_weight_decay(
                    "weights",
                    shape = [flatten_bottom.get_shape()[-1], out_num],
                    stddev = self.stddev,
                    wd = self.wd,
                    trainable=trainable)
            mul = tf.matmul(flatten_bottom, weights)
            biases = self._variable_on_cpu('biases', [out_num],
                    tf.constant_initializer(0.0), trainable)
            pre_activation = tf.nn.bias_add(mul, biases)
            if is_BN:
                bn_activation = tf.layers.batch_normalization(pre_activation)
                top = tf.nn.relu(bn_activation, name=scope.name)
            else:
                top = tf.nn.relu(pre_activation, name=scope.name)
            self._activation_summary(top)
        return top

    def final_fc_layer(self, bottom, out_num, name, trainable):
        flatten_bottom = tf.reshape(bottom, [self.batch_size, -1])
        with tf.variable_scope(name) as scope:
            weights = self._variable_with_weight_decay(
                    "weights",
                    shape = [flatten_bottom.get_shape()[-1], out_num],
                    stddev = self.stddev,
                    wd = self.wd,
                    trainable=trainable)
            mul = tf.matmul(flatten_bottom, weights)
            biases = self._variable_on_cpu('biases', [out_num],
                    tf.constant_initializer(0.0), trainable)
            top = tf.nn.bias_add(mul, biases)
            self._activation_summary(top)
        return top

    def conv_layer(self, bottom, kernel_size, out_channel, name, is_BN, trainable):
        with tf.variable_scope(name) as scope:
            kernel = self._variable_with_weight_decay(
                    "weights",
                    shape = [kernel_size, kernel_size, bottom.get_shape()[-1],
                        out_channel],
                    stddev = self.stddev,
                    wd = self.wd,
                    trainable=trainable)
            conv = tf.nn.conv2d(bottom, kernel, [1, 1, 1, 1], padding="SAME")
            biases = self._variable_on_cpu('biases', [out_channel],
                    tf.constant_initializer(0.0), trainable)
            pre_activation = tf.nn.bias_add(conv, biases)
            if is_BN:
                bn_activation = tf.layers.batch_normalization(pre_activation)
                top = tf.nn.relu(bn_activation, name=scope.name)
            else:
                top = tf.nn.relu(pre_activation, name=scope.name)
            self._activation_summary(top)
        return top

    def final_conv_layer(self, bottom, kernel_size, out_channel, name, trainable):
        with tf.variable_scope(name) as scope:
            kernel = self._variable_with_weight_decay(
                    "weights",
                    shape = [kernel_size, kernel_size, bottom.get_shape()[-1],
                        out_channel],
                    stddev = self.stddev,
                    wd = self.wd,
                    trainable=trainable)
            conv = tf.nn.conv2d(bottom, kernel, [1, 1, 1, 1], padding="SAME")
            biases = self._variable_on_cpu('biases', [out_channel],
                    tf.constant_initializer(0.0), trainable)
            top = tf.nn.bias_add(conv, biases)
            self._activation_summary(top)
        return top

    def _variable_on_cpu(self, name, shape, initializer, trainable):
        with tf.device('/cpu:0'):
            dtype = tf.float16 if self.use_fp16 else tf.float32
            var = tf.get_variable(name, shape, initializer=initializer,
                    dtype=dtype, trainable=trainable)
        return var

    def _variable_with_weight_decay(self, name, shape, stddev, wd, trainable):
        dtype = tf.float16 if self.use_fp16 else tf.float32
        var = self._variable_on_cpu(name, shape,
                tf.truncated_normal_initializer(stddev=stddev, dtype=dtype),
                trainable)
        if wd is not None:
            weight_decay = tf.multiply(tf.nn.l2_loss(var), wd,
                    name='weights_loss')
            tf.add_to_collection("losses", weight_decay)
        return var
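
    # Note on regularization: when `wd` is not None, an L2 penalty
    # wd * tf.nn.l2_loss(var) == wd * sum(var ** 2) / 2 is added to the
    # "losses" collection for every weight created above, so loss() sums
    # it together with the euclidean loss term.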

    def _activation_summary(self, x):
        name = x.op.name
        tf.summary.histogram(name + '/activations', x)
        tf.summary.scalar(name + '/sparsity', tf.nn.zero_fraction(x))

    def _image_summary(self, x, channels):
        def sub(batch, idx):
            name = x.op.name
            tmp = x[batch, :, :, idx] * 255
            tmp = tf.expand_dims(tmp, axis = 2)
            tmp = tf.expand_dims(tmp, axis = 0)
            tf.summary.image(name + '-' + str(idx), tmp, max_outputs = 100)
        if (self.batch_size > 1):
            for idx in xrange(channels):
                # the first batch
                sub(0, idx)
                # the last batch
                sub(-1, idx)
        else:
            for idx in xrange(channels):
                sub(0, idx)

    def _loss_summary(self, loss):
        tf.summary.scalar(loss.op.name + " (raw)", loss)

    def _fm_summary(self, predicts):
        with tf.name_scope("fcn_summary") as scope:
            self._image_summary(self.labels, self.points_num)
            tmp_predicts = tf.nn.relu(predicts)
            self._image_summary(tmp_predicts, self.points_num)


def main():
    pass

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/custom_ops.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8

import numpy as np
import tensorflow as tf

ratio = 1.0
class_weights = np.array([
    1.0, 1.0, 1.0, 1.0, 1.0,
    1.0, 1.0, 1.0, 1.0, 1.0,
    1.0, 1.0, 1.0, 1.0, 1.0,
    1.0
    # 1.0, 10.0, 10.0, 10.0, 10.0,
    # 10.0, 10.0, 10.0, 10.0, 10.0,
    # 10.0, 10.0, 10.0, 10.0, 10.0,
    # 10.0
    ]).astype(np.float32).reshape([16])

def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
    # Need to generate a unique name to avoid duplicates:
    rnd_name = 'PyFuncGrad' + str(np.random.randint(0, 1E+8))

    tf.RegisterGradient(rnd_name)(grad)  # see _MySquareGrad for grad example
    g = tf.get_default_graph()
    with g.gradient_override_map({"PyFunc": rnd_name}):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)

def _softmax_cross_entropy(predict, labels):
    scratch = np.max(predict, axis = -1)
    backprop = predict - np.expand_dims(scratch, axis = -1)
    scratch = np.sum(np.exp(backprop), axis=-1)
    loss = labels * (np.expand_dims(np.log(scratch), axis=-1) - backprop)
    loss = np.sum(loss, axis = -1)

    backprop = np.exp(backprop) / np.expand_dims(scratch, axis=-1) - labels

    return loss, backprop

def self_loss(predicts, labels):

    # shape = predicts.shape
    # predicts = predicts.reshape([-1, 20])
    # labels = labels.reshape([-1, 20])

    batch_label = np.argmax(labels, axis = -1).astype(np.float32)
    batch_zeros = np.zeros_like(batch_label).astype(np.float32)
    mask1 = np.not_equal(batch_zeros, batch_label)
    rand_u = np.random.uniform(low=0.0, high=1.0, size=batch_label.shape)
    mask2 = rand_u < ratio
    mask = mask1 | mask2

    loss, backprop = _softmax_cross_entropy(predicts, labels)
    loss = np.where(mask, loss, batch_zeros)
    backprop_zeros = np.zeros_like(backprop)
    backprop = np.where(np.expand_dims(mask, axis=-1), backprop, backprop_zeros)
    backprop = backprop * class_weights
    # backprop = np.reshape(backprop, shape)

    # loss = np.mean(loss)
    # return loss
    return loss, backprop
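
# How the custom op fits together: `self_loss` computes both the masked
# cross-entropy loss and its gradient (softmax - labels) in numpy, and
# `py_func` registers that second output as the gradient of the first via
# gradient_override_map. `mask1` keeps every sample whose label is not the
# background class 0, and `mask2` re-admits background samples with
# probability `ratio` (1.0 here, so in effect nothing is dropped).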

def custom_loss(predicts, labels, name=None):
    # with tf.op_scope([predicts, labels], name, "CustomLoss") as name:
    #     loss, grad = tf.py_func(self_loss, [predicts, labels],
    #             [tf.float64, tf.float64], stateful=False, name="My")
    with tf.name_scope(name, "CustomLoss", [predicts, labels]) as name:
        loss, backprop = py_func(self_loss, [predicts, labels],
                [tf.float32, tf.float32], name=name,
                grad=_CustomLossGrad)
        # return tf.reduce_mean(loss)
        return loss


def _BroadcastMul(vec, mat):
    vec = tf.expand_dims(vec, -1)
    return vec * mat

# def _CustomLossGrad(op, grad_loss):
def _CustomLossGrad(op, grad_loss, grad_grad):
    softmax_grad = op.outputs[1]
    grad = _BroadcastMul(grad_loss, softmax_grad)

    if grad_grad.op.type not in ("ZerosLike", "Zeros"):
        logits = op.inputs[0]
        softmax = tf.nn.softmax(logits)
        grad += ((grad_grad - tf.squeeze(tf.matmul(grad_grad[:, None, :],
            softmax[:, :, None]), axis=1) * softmax))

    grad /= tf.cast(tf.size(grad) / tf.shape(grad)[-1], tf.float32)

    return grad, None

def main():
    with tf.Session() as sess:
        predicts = np.random.uniform(0.0, 1.0, (100, 20)).reshape((2, 10, 5,
            20)).astype(np.float32)
        # labels = np.zeros([200]).reshape((10, 20))
        tmp = np.random.randint(0, 20, size=(2, 10, 5)).astype(np.int32)
        # tmp[3] = 0
        # tmp[1] = 0
        labels = np.eye(20)[tmp].astype(np.float32)
        print labels.shape

        # lloss, ggrad = self_loss(predicts, labels)
        # loss = self_loss(predicts, labels)
        # print loss
        # print grad

        predicts = tf.constant(predicts)
        labels = tf.constant(labels)

        lloss = custom_loss(predicts, labels)
        loss = tf.losses.softmax_cross_entropy(labels, predicts)

        eval_lloss = lloss.eval()
        eval_loss = loss.eval()
        # print sess.run(loss)

        # print (tf.gradients(loss, predicts))
        print "Grad"
        # print ggrad
        my = tf.gradients(lloss, predicts)[0].eval()
        original = tf.gradients(loss, predicts)[0].eval()
        # my /= 10
        mask = np.isclose(my, original)
        print mask
        print mask.shape
        print eval_lloss
        print eval_loss
        # print (tf.gradients(loss, predicts)[0].eval())

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/labels/python/delete_points.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8

import re
import sys

def _get_point(from_list, idx):
    return [from_list[idx*2], from_list[idx*2 + 1]]

def delete_points(src_file, dst_file):
    def del_points(from_list):
        delete = [2, 4, 7, 11]
        to_list = list()
        for idx in xrange(19):
            if idx in delete:
                continue
            to_list += _get_point(from_list, idx)
        return to_list

    with open(src_file, 'rb') as fr, open(dst_file, 'wb') as fw:
        for line in fr:
            tmp = re.split(" |,", line.strip())
            if len(tmp) != 40:
                print len(tmp)
                print ("Length of Data Error.")
                sys.exit(0)
            filename = tmp[0]
            coords = tmp[1:39]
            begin = tmp[39]
            coords = del_points(coords)

            fw.write(filename + ' ')
            for item in coords:
                fw.write(item + ',')
            fw.write(begin + '\n')

def main():
    src_file = "../final/correct_19.txt"
    dst_file = "./lala.txt"
    delete_points(src_file, dst_file)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/read_data.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8

import numpy as np
import cv2
import sys
import os
import re
import random
import math

class PoseReader():

    def __init__(self, annos_path, data_path, config):
        self.records = list()
        self.batch_size = config.batch_size
        self.points_num = config.points_num
        self.fm_channel = config.fm_channel
        self.img_width = config.img_width
        self.img_height = config.img_height
        self.origin_width = config.origin_width
        self.origin_height = config.origin_height
        self.record_len = self.points_num * 2 + 2
        self.data_path = data_path
        self.line_idx = 0

        self.fm_width = config.fm_width
        self.fm_height = config.fm_height
        self.sigma = config.sigma
        self.alpha = config.alpha
        self.radius = config.radius

        # self.float_max = 1.0 - 1.0 / self.img_width
        self.float_max = 1.0

        self.degree = config.degree

        if config.is_color:
            self.color_mode = 1
        else:
            self.color_mode = 0
        with open(annos_path, 'rb') as fr:
            for line in fr:
                tmp = re.split(',| ', line.strip())
                if len(tmp) != self.record_len:
                    print "Length Error: ", len(tmp)
                    sys.exit(0)
                filename = tmp[0]
                coords = [int(x) for x in tmp[1:self.record_len - 1]]
                begin = int(tmp[-1])
                self.records.append((filename, np.array(coords), begin))
        self.size = len(self.records)


    def random_batch(self):
        rand = random.sample(xrange(self.size), self.batch_size)
        filename_list = list()
        coords_list = list()
        begins_list = list()
        for idx in rand:
            filename_list.append(self.records[idx][0])
            coords_list.append(self.records[idx][1])
            begins_list.append(self.records[idx][2])

        img_list = list()
        for filename in filename_list:
            img = cv2.imread(os.path.join(self.data_path, filename),
                    self.color_mode)
            img = cv2.resize(img, (self.img_width, self.img_height))
            img_list.append(img)

        out_imgs = self._img_preprocess(np.stack(img_list))
        out_labels = self._label_preprocess(np.stack(coords_list))
        out_begins = np.stack(begins_list)

        return out_imgs, out_labels, out_begins, filename_list


    def batch(self, line_idx=None):
        if line_idx is not None:
            self.line_idx = line_idx
        end_idx = self.line_idx + self.batch_size
        idxs = range(self.line_idx, end_idx)
        for idx in xrange(len(idxs)):
            if idxs[idx] >= self.size:
                idxs[idx] %= self.size
        if end_idx < self.size:
            self.line_idx = end_idx
        else:
            self.line_idx = end_idx % self.size

        filename_list = list()
        coords_list = list()
        begins_list = list()
        for idx in idxs:
            filename_list.append(self.records[idx][0])
            coords_list.append(self.records[idx][1])
            begins_list.append(self.records[idx][2])

        img_list = list()
        for filename in filename_list:
            img = cv2.imread(os.path.join(self.data_path, filename),
                    self.color_mode)
            img = cv2.resize(img, (self.img_width, self.img_height))
            img_list.append(img)

        out_imgs = self._img_preprocess(np.stack(img_list))
        out_labels = self._label_preprocess(np.stack(coords_list))
        out_begins = np.stack(begins_list)

        return out_imgs, out_labels, out_begins, filename_list
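
    # Preprocessing conventions used below: pixel values are scaled to
    # [-0.5, 0.5], and coordinates are normalized to [0, 1] by dividing by
    # the *original* image dimensions, since the label files store pixel
    # coordinates in the un-resized frame.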

    def _img_preprocess(self, imgs):
        if self.color_mode == 0:
            output = np.reshape(imgs, [-1, self.img_height, self.img_width, 1])
        elif self.color_mode == 1:
            output = np.reshape(imgs, [-1, self.img_height, self.img_width, 3])
        else:
            raise Exception("color_mode error.")

        output = output.astype(np.float32) * (1. / 255) - 0.5
        return output

    def _label_preprocess(self, label):
        output = np.reshape(label, [-1, self.points_num * 2]).astype(np.float32)
        output[:, ::2] /= self.origin_width
        output[:, 1::2] /= self.origin_height
        return output

    def label2fm(self, label):
        def get_point(a_list, idx):
            w, h = a_list[idx * 2: idx * 2 + 2]
            return int(w * self.fm_width), int(h * self.fm_height)

        def _gaussian2d(x, y, x0, y0, a, sigmax, sigmay):
            xx = (float(x) - x0) ** 2 / 2 / sigmax ** 2
            yy = (float(y) - y0) ** 2 / 2 / sigmay ** 2
            return a * math.exp(- xx - yy)

        def draw(img, center):
            w0, h0 = center
            height, width = img.shape
            # for h in xrange(height):
            #     for w in xrange(width):
            #         if(math.fabs(h - h0) + math.fabs(w - w0) < self.radius):
            for w in xrange(max(0, w0-self.radius), min(width, w0+self.radius+1)):
                for h in xrange(max(0, h0-self.radius), min(height, h0+self.radius+1)):
                    if math.fabs(h - h0) + math.fabs(w - w0) < self.radius:
                        img[h, w] = _gaussian2d(w, h, w0, h0, self.alpha, self.sigma,
                                self.sigma)

        fm_label = np.zeros((label.shape[0], self.fm_height, self.fm_width, self.points_num))
        for batch_idx in xrange(len(fm_label)):
            for ii in xrange(self.points_num):
                w, h = get_point(label[batch_idx], ii)
                draw(fm_label[batch_idx, :, :, ii], (w, h))
        return fm_label.astype(np.float32)
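
    # label2fm renders each keypoint as an unnormalized 2-D Gaussian on the
    # feature map, fm[h, w] = alpha * exp(-((w - w0)^2 + (h - h0)^2) / (2 * sigma^2)),
    # written only inside an L1 ball of `radius` around the point, so each
    # channel is a heatmap peaking at `alpha` on the keypoint itself.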

    def label2sm_fm(self, label):
        def get_point(a_list, idx):
            w, h = a_list[idx * 2: idx * 2 + 2]
            return int(w * self.fm_width), int(h * self.fm_height)

        def p8_distance(h1, h2, w1, w2):
            return max(math.fabs(h1 - h2), math.fabs(w1 - w2))

        def p4_distance(h1, h2, w1, w2):
            return math.fabs(h1 - h2) + math.fabs(w1 - w2)

        def draw(img, center, idx):
            w0, h0 = center
            height, width = img.shape
            for w in xrange(max(0, w0-self.radius), min(width, w0+self.radius+1)):
                for h in xrange(max(0, h0-self.radius), min(height, h0+self.radius+1)):
                    if p8_distance(h, h0, w, w0) < self.radius:
                        img[h, w] = idx + 1
        fm_label = np.zeros((label.shape[0], self.fm_height, self.fm_width))
        for batch_idx in xrange(len(fm_label)):
            for ii in xrange(self.points_num):
                w, h = get_point(label[batch_idx], ii)
                draw(fm_label[batch_idx], (w, h), ii)
        return fm_label.astype(np.int32)

    def _visualize(self, imgs, merge):
        import matplotlib.pyplot as plt
        if (merge):
            imgs = np.amax(imgs, axis = 2)
        imgs = np.squeeze(imgs)
        plt.imshow(imgs, cmap='gray')
        plt.show()

    def _draw_imgs(self, imgs, coords):
        def get_point(a_list, idx):
            w, h = a_list[idx * 2: idx * 2 + 2]
            return int(w * self.img_width), int(h * self.img_height)

        import matplotlib.pyplot as plt
        for idx in xrange(len(imgs)):
            img = np.squeeze(imgs[idx])
            coord = coords[idx]
            for ii in xrange(self.points_num):
                w, h = get_point(coord, ii)
                cv2.circle(img, (w, h), 1, 1)
            plt.imshow(img, cmap='gray')
            plt.show()

    def _rotate(self, origin, angle):
        x, y = origin
        o_y = 0.5 + (y - 0.5) * math.cos(angle) + (x - 0.5) * math.sin(angle)
        o_x = 0.5 + (x - 0.5) * math.cos(angle) - (y - 0.5) * math.sin(angle)
        return o_x, o_y

    def _random_rotate(self, images, labels, degree):
        if images.shape[0] != labels.shape[0]:
            raise Exception("Batch size Error.")
        degree = degree * math.pi / 180
        rand_degree = np.random.uniform(-degree, degree, images.shape[0])

        o_images = np.zeros_like(images)
        o_labels = np.zeros_like(labels)
        for idx in xrange(images.shape[0]):
            theta = rand_degree[idx]

            # labels
            for ii in xrange(self.points_num):
                o_labels[idx, 2*ii: 2*ii+2] = self._rotate(labels[idx, 2*ii: 2*ii+2], theta)

            # image
            M = cv2.getRotationMatrix2D((self.img_width/2, self.img_height/2), -theta*180/math.pi, 1)
            o_images[idx] = np.expand_dims(cv2.warpAffine(images[idx], M, (self.img_width, self.img_height)), axis=2)

        return o_images, o_labels

    def _batch_random_rotate(self, images, labels, degree):
        if images.shape[0] != labels.shape[0]:
            raise Exception("Batch size Error.")
        degree = degree * math.pi / 180
        rand_degree = np.random.uniform(-degree, degree)

        o_images = np.zeros_like(images)
        o_labels = np.zeros_like(labels)
        for idx in xrange(images.shape[0]):
            theta = rand_degree

            # labels
            for ii in xrange(self.points_num):
                o_labels[idx, 2*ii: 2*ii+2] = self._rotate(labels[idx, 2*ii: 2*ii+2], theta)

            # image
            M = cv2.getRotationMatrix2D((self.img_width/2, self.img_height/2), -theta*180/math.pi, 1)
            o_images[idx] = np.expand_dims(cv2.warpAffine(images[idx], M, (self.img_width, self.img_height)), axis=2)

        return o_images, o_labels

    def _random_flip_lr(self, images, labels):
        if images.shape[0] != labels.shape[0]:
            raise Exception("Batch size Error.")
        rand_u = np.random.uniform(0.0, 1.0, images.shape[0])
        rand_cond = rand_u > 0.5

        o_images = np.zeros_like(images)
        o_labels = np.zeros_like(labels)

        for idx in xrange(images.shape[0]):
            condition = rand_cond[idx]
            if condition:
                # "flip"
                o_images[idx] = np.fliplr(images[idx])
                o_labels[idx, ::2] = self.float_max - labels[idx, ::2]
                o_labels[idx, 1::2] = labels[idx, 1::2]
            else:
                # "origin"
                o_images[idx] = images[idx]
                o_labels[idx] = labels[idx]

        return o_images, o_labels

    def _batch_random_flip_lr(self, images, labels):
        if images.shape[0] != labels.shape[0]:
            raise Exception("Batch size Error.")
        rand_u = np.random.uniform(0.0, 1.0)
        rand_cond = rand_u > 0.5

        o_images = np.zeros_like(images)
        o_labels = np.zeros_like(labels)

        for idx in xrange(images.shape[0]):
            condition = rand_cond
            if condition:
                # "flip"
                o_images[idx] = np.fliplr(images[idx])
                o_labels[idx, ::2] = self.float_max - labels[idx, ::2]
                o_labels[idx, 1::2] = labels[idx, 1::2]
            else:
                # "origin"
                o_images[idx] = images[idx]
                o_labels[idx] = labels[idx]

        return o_images, o_labels
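
    # Augmentation order in the two getters below: flip left-right first,
    # then rotate about the image center (labels are rotated in normalized
    # coordinates by _rotate, the image by the matching cv2 affine warp),
    # and only then render the coordinates into Gaussian heatmaps, so the
    # heatmaps always match the distorted image. The _batch_* variants apply
    # one shared random draw to the whole batch instead of one per sample.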

    def get_random_batch(self, distort=True):

        imgs, labels, begins, filename_list = self.random_batch()
        if distort:
            imgs, labels = self._random_flip_lr(imgs, labels)
            imgs, labels = self._random_rotate(imgs, labels, self.degree)
        fm = self.label2fm(labels)

        return (imgs.reshape([self.batch_size, self.img_height, self.img_width, 1]),
                fm.reshape([self.batch_size, self.fm_height, self.fm_width, self.points_num]),
                labels.reshape([self.batch_size, self.points_num * 2]),
                begins,
                filename_list)

    def get_batch(self, distort=True, line_idx=None):

        imgs, labels, begins, filename_list = self.batch(
                line_idx=line_idx)
        if distort:
            imgs, labels = self._batch_random_flip_lr(imgs, labels)
            imgs, labels = self._batch_random_rotate(imgs, labels, self.degree)
        fm = self.label2fm(labels)

        return (imgs.reshape([self.batch_size, self.img_height, self.img_width, 1]),
                fm.reshape([self.batch_size, self.fm_height, self.fm_width, self.points_num]),
                labels.reshape([self.batch_size, self.points_num * 2]),
                begins,
                filename_list)



def main():
    pass


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/script/check.sh:
--------------------------------------------------------------------------------
#!/bin/bash

tensorboard --logdir=./log/train_log
--------------------------------------------------------------------------------
/script/clear_log.sh:
--------------------------------------------------------------------------------
#!/bin/bash

rm -rf ./log/*
--------------------------------------------------------------------------------
/script/init_dir.sh:
--------------------------------------------------------------------------------
#!/bin/bash

mkdir -p params
mkdir -p log
mkdir -p data

mkdir -p labels
mkdir -p labels/txt
mkdir -p labels/python
--------------------------------------------------------------------------------
/script/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash

nohup \
python train.py &
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8

from datetime import datetime
import os
import random

import tensorflow as tf
import numpy as np

import cpm
import read_data


class Config():

    #
    batch_size = 1
    initialize = False
    steps = "-1"
    gpu = '/gpu:0'
    # the number of images in your test dataset
    test_num = 0

    # image config
    points_num = 15
    fm_channel = points_num + 1
    origin_height = 212
    origin_width = 256
    img_height = 216
    img_width = 256
    is_color = False


    # feature map config
    fm_width = img_width >> 1
    fm_height = img_height >> 1
    sigma = 2.0
    alpha = 1.0
    radius = 12

    # random distortion
    degree = 15

    # solver config
    wd = 5e-4
    #wd = None
    stddev = 5e-2
    use_fp16 = False
    moving_average_decay = 0.999

    # checkpoint path and filename
    logdir = "./log/train_log/"
    params_dir = "./params/"
    load_filename = "cpm" + '-' + steps
    save_filename = "cpm"

    # iterations config
    max_iteration = 500000
    checkpoint_iters = 2000
    summary_iters = 100
    validate_iters = 2000
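
# Before testing, set `steps` to the global step of a saved checkpoint
# (checkpoints are written as params_dir + "cpm-<global_step>") and set
# `test_num` to the number of images in your test set; with the default
# placeholders ("-1" and 0), restore will fail and the test loop below
# runs zero iterations.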


def main():

    config = Config()
    with tf.Graph().as_default():

        # create a reader object
        reader = read_data.PoseReader("./labels/txt/validate_annos.txt",
                "./data/train_imgs/", config)

        # create a model object
        model = cpm.CPM(config)

        # feedforward
        predicts = model.build_fc(False)

        # return the loss
        loss = model.loss()

        # Initializing operation
        init_op = tf.global_variables_initializer()

        saver = tf.train.Saver(max_to_keep = 100)

        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:

            sess.run(init_op)
            model.restore(sess, saver, config.load_filename)

            # start testing
            for idx in xrange(config.test_num):
                with tf.device("/cpu:0"):
                    imgs, fm, coords, begins, filename_list = reader.get_batch(distort=False)

                # feed data into the model
                feed_dict = {
                        model.images: imgs,
                        model.coords: coords,
                        model.labels: fm
                        }
                with tf.device(config.gpu):
                    #
                    predict_coords = sess.run(predicts, feed_dict=feed_dict)




if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8

from datetime import datetime
import os
import random

import tensorflow as tf
import numpy as np

import cpm
import read_data


class Config():

    #
    batch_size = 1
    initialize = True
    steps = "-1"
    gpu = '/gpu:0'

    # image config
    points_num = 15
    fm_channel = points_num + 1
    origin_height = 212
    origin_width = 256
    img_height = 216
    img_width = 256
    is_color = False


    # feature map config
    fm_width = img_width >> 1
    fm_height = img_height >> 1
    sigma = 2.0
    alpha = 1.0
    radius = 12

    # random distortion
    degree = 15

    # solver config
    wd = 5e-4
    #wd = None
    stddev = 5e-2
    use_fp16 = False
    moving_average_decay = 0.999

    # checkpoint path and filename
    logdir = "./log/train_log/"
    params_dir = "./params/"
    load_filename = "cpm" + '-' + steps
    save_filename = "cpm"

    # iterations config
    max_iteration = 500000
    checkpoint_iters = 2000
    summary_iters = 100
    validate_iters = 2000
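
# Note: with `initialize = True` the network always starts from scratch; to
# resume training, set it to False and point `steps` at a saved checkpoint's
# global step so that `load_filename` resolves to an existing file in
# `params_dir`.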


def main():

    config = Config()
    with tf.Graph().as_default():

        # create a reader object
        reader = read_data.PoseReader("./labels/txt/validate_annos.txt",
                "./data/train_imgs/", config)

        # create a model object
        model = cpm.CPM(config)

        # feedforward
        predicts = model.build_fc(True)

        # return the loss
        loss = model.loss()

        # training operation
        train_op = model.train_op(loss, model.global_step)
        # Initializing operation
        init_op = tf.global_variables_initializer()

        saver = tf.train.Saver(max_to_keep = 100)

        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:

            # initialize parameters or restore from previous model
            if not os.path.exists(config.params_dir):
                os.makedirs(config.params_dir)
            if os.listdir(config.params_dir) == [] or config.initialize:
                print "Initializing Network"
                sess.run(init_op)
            else:
                sess.run(init_op)
                model.restore(sess, saver, config.load_filename)

            merged = tf.summary.merge_all()
            logdir = os.path.join(config.logdir,
                    datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))

            writer = tf.summary.FileWriter(logdir, sess.graph)

            # start training
            for idx in xrange(config.max_iteration):
                with tf.device("/cpu:0"):
                    imgs, fm, coords, begins, filename_list = reader.get_random_batch()

                # feed data into the model
                feed_dict = {
                        model.images: imgs,
                        model.coords: coords,
                        model.labels: fm
                        }
                with tf.device(config.gpu):
                    # run the training operation
                    sess.run(train_op, feed_dict=feed_dict)

                with tf.device('/cpu:0'):
                    # write summary
                    if (idx + 1) % config.summary_iters == 0:
                        tmp_global_step = model.global_step.eval()
                        summary = sess.run(merged, feed_dict=feed_dict)
                        writer.add_summary(summary, tmp_global_step)
                    # save checkpoint
                    if (idx + 1) % config.checkpoint_iters == 0:
                        tmp_global_step = model.global_step.eval()
                        model.save(sess, saver, config.save_filename, tmp_global_step)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------