├── .gitignore
├── README.md
├── clear.sh
├── cpm.py
├── custom_ops.py
├── labels
│   └── python
│       └── delete_points.py
├── read_data.py
├── script
│   ├── check.sh
│   ├── clear_log.sh
│   ├── init_dir.sh
│   └── run.sh
├── test.py
└── train.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# python
*.py[cod]
*.so
*.egg
*.egg-info
dist
build

data/*
tmp
test
params/*
log
backup
nohup.out
#clear.sh
*.backup
labels/txt/
labels/python/set_zeros.py


#!data/python
#!params
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# pose_estimation

Steps to run this project:

* run `./script/init_dir.sh` to create the necessary directories
* place your training data in the `data` directory
* place your labels in the `labels/txt` directory
* modify the class `Config()` in `train.py`
* modify the data path and label path passed to the reader object in `train.py`

Run your model with the following command:

```
python train.py
```
or

```
./script/run.sh
```
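
## Label format

`read_data.py` splits each annotation line on spaces and commas and expects
`points_num * 2 + 2` fields: a filename, the point coordinates, and a final
`begin` integer. With the default `points_num = 15`, a line looks like this
(the filename and field names are illustrative):

```
example.png x1,y1,x2,y2,...,x15,y15,begin
```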
--------------------------------------------------------------------------------
/clear.sh:
--------------------------------------------------------------------------------
#!/bin/bash

bash ./script/clear_log.sh
rm ./params/*
rm nohup.out
--------------------------------------------------------------------------------
/cpm.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8
import tensorflow as tf
from datetime import datetime
import os
import numpy as np
import read_data

class CPM:
    def __init__(self, config):
        self.global_step = tf.get_variable("global_step", initializer=0,
                dtype=tf.int32, trainable=False)
        self.wd = config.wd
        self.stddev = config.stddev
        self.batch_size = config.batch_size
        self.use_fp16 = config.use_fp16
        self.points_num = config.points_num
        self.fm_channel = config.fm_channel
        self.moving_average_decay = config.moving_average_decay
        self.params_dir = config.params_dir

        self.fm_height = config.fm_height
        self.fm_width = config.fm_width

        self.images = tf.placeholder(
                dtype = tf.float32,
                shape = (self.batch_size, config.img_height, config.img_width, 1)
                )
        self.labels = tf.placeholder(
                dtype = tf.float32,
                shape = (self.batch_size, config.fm_height, config.fm_width, self.points_num))
        self.coords = tf.placeholder(
                dtype = tf.float32,
                shape = (self.batch_size, self.points_num * 2))


    def build_fc(self, is_train):
        fc_is_train = is_train

        with tf.name_scope("original_images") as scope:
            self._image_summary(self.images, 1)
        out_fc = self.cnn_fc(self.images, fc_is_train, 'fc')
        self.add_to_euclidean_loss(self.batch_size, out_fc, self.coords, 'fcn')

        return out_fc

    def cnn_fc(self, input_, is_train, name):
        trainable = is_train
        is_BN = True

        with tf.variable_scope(name) as scope:
            conv1 = self.conv_layer(input_, 5, 96,
                    'conv1', is_BN, trainable)
            pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1],
                    strides=[1, 2, 2, 1],
                    padding="SAME", name="pool1")

            conv2 = self.conv_layer(pool1, 5, 256,
                    'conv2', is_BN, trainable)
            pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1],
                    strides=[1, 2, 2, 1],
                    padding="SAME", name="pool2")

            conv3 = self.conv_layer(pool2, 5, 384,
                    'conv3', is_BN, trainable)
            pool3 = tf.nn.max_pool(conv3, ksize=[1, 3, 3, 1],
                    strides=[1, 2, 2, 1],
                    padding="SAME", name="pool3")

            conv4 = self.conv_layer(pool3, 3, 384,
                    'conv4', is_BN, trainable)

            conv5 = self.conv_layer(conv4, 3, 256,
                    'conv5', is_BN, trainable)
            conv6 = self.conv_layer(conv5, 3, 256,
                    'conv6', is_BN, trainable)
            conv7 = self.conv_layer(conv6, 3, 256,
                    'conv7', is_BN, trainable)
            conv8 = self.conv_layer(conv7, 3, 128,
                    'conv8', is_BN, trainable)
            conv9 = self.conv_layer(conv8, 1, 128,
                    'conv9', is_BN, trainable)
            if is_train:
                conv9 = tf.nn.dropout(conv9, 0.5)
            fc1 = self.fc_layer(conv9, 128, 'fc1', is_BN, trainable)
            if is_train:
                fc1 = tf.nn.dropout(fc1, 0.5)
            fc2 = self.final_fc_layer(fc1, self.points_num * 2,
                    'fc2', trainable)

        return fc2


    def final_fc_block(self, input_, is_train, name):
        trainable = is_train

        with tf.variable_scope(name) as scope:
            final_fc = self.final_fc_layer(input_,
                    self.points_num * 2, 'final_fc', trainable)

        return final_fc

    def loss(self):
        return tf.add_n(tf.get_collection('losses'), name = "total_loss")

    def add_to_euclidean_loss(self, batch_size, predicts, labels, name):
        flatten_labels = tf.reshape(labels, [batch_size, -1])
        flatten_predicts = tf.reshape(predicts, [batch_size, -1])

        with tf.name_scope(name) as scope:
            euclidean_loss = tf.sqrt(tf.reduce_sum(
                tf.square(tf.subtract(flatten_predicts, flatten_labels)), 1))
            euclidean_loss_mean = tf.reduce_mean(euclidean_loss,
                    name='euclidean_loss_mean')

        tf.add_to_collection("losses", euclidean_loss_mean)

    def train_op(self, total_loss, global_step):
        self._loss_summary(total_loss)

        optimizer = tf.train.AdamOptimizer()
        grads = optimizer.compute_gradients(total_loss)

        apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)

        variable_averages = tf.train.ExponentialMovingAverage(
                self.moving_average_decay, global_step)
        variable_averages_op = variable_averages.apply(tf.trainable_variables())

        with tf.control_dependencies([apply_gradient_op, variable_averages_op]):
            train_op = tf.no_op(name = "train")

        return train_op

    def save(self, sess, saver, filename, global_step):
        path = saver.save(sess, self.params_dir + filename, global_step=global_step)
        print "Save params at " + path

    def restore(self, sess, saver, filename):
        print "Restore from previous model: ", self.params_dir + filename
        saver.restore(sess, self.params_dir + filename)
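
    # All loss terms accumulate in the "losses" graph collection: the
    # euclidean coordinate loss from add_to_euclidean_loss plus one weight
    # decay term per layer, and loss() returns their sum via tf.add_n.
    # train_op then minimizes that total with Adam and keeps exponential
    # moving averages of all trainable variables.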

    def fc_layer(self, bottom, out_num, name, is_BN, trainable):
        flatten_bottom = tf.reshape(bottom, [self.batch_size, -1])
        with tf.variable_scope(name) as scope:
            weights = self._variable_with_weight_decay(
                    "weights",
                    shape = [flatten_bottom.get_shape()[-1], out_num],
                    stddev = self.stddev,
                    wd = self.wd,
                    trainable=trainable)
            mul = tf.matmul(flatten_bottom, weights)
            biases = self._variable_on_cpu('biases', [out_num],
                    tf.constant_initializer(0.0), trainable)
            pre_activation = tf.nn.bias_add(mul, biases)
            if is_BN:
                bn_activation = tf.layers.batch_normalization(pre_activation)
                top = tf.nn.relu(bn_activation, name=scope.name)
            else:
                top = tf.nn.relu(pre_activation, name=scope.name)
            self._activation_summary(top)
        return top

    def final_fc_layer(self, bottom, out_num, name, trainable):
        flatten_bottom = tf.reshape(bottom, [self.batch_size, -1])
        with tf.variable_scope(name) as scope:
            weights = self._variable_with_weight_decay(
                    "weights",
                    shape = [flatten_bottom.get_shape()[-1], out_num],
                    stddev = self.stddev,
                    wd = self.wd,
                    trainable=trainable)
            mul = tf.matmul(flatten_bottom, weights)
            biases = self._variable_on_cpu('biases', [out_num],
                    tf.constant_initializer(0.0), trainable)
            top = tf.nn.bias_add(mul, biases)
            self._activation_summary(top)
        return top

    def conv_layer(self, bottom, kernel_size, out_channel, name, is_BN, trainable):
        with tf.variable_scope(name) as scope:
            kernel = self._variable_with_weight_decay(
                    "weights",
                    shape = [kernel_size, kernel_size, bottom.get_shape()[-1],
                        out_channel],
                    stddev = self.stddev,
                    wd = self.wd,
                    trainable=trainable)
            conv = tf.nn.conv2d(bottom, kernel, [1, 1, 1, 1], padding="SAME")
            biases = self._variable_on_cpu('biases', [out_channel],
                    tf.constant_initializer(0.0), trainable)
            pre_activation = tf.nn.bias_add(conv, biases)
            if is_BN:
                bn_activation = tf.layers.batch_normalization(pre_activation)
                top = tf.nn.relu(bn_activation, name=scope.name)
            else:
                top = tf.nn.relu(pre_activation, name=scope.name)
            self._activation_summary(top)
        return top

    def final_conv_layer(self, bottom, kernel_size, out_channel, name, trainable):
        with tf.variable_scope(name) as scope:
            kernel = self._variable_with_weight_decay(
                    "weights",
                    shape = [kernel_size, kernel_size, bottom.get_shape()[-1],
                        out_channel],
                    stddev = self.stddev,
                    wd = self.wd,
                    trainable=trainable)
            conv = tf.nn.conv2d(bottom, kernel, [1, 1, 1, 1], padding="SAME")
            biases = self._variable_on_cpu('biases', [out_channel],
                    tf.constant_initializer(0.0), trainable)
            top = tf.nn.bias_add(conv, biases)
            self._activation_summary(top)
        return top

    def _variable_on_cpu(self, name, shape, initializer, trainable):
        with tf.device('/cpu:0'):
            dtype = tf.float16 if self.use_fp16 else tf.float32
            var = tf.get_variable(name, shape, initializer=initializer,
                    dtype=dtype, trainable=trainable)
        return var

    def _variable_with_weight_decay(self, name, shape, stddev, wd, trainable):
        dtype = tf.float16 if self.use_fp16 else tf.float32
        var = self._variable_on_cpu(name, shape,
                tf.truncated_normal_initializer(stddev=stddev, dtype=dtype),
                trainable)
        if wd is not None:
            weight_decay = tf.multiply(tf.nn.l2_loss(var), wd,
                    name='weights_loss')
            tf.add_to_collection("losses", weight_decay)
        return var
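
    # Note on regularization: when `wd` is not None, an L2 penalty
    # wd * tf.nn.l2_loss(var) == wd * sum(var ** 2) / 2 is added to the
    # "losses" collection for every weight created above, so loss() sums
    # it together with the euclidean loss term.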

    def _activation_summary(self, x):
        name = x.op.name
        tf.summary.histogram(name + '/activations', x)
        tf.summary.scalar(name + '/sparsity', tf.nn.zero_fraction(x))

    def _image_summary(self, x, channels):
        def sub(batch, idx):
            name = x.op.name
            tmp = x[batch, :, :, idx] * 255
            tmp = tf.expand_dims(tmp, axis = 2)
            tmp = tf.expand_dims(tmp, axis = 0)
            tf.summary.image(name + '-' + str(idx), tmp, max_outputs = 100)
        if (self.batch_size > 1):
            for idx in xrange(channels):
                # the first batch
                sub(0, idx)
                # the last batch
                sub(-1, idx)
        else:
            for idx in xrange(channels):
                sub(0, idx)

    def _loss_summary(self, loss):
        tf.summary.scalar(loss.op.name + " (raw)", loss)

    def _fm_summary(self, predicts):
        with tf.name_scope("fcn_summary") as scope:
            self._image_summary(self.labels, self.points_num)
            tmp_predicts = tf.nn.relu(predicts)
            self._image_summary(tmp_predicts, self.points_num)


def main():
    pass

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/custom_ops.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8

import numpy as np
import tensorflow as tf

ratio = 1.0
class_weights = np.array([
    1.0, 1.0, 1.0, 1.0, 1.0,
    1.0, 1.0, 1.0, 1.0, 1.0,
    1.0, 1.0, 1.0, 1.0, 1.0,
    1.0
    # 1.0, 10.0, 10.0, 10.0, 10.0,
    # 10.0, 10.0, 10.0, 10.0, 10.0,
    # 10.0, 10.0, 10.0, 10.0, 10.0,
    # 10.0
    ]).astype(np.float32).reshape([16])

def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
    # Need to generate a unique name to avoid duplicates:
    rnd_name = 'PyFuncGrad' + str(np.random.randint(0, 1E+8))

    tf.RegisterGradient(rnd_name)(grad)  # see _MySquareGrad for grad example
    g = tf.get_default_graph()
    with g.gradient_override_map({"PyFunc": rnd_name}):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)

def _softmax_cross_entropy(predict, labels):
    scratch = np.max(predict, axis = -1)
    backprop = predict - np.expand_dims(scratch, axis = -1)
    scratch = np.sum(np.exp(backprop), axis=-1)
    loss = labels * (np.expand_dims(np.log(scratch), axis=-1) - backprop)
    loss = np.sum(loss, axis = -1)

    backprop = np.exp(backprop) / np.expand_dims(scratch, axis=-1) - labels

    return loss, backprop

def self_loss(predicts, labels):

    # shape = predicts.shape
    # predicts = predicts.reshape([-1, 20])
    # labels = labels.reshape([-1, 20])

    batch_label = np.argmax(labels, axis = -1).astype(np.float32)
    batch_zeros = np.zeros_like(batch_label).astype(np.float32)
    mask1 = np.not_equal(batch_zeros, batch_label)
    rand_u = np.random.uniform(low=0.0, high=1.0, size=batch_label.shape)
    mask2 = rand_u < ratio
    mask = mask1 | mask2

    loss, backprop = _softmax_cross_entropy(predicts, labels)
    loss = np.where(mask, loss, batch_zeros)
    backprop_zeros = np.zeros_like(backprop)
    backprop = np.where(np.expand_dims(mask, axis=-1), backprop, backprop_zeros)
    backprop = backprop * class_weights
    # backprop = np.reshape(backprop, shape)

    # loss = np.mean(loss)
    # return loss
    return loss, backprop
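
# How the custom op fits together: `self_loss` computes both the masked
# cross-entropy loss and its gradient (softmax - labels) in numpy, and
# `py_func` registers that second output as the gradient of the first via
# gradient_override_map. `mask1` keeps every sample whose label is not the
# background class 0, and `mask2` re-admits background samples with
# probability `ratio` (1.0 here, so in effect nothing is dropped).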

def custom_loss(predicts, labels, name=None):
    # with tf.op_scope([predicts, labels], name, "CustomLoss") as name:
    #     loss, grad = tf.py_func(self_loss, [predicts, labels],
    #             [tf.float64, tf.float64], stateful=False, name="My")
    with tf.name_scope(name, "CustomLoss", [predicts, labels]) as name:
        loss, backprop = py_func(self_loss, [predicts, labels],
                [tf.float32, tf.float32], name=name,
                grad=_CustomLossGrad)
        # return tf.reduce_mean(loss)
        return loss


def _BroadcastMul(vec, mat):
    vec = tf.expand_dims(vec, -1)
    return vec * mat

# def _CustomLossGrad(op, grad_loss):
def _CustomLossGrad(op, grad_loss, grad_grad):
    softmax_grad = op.outputs[1]
    grad = _BroadcastMul(grad_loss, softmax_grad)

    if grad_grad.op.type not in ("ZerosLike", "Zeros"):
        logits = op.inputs[0]
        softmax = tf.nn.softmax(logits)
        grad += ((grad_grad - tf.squeeze(tf.matmul(grad_grad[:, None, :],
            softmax[:, :, None]), axis=1) * softmax))

    grad /= tf.cast(tf.size(grad) / tf.shape(grad)[-1], tf.float32)

    return grad, None

def main():
    with tf.Session() as sess:
        predicts = np.random.uniform(0.0, 1.0, (100, 20)).reshape((2, 10, 5,
            20)).astype(np.float32)
        # labels = np.zeros([200]).reshape((10, 20))
        tmp = np.random.randint(0, 20, size=(2, 10, 5)).astype(np.int32)
        # tmp[3] = 0
        # tmp[1] = 0
        labels = np.eye(20)[tmp].astype(np.float32)
        print labels.shape

        # lloss, ggrad = self_loss(predicts, labels)
        # loss = self_loss(predicts, labels)
        # print loss
        # print grad

        predicts = tf.constant(predicts)
        labels = tf.constant(labels)

        lloss = custom_loss(predicts, labels)
        loss = tf.losses.softmax_cross_entropy(labels, predicts)

        eval_lloss = lloss.eval()
        eval_loss = loss.eval()
        # print sess.run(loss)

        # print (tf.gradients(loss, predicts))
        print "Grad"
        # print ggrad
        my = tf.gradients(lloss, predicts)[0].eval()
        original = tf.gradients(loss, predicts)[0].eval()
        # my /= 10
        mask = np.isclose(my, original)
        print mask
        print mask.shape
        print eval_lloss
        print eval_loss
        # print (tf.gradients(loss, predicts)[0].eval())

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/labels/python/delete_points.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8

import re
import sys

def _get_point(from_list, idx):
    return [from_list[idx*2], from_list[idx*2 + 1]]

def delete_points(src_file, dst_file):
    def del_points(from_list):
        delete = [2, 4, 7, 11]
        to_list = list()
        for idx in xrange(19):
            if idx in delete:
                continue
            to_list += _get_point(from_list, idx)
        return to_list

    with open(src_file, 'rb') as fr, open(dst_file, 'wb') as fw:
        for line in fr:
            tmp = re.split(" |,", line.strip())
            if len(tmp) != 40:
                print len(tmp)
                print ("Length of Data Error.")
                sys.exit(0)
            filename = tmp[0]
            coords = tmp[1:39]
            begin = tmp[39]
            coords = del_points(coords)

            fw.write(filename + ' ')
            for item in coords:
                fw.write(item + ',')
            fw.write(begin + '\n')

def main():
    src_file = "../final/correct_19.txt"
    dst_file = "./lala.txt"
    delete_points(src_file, dst_file)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/read_data.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8

import numpy as np
import cv2
import sys
import os
import re
import random
import math

class PoseReader():

    def __init__(self, annos_path, data_path, config):
        self.records = list()
        self.batch_size = config.batch_size
        self.points_num = config.points_num
        self.fm_channel = config.fm_channel
        self.img_width = config.img_width
        self.img_height = config.img_height
        self.origin_width = config.origin_width
        self.origin_height = config.origin_height
        self.record_len = self.points_num * 2 + 2
        self.data_path = data_path
        self.line_idx = 0

        self.fm_width = config.fm_width
        self.fm_height = config.fm_height
        self.sigma = config.sigma
        self.alpha = config.alpha
        self.radius = config.radius

        # self.float_max = 1.0 - 1.0 / self.img_width
        self.float_max = 1.0

        self.degree = config.degree

        if config.is_color:
            self.color_mode = 1
        else:
            self.color_mode = 0
        with open(annos_path, 'rb') as fr:
            for line in fr:
                tmp = re.split(',| ', line.strip())
                if len(tmp) != self.record_len:
                    print "Length Error: ", len(tmp)
                    sys.exit(0)
                filename = tmp[0]
                coords = [int(x) for x in tmp[1:self.record_len - 1]]
                begin = int(tmp[-1])
                self.records.append((filename, np.array(coords), begin))
        self.size = len(self.records)


    def random_batch(self):
        rand = random.sample(xrange(self.size), self.batch_size)
        filename_list = list()
        coords_list = list()
        begins_list = list()
        for idx in rand:
            filename_list.append(self.records[idx][0])
            coords_list.append(self.records[idx][1])
            begins_list.append(self.records[idx][2])

        img_list = list()
        for filename in filename_list:
            img = cv2.imread(os.path.join(self.data_path, filename),
                    self.color_mode)
            img = cv2.resize(img, (self.img_width, self.img_height))
            img_list.append(img)

        out_imgs = self._img_preprocess(np.stack(img_list))
        out_labels = self._label_preprocess(np.stack(coords_list))
        out_begins = np.stack(begins_list)

        return out_imgs, out_labels, out_begins, filename_list


    def batch(self, line_idx=None):
        if line_idx is not None:
            self.line_idx = line_idx
        end_idx = self.line_idx + self.batch_size
        idxs = range(self.line_idx, end_idx)
        for idx in xrange(len(idxs)):
            if idxs[idx] >= self.size:
                idxs[idx] %= self.size
        if end_idx < self.size:
            self.line_idx = end_idx
        else:
            self.line_idx = end_idx % self.size

        filename_list = list()
        coords_list = list()
        begins_list = list()
        for idx in idxs:
            filename_list.append(self.records[idx][0])
            coords_list.append(self.records[idx][1])
            begins_list.append(self.records[idx][2])

        img_list = list()
        for filename in filename_list:
            img = cv2.imread(os.path.join(self.data_path, filename),
                    self.color_mode)
            img = cv2.resize(img, (self.img_width, self.img_height))
            img_list.append(img)

        out_imgs = self._img_preprocess(np.stack(img_list))
        out_labels = self._label_preprocess(np.stack(coords_list))
        out_begins = np.stack(begins_list)

        return out_imgs, out_labels, out_begins, filename_list
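
    # Preprocessing conventions used below: pixel values are scaled to
    # [-0.5, 0.5], and coordinates are normalized to [0, 1] by dividing by
    # the *original* image dimensions, since the label files store pixel
    # coordinates in the un-resized frame.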

    def _img_preprocess(self, imgs):
        if self.color_mode == 0:
            output = np.reshape(imgs, [-1, self.img_height, self.img_width, 1])
        elif self.color_mode == 1:
            output = np.reshape(imgs, [-1, self.img_height, self.img_width, 3])
        else:
            raise Exception("color_mode error.")

        output = output.astype(np.float32) * (1. / 255) - 0.5
        return output

    def _label_preprocess(self, label):
        output = np.reshape(label, [-1, self.points_num * 2]).astype(np.float32)
        output[:, ::2] /= self.origin_width
        output[:, 1::2] /= self.origin_height
        return output

    def label2fm(self, label):
        def get_point(a_list, idx):
            w, h = a_list[idx * 2: idx * 2 + 2]
            return int(w * self.fm_width), int(h * self.fm_height)

        def _gaussian2d(x, y, x0, y0, a, sigmax, sigmay):
            xx = (float(x) - x0) ** 2 / 2 / sigmax ** 2
            yy = (float(y) - y0) ** 2 / 2 / sigmay ** 2
            return a * math.exp(- xx - yy)

        def draw(img, center):
            w0, h0 = center
            height, width = img.shape
            # for h in xrange(height):
            #     for w in xrange(width):
            #         if(math.fabs(h - h0) + math.fabs(w - w0) < self.radius):
            for w in xrange(max(0, w0-self.radius), min(width, w0+self.radius+1)):
                for h in xrange(max(0, h0-self.radius), min(height, h0+self.radius+1)):
                    if math.fabs(h - h0) + math.fabs(w - w0) < self.radius:
                        img[h, w] = _gaussian2d(w, h, w0, h0, self.alpha, self.sigma,
                                self.sigma)

        fm_label = np.zeros((label.shape[0], self.fm_height, self.fm_width, self.points_num))
        for batch_idx in xrange(len(fm_label)):
            for ii in xrange(self.points_num):
                w, h = get_point(label[batch_idx], ii)
                draw(fm_label[batch_idx, :, :, ii], (w, h))
        return fm_label.astype(np.float32)
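
    # label2fm renders each keypoint as an unnormalized 2-D Gaussian on the
    # feature map, fm[h, w] = alpha * exp(-((w - w0)^2 + (h - h0)^2) / (2 * sigma^2)),
    # written only inside an L1 ball of `radius` around the point, so each
    # channel is a heatmap peaking at `alpha` on the keypoint itself.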

    def label2sm_fm(self, label):
        def get_point(a_list, idx):
            w, h = a_list[idx * 2: idx * 2 + 2]
            return int(w * self.fm_width), int(h * self.fm_height)

        def p8_distance(h1, h2, w1, w2):
            return max(math.fabs(h1 - h2), math.fabs(w1 - w2))

        def p4_distance(h1, h2, w1, w2):
            return math.fabs(h1 - h2) + math.fabs(w1 - w2)

        def draw(img, center, idx):
            w0, h0 = center
            height, width = img.shape
            for w in xrange(max(0, w0-self.radius), min(width, w0+self.radius+1)):
                for h in xrange(max(0, h0-self.radius), min(height, h0+self.radius+1)):
                    if p8_distance(h, h0, w, w0) < self.radius:
                        img[h, w] = idx + 1
        fm_label = np.zeros((label.shape[0], self.fm_height, self.fm_width))
        for batch_idx in xrange(len(fm_label)):
            for ii in xrange(self.points_num):
                w, h = get_point(label[batch_idx], ii)
                draw(fm_label[batch_idx], (w, h), ii)
        return fm_label.astype(np.int32)

    def _visualize(self, imgs, merge):
        import matplotlib.pyplot as plt
        if (merge):
            imgs = np.amax(imgs, axis = 2)
        imgs = np.squeeze(imgs)
        plt.imshow(imgs, cmap='gray')
        plt.show()

    def _draw_imgs(self, imgs, coords):
        def get_point(a_list, idx):
            w, h = a_list[idx * 2: idx * 2 + 2]
            return int(w * self.img_width), int(h * self.img_height)

        import matplotlib.pyplot as plt
        for idx in xrange(len(imgs)):
            img = np.squeeze(imgs[idx])
            coord = coords[idx]
            for ii in xrange(self.points_num):
                w, h = get_point(coord, ii)
                cv2.circle(img, (w, h), 1, 1)
            plt.imshow(img, cmap='gray')
            plt.show()

    def _rotate(self, origin, angle):
        x, y = origin
        o_y = 0.5 + (y - 0.5) * math.cos(angle) + (x - 0.5) * math.sin(angle)
        o_x = 0.5 + (x - 0.5) * math.cos(angle) - (y - 0.5) * math.sin(angle)
        return o_x, o_y

    def _random_rotate(self, images, labels, degree):
        if images.shape[0] != labels.shape[0]:
            raise Exception("Batch size Error.")
        degree = degree * math.pi / 180
        rand_degree = np.random.uniform(-degree, degree, images.shape[0])

        o_images = np.zeros_like(images)
        o_labels = np.zeros_like(labels)
        for idx in xrange(images.shape[0]):
            theta = rand_degree[idx]

            # labels
            for ii in xrange(self.points_num):
                o_labels[idx, 2*ii: 2*ii+2] = self._rotate(labels[idx, 2*ii: 2*ii+2], theta)

            # image
            M = cv2.getRotationMatrix2D((self.img_width/2, self.img_height/2), -theta*180/math.pi, 1)
            o_images[idx] = np.expand_dims(cv2.warpAffine(images[idx], M, (self.img_width, self.img_height)), axis=2)

        return o_images, o_labels

    def _batch_random_rotate(self, images, labels, degree):
        if images.shape[0] != labels.shape[0]:
            raise Exception("Batch size Error.")
        degree = degree * math.pi / 180
        rand_degree = np.random.uniform(-degree, degree)

        o_images = np.zeros_like(images)
        o_labels = np.zeros_like(labels)
        for idx in xrange(images.shape[0]):
            theta = rand_degree

            # labels
            for ii in xrange(self.points_num):
                o_labels[idx, 2*ii: 2*ii+2] = self._rotate(labels[idx, 2*ii: 2*ii+2], theta)

            # image
            M = cv2.getRotationMatrix2D((self.img_width/2, self.img_height/2), -theta*180/math.pi, 1)
            o_images[idx] = np.expand_dims(cv2.warpAffine(images[idx], M, (self.img_width, self.img_height)), axis=2)

        return o_images, o_labels

    def _random_flip_lr(self, images, labels):
        if images.shape[0] != labels.shape[0]:
            raise Exception("Batch size Error.")
        rand_u = np.random.uniform(0.0, 1.0, images.shape[0])
        rand_cond = rand_u > 0.5

        o_images = np.zeros_like(images)
        o_labels = np.zeros_like(labels)

        for idx in xrange(images.shape[0]):
            condition = rand_cond[idx]
            if condition:
                # "flip"
                o_images[idx] = np.fliplr(images[idx])
                o_labels[idx, ::2] = self.float_max - labels[idx, ::2]
                o_labels[idx, 1::2] = labels[idx, 1::2]
            else:
                # "origin"
                o_images[idx] = images[idx]
                o_labels[idx] = labels[idx]

        return o_images, o_labels

    def _batch_random_flip_lr(self, images, labels):
        if images.shape[0] != labels.shape[0]:
            raise Exception("Batch size Error.")
        rand_u = np.random.uniform(0.0, 1.0)
        rand_cond = rand_u > 0.5

        o_images = np.zeros_like(images)
        o_labels = np.zeros_like(labels)

        for idx in xrange(images.shape[0]):
            condition = rand_cond
            if condition:
                # "flip"
                o_images[idx] = np.fliplr(images[idx])
                o_labels[idx, ::2] = self.float_max - labels[idx, ::2]
                o_labels[idx, 1::2] = labels[idx, 1::2]
            else:
                # "origin"
                o_images[idx] = images[idx]
                o_labels[idx] = labels[idx]

        return o_images, o_labels
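
    # Augmentation order in the two getters below: flip left-right first,
    # then rotate about the image center (labels are rotated in normalized
    # coordinates by _rotate, the image by the matching cv2 affine warp),
    # and only then render the coordinates into Gaussian heatmaps, so the
    # heatmaps always match the distorted image. The _batch_* variants apply
    # one shared random draw to the whole batch instead of one per sample.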

    def get_random_batch(self, distort=True):

        imgs, labels, begins, filename_list = self.random_batch()
        if distort:
            imgs, labels = self._random_flip_lr(imgs, labels)
            imgs, labels = self._random_rotate(imgs, labels, self.degree)
        fm = self.label2fm(labels)

        return (imgs.reshape([self.batch_size, self.img_height, self.img_width, 1]),
                fm.reshape([self.batch_size, self.fm_height, self.fm_width, self.points_num]),
                labels.reshape([self.batch_size, self.points_num * 2]),
                begins,
                filename_list)

    def get_batch(self, distort=True, line_idx=None):

        imgs, labels, begins, filename_list = self.batch(
                line_idx=line_idx)
        if distort:
            imgs, labels = self._batch_random_flip_lr(imgs, labels)
            imgs, labels = self._batch_random_rotate(imgs, labels, self.degree)
        fm = self.label2fm(labels)

        return (imgs.reshape([self.batch_size, self.img_height, self.img_width, 1]),
                fm.reshape([self.batch_size, self.fm_height, self.fm_width, self.points_num]),
                labels.reshape([self.batch_size, self.points_num * 2]),
                begins,
                filename_list)



def main():
    pass


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/script/check.sh:
--------------------------------------------------------------------------------
#!/bin/bash

tensorboard --logdir=./log/train_log
--------------------------------------------------------------------------------
/script/clear_log.sh:
--------------------------------------------------------------------------------
#!/bin/bash

rm -rf ./log/*
--------------------------------------------------------------------------------
/script/init_dir.sh:
--------------------------------------------------------------------------------
#!/bin/bash

mkdir -p params
mkdir -p log
mkdir -p data

mkdir -p labels
mkdir -p labels/txt
mkdir -p labels/python
--------------------------------------------------------------------------------
/script/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash

nohup \
python train.py &
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8

from datetime import datetime
import os
import random

import tensorflow as tf
import numpy as np

import cpm
import read_data


class Config():

    #
    batch_size = 1
    initialize = False
    steps = "-1"
    gpu = '/gpu:0'
    # the number of images in your test dataset
    test_num = 0

    # image config
    points_num = 15
    fm_channel = points_num + 1
    origin_height = 212
    origin_width = 256
    img_height = 216
    img_width = 256
    is_color = False


    # feature map config
    fm_width = img_width >> 1
    fm_height = img_height >> 1
    sigma = 2.0
    alpha = 1.0
    radius = 12

    # random distortion
    degree = 15

    # solver config
    wd = 5e-4
    #wd = None
    stddev = 5e-2
    use_fp16 = False
    moving_average_decay = 0.999

    # checkpoint path and filename
    logdir = "./log/train_log/"
    params_dir = "./params/"
    load_filename = "cpm" + '-' + steps
    save_filename = "cpm"

    # iterations config
    max_iteration = 500000
    checkpoint_iters = 2000
    summary_iters = 100
    validate_iters = 2000
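
# Before testing, set `steps` to the global step of a saved checkpoint
# (checkpoints are written as params_dir + "cpm-<global_step>") and set
# `test_num` to the number of images in your test set; with the default
# placeholders ("-1" and 0), restore will fail and the test loop below
# runs zero iterations.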


def main():

    config = Config()
    with tf.Graph().as_default():

        # create a reader object
        reader = read_data.PoseReader("./labels/txt/validate_annos.txt",
                "./data/train_imgs/", config)

        # create a model object
        model = cpm.CPM(config)

        # feedforward
        predicts = model.build_fc(False)

        # return the loss
        loss = model.loss()

        # Initializing operation
        init_op = tf.global_variables_initializer()

        saver = tf.train.Saver(max_to_keep = 100)

        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:

            sess.run(init_op)
            model.restore(sess, saver, config.load_filename)

            # start testing
            for idx in xrange(config.test_num):
                with tf.device("/cpu:0"):
                    imgs, fm, coords, begins, filename_list = reader.get_batch(distort=False)

                # feed data into the model
                feed_dict = {
                        model.images: imgs,
                        model.coords: coords,
                        model.labels: fm
                        }
                with tf.device(config.gpu):
                    #
                    predict_coords = sess.run(predicts, feed_dict=feed_dict)




if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8

from datetime import datetime
import os
import random

import tensorflow as tf
import numpy as np

import cpm
import read_data


class Config():

    #
    batch_size = 1
    initialize = True
    steps = "-1"
    gpu = '/gpu:0'

    # image config
    points_num = 15
    fm_channel = points_num + 1
    origin_height = 212
    origin_width = 256
    img_height = 216
    img_width = 256
    is_color = False


    # feature map config
    fm_width = img_width >> 1
    fm_height = img_height >> 1
    sigma = 2.0
    alpha = 1.0
    radius = 12

    # random distortion
    degree = 15

    # solver config
    wd = 5e-4
    #wd = None
    stddev = 5e-2
    use_fp16 = False
    moving_average_decay = 0.999

    # checkpoint path and filename
    logdir = "./log/train_log/"
    params_dir = "./params/"
    load_filename = "cpm" + '-' + steps
    save_filename = "cpm"

    # iterations config
    max_iteration = 500000
    checkpoint_iters = 2000
    summary_iters = 100
    validate_iters = 2000
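
# Note: with `initialize = True` the network always starts from scratch; to
# resume training, set it to False and point `steps` at a saved checkpoint's
# global step so that `load_filename` resolves to an existing file in
# `params_dir`.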


def main():

    config = Config()
    with tf.Graph().as_default():

        # create a reader object
        reader = read_data.PoseReader("./labels/txt/validate_annos.txt",
                "./data/train_imgs/", config)

        # create a model object
        model = cpm.CPM(config)

        # feedforward
        predicts = model.build_fc(True)

        # return the loss
        loss = model.loss()

        # training operation
        train_op = model.train_op(loss, model.global_step)
        # Initializing operation
        init_op = tf.global_variables_initializer()

        saver = tf.train.Saver(max_to_keep = 100)

        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:

            # initialize parameters or restore from previous model
            if not os.path.exists(config.params_dir):
                os.makedirs(config.params_dir)
            if os.listdir(config.params_dir) == [] or config.initialize:
                print "Initializing Network"
                sess.run(init_op)
            else:
                sess.run(init_op)
                model.restore(sess, saver, config.load_filename)

            merged = tf.summary.merge_all()
            logdir = os.path.join(config.logdir,
                    datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))

            writer = tf.summary.FileWriter(logdir, sess.graph)

            # start training
            for idx in xrange(config.max_iteration):
                with tf.device("/cpu:0"):
                    imgs, fm, coords, begins, filename_list = reader.get_random_batch()

                # feed data into the model
                feed_dict = {
                        model.images: imgs,
                        model.coords: coords,
                        model.labels: fm
                        }
                with tf.device(config.gpu):
                    # run the training operation
                    sess.run(train_op, feed_dict=feed_dict)

                with tf.device('/cpu:0'):
                    # write summary
                    if (idx + 1) % config.summary_iters == 0:
                        tmp_global_step = model.global_step.eval()
                        summary = sess.run(merged, feed_dict=feed_dict)
                        writer.add_summary(summary, tmp_global_step)
                    # save checkpoint
                    if (idx + 1) % config.checkpoint_iters == 0:
                        tmp_global_step = model.global_step.eval()
                        model.save(sess, saver, config.save_filename, tmp_global_step)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------