├── .gitignore
├── LICENSE.md
├── README.md
├── images
│   └── .empty
├── models
│   └── .empty
├── posereg
│   ├── __init__.py
│   ├── activations.py
│   ├── layers.py
│   ├── math.py
│   ├── measures.py
│   ├── network.py
│   ├── objectives.py
│   └── pose.py
└── webcan.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# h5 files (weights)
*.h5

# Ignore compiled python code
__pycache__

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016-2017 Diogo Luvizon, Hedi Tabia, and David Picard

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## pose-regression - Human pose regression from RGB images

This software implements a human pose regression method based on the Soft-argmax approach, as described in the following paper:
> Human Pose Regression by Combining Indirect Part Detection and Contextual Information ([link](https://arxiv.org/abs/1710.02322))

## Dependencies

The network is implemented using [Keras](https://keras.io/) on top of TensorFlow and Python 3.

We provide a [demo script](webcan.py) for live demonstration using video frames captured by a webcam. Small changes in the code may be required for hardware compatibility.

The software requires the following packages:

* numpy
* scipy
* keras (2.0 or higher)
* tensorflow (GPU is better, but not required)
* pygame (1.9 or higher, only for the demonstration)
* matplotlib (only for the demonstration)
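## Usage

Here is a minimal sketch of running the pre-trained network on a single input.
The weights file name, URL, and MD5 hash below are the same ones used by
[webcan.py](webcan.py); the random input only illustrates the expected format.

```
import numpy as np
from keras.utils.data_utils import get_file

import posereg
from posereg import pa16j

# Build the model for 256x256 RGB inputs and load the MPII weights
model = posereg.build((256, 256, 3), pa16j.num_joints)
weights_path = get_file('reception_mpii_weights_tf_ch_last_v1.h5',
        'https://github.com/dluvizon/pose-regression/releases/download/'
        '0.1.1/reception_mpii_weights_tf_ch_last_v1.h5',
        md5_hash='0f41d21e6c049ca590b520367f950f7f', cache_subdir='models')
model.load_weights(weights_path)

# The network expects images normalized to [-1, 1]
frame = np.random.uniform(-1, 1, size=(1, 256, 256, 3)).astype(np.float32)
pred = model.predict(frame)

# One (pose, visibility) pair is produced per prediction block; the last
# pair is the most refined estimate
pose, visible = pred[-2], pred[-1]   # shapes (1, 16, 2) and (1, 16, 1)
```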
## Citing

If any part of this source code or the pre-trained weights is useful to you,
please cite the paper:

```
@article{LUVIZON201915,
  title = "Human pose regression by combining indirect part detection and contextual information",
  author = "Diogo C. Luvizon and Hedi Tabia and David Picard",
  journal = "Computers \& Graphics",
  volume = "85",
  pages = "15 - 22",
  year = "2019",
  issn = "0097-8493",
  doi = "https://doi.org/10.1016/j.cag.2019.09.002",
}
```

## License

The source code and the weights are given under the MIT License.

--------------------------------------------------------------------------------
/images/.empty:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dluvizon/pose-regression/5cf4f2816a1c795b037075933b6aba87466a194f/images/.empty

--------------------------------------------------------------------------------
/models/.empty:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dluvizon/pose-regression/5cf4f2816a1c795b037075933b6aba87466a194f/models/.empty

--------------------------------------------------------------------------------
/posereg/__init__.py:
--------------------------------------------------------------------------------
import sys
if sys.version_info[0] < 3:
    sys.stderr.write('This package was not tested on Python 2.\n')
    sys.stderr.write('It is better to use Python 3!\n')

from .network import build
from .measures import pckh
from .pose import pa16j

--------------------------------------------------------------------------------
/posereg/activations.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from keras import backend as K

def channel_softmax_2d():

    def _channel_softmax_2d(x):
        ndim = K.ndim(x)
        if ndim == 4:
            e = K.exp(x - K.max(x, axis=(1, 2), keepdims=True))
            s = K.sum(e, axis=(1, 2), keepdims=True)
            return e / s
        else:
            raise ValueError('This function is specific for 4D tensors. '
                             'Here, ndim=' + str(ndim))

    return _channel_softmax_2d

--------------------------------------------------------------------------------
/posereg/layers.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from keras import backend as K

from keras.layers import Input
from keras.layers import Lambda
from keras.layers import Dense
from keras.layers import Activation
from keras.layers import Conv2D
from keras.layers import SeparableConv2D
from keras.layers import BatchNormalization

from keras.layers import MaxPooling2D
from keras.layers import UpSampling2D

from keras.layers import multiply
from keras.layers import concatenate
from keras.layers import add

from posereg.math import linspace_2d
from posereg.activations import channel_softmax_2d


def conv(x, filters, size, strides=(1, 1), padding='same', name=None):
    x = Conv2D(filters, size, strides=strides, padding=padding,
               use_bias=False, name=name)(x)
    return x


def conv_bn(x, filters, size, strides=(1, 1), padding='same', name=None):
    if name is not None:
        conv_name = name + '_conv'
    else:
        conv_name = None

    x = conv(x, filters, size, strides, padding, conv_name)
    x = BatchNormalization(axis=-1, scale=False, name=name)(x)
    return x


def conv_bn_act(x, filters, size, strides=(1, 1), padding='same', name=None):
    if name is not None:
        conv_name = name + '_conv'
        bn_name = name + '_bn'
    else:
        conv_name = None
        bn_name = None

    x = conv(x, filters, size, strides, padding, conv_name)
    x = BatchNormalization(axis=-1, scale=False, name=bn_name)(x)
    x = Activation('relu', name=name)(x)
    return x
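# A minimal sketch of how these wrappers compose (this is exactly how stem()
# in network.py starts): each call appends convolution, batch normalization,
# and ReLU layers to the graph, in the order given by the function name.
# Layer names here are illustrative only.
#
#     x = conv_bn_act(inputs, 32, (3, 3), strides=(2, 2), name='stem1')
#     x = conv_bn_act(x, 32, (3, 3), name='stem2')
#     x = conv_bn_act(x, 64, (3, 3), name='stem3')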
def act_conv_bn(x, filters, size, strides=(1, 1), padding='same', name=None):
    if name is not None:
        conv_name = name + '_conv'
        act_name = name + '_act'
    else:
        conv_name = None
        act_name = None

    x = Activation('relu', name=act_name)(x)
    x = conv(x, filters, size, strides, padding, conv_name)
    x = BatchNormalization(axis=-1, scale=False, name=name)(x)
    return x


def separable_act_conv_bn(x, filters, size, strides=(1, 1), padding='same',
        name=None):
    if name is not None:
        conv_name = name + '_conv'
        act_name = name + '_act'
    else:
        conv_name = None
        act_name = None

    x = Activation('relu', name=act_name)(x)
    x = SeparableConv2D(filters, size, strides=strides, padding=padding,
            use_bias=False, name=conv_name)(x)
    x = BatchNormalization(axis=-1, scale=False, name=name)(x)
    return x


def act_conv(x, filters, size, strides=(1, 1), padding='same', name=None):
    if name is not None:
        act_name = name + '_act'
    else:
        act_name = None

    x = Activation('relu', name=act_name)(x)
    x = conv(x, filters, size, strides, padding, name)
    return x


def act_channel_softmax(x, name=None):
    x = Activation(channel_softmax_2d(), name=name)(x)
    return x


def lin_interpolation_2d(inp, dim):

    num_rows, num_cols, num_filters = K.int_shape(inp)[1:]
    conv = SeparableConv2D(num_filters, (num_rows, num_cols), use_bias=False)
    x = conv(inp)

    w = conv.get_weights()
    w[0].fill(0)
    w[1].fill(0)
    linspace = linspace_2d(num_rows, num_cols, dim=dim)

    # The depthwise kernel holds the normalized coordinate grid and the
    # pointwise kernel is an identity, so each output channel is the expected
    # coordinate (soft-argmax) of the corresponding input map.
    for i in range(num_filters):
        w[0][:, :, i, 0] = linspace[:, :]
        w[1][0, 0, i, i] = 1.

    conv.set_weights(w)
    conv.trainable = False

    x = Lambda(lambda x: K.squeeze(x, axis=1))(x)
    x = Lambda(lambda x: K.squeeze(x, axis=1))(x)
    x = Lambda(lambda x: K.expand_dims(x, axis=-1))(x)

    return x

--------------------------------------------------------------------------------
/posereg/math.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import numpy as np
from scipy.stats import multivariate_normal

from keras import backend as K

def linspace_2d(nb_rows, nb_cols, dim=0):

    def _lin_sp_aux(size, nb_repeat, start, end):
        linsp = np.linspace(start, end, num=size)
        x = np.empty((nb_repeat, size), dtype=np.float32)

        for d in range(nb_repeat):
            x[d] = linsp

        return x

    if dim == 1:
        return (_lin_sp_aux(nb_rows, nb_cols, 0.0, 1.0)).T
    return _lin_sp_aux(nb_cols, nb_rows, 0.0, 1.0)

def normalpdf2d(numbins, xmean, ymean, var):
    lin = np.linspace(0, numbins - 1, numbins)

    # Produce a 2D gaussian
    x = multivariate_normal.pdf(lin, mean=xmean, cov=var)
    x = x.reshape((1, numbins)).repeat(numbins, axis=0)
    y = multivariate_normal.pdf(lin, mean=ymean, cov=var)
    y = y.reshape((numbins, 1)).repeat(numbins, axis=1)
    g = x * y

    if g.sum() > K.epsilon():
        return g / g.sum()

    return np.zeros(g.shape)
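# A concrete example of linspace_2d (values are exact for this 3x3 case):
# dim=0 produces a grid that varies along the columns (x direction), while
# dim=1 varies along the rows (y direction).
#
#     >>> linspace_2d(3, 3, dim=0)
#     array([[0. , 0.5, 1. ],
#            [0. , 0.5, 1. ],
#            [0. , 0.5, 1. ]], dtype=float32)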
--------------------------------------------------------------------------------
/posereg/measures.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import numpy as np


def norm(x, axis=None):
    return np.sqrt(np.sum(np.power(x, 2), axis=axis))

def valid_joints(y, min_valid=-1e6):
    def and_all(x):
        if x.all():
            return 1
        return 0

    return np.apply_along_axis(and_all, axis=1, arr=(y > min_valid))


def pckh(y_true, y_pred, head_size, refp=0.5):
    """Compute the PCKh measure (using refp of the head size) on predicted
    samples, considering the PA16J pose layout (see file pose.py).

    # Arguments
        y_true: [num_samples, nb_joints, 2]
        y_pred: [num_samples, nb_joints, 2]
        head_size: [num_samples, 1]

    # Return
        The PCKh score.
    """

    assert y_true.shape == y_pred.shape
    assert len(y_true) == len(head_size)
    num_samples = len(y_true)

    # Ignore the joints pelvis and thorax (respectively 0 and 1 on the PA16J
    # pose layout).
    used_joints = range(2, 16)
    y_true = y_true[:, used_joints, :]
    y_pred = y_pred[:, used_joints, :]
    dist = np.zeros((num_samples, len(used_joints)))
    valid = np.zeros((num_samples, len(used_joints)))

    for i in range(num_samples):
        valid[i, :] = valid_joints(y_true[i])
        dist[i, :] = norm(y_true[i] - y_pred[i], axis=1) / head_size[i]
    match = (dist <= refp) * valid

    return match.sum() / valid.sum()
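# A minimal usage sketch with hypothetical arrays: `gt` and `pred` of shape
# [num_samples, 16, 2] in the PA16J layout and `head` of shape
# [num_samples, 1]. The score is the fraction of valid joints predicted
# within refp * head_size of the ground truth:
#
#     score = pckh(gt, pred, head, refp=0.5)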
--------------------------------------------------------------------------------
/posereg/network.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""Define the ReceptionNet for human pose estimation for Keras and TensorFlow.

The network is defined as:

    -------    ------
    |Input|-->|Stem|--> [...],
    -------    ------

and every prediction block:

           --------------------------------------------------
           |                 ---------------------------------
           |                 |                                |
        ---------      ----------      ---------        ---------
  [...]->|rBlockN|---->|SepConvN|---->|RegMapN|-(H)-->|fReMapN|--->(+)-->[...]
        ---------      ----------      ---------        ---------

             |-->(sSAM)---------------------
    |--(Hs)--|                             |
    |        |-->(sjProb)--> *visible*     |
H ->|                                      |
    |        |-->(cSAM)------------------(Agg)--> *pose*
    |--(Hc)--|                             |
             |-->(cjProb)------------------|
"""
from keras.models import Model
# TimeDistributed is required by build_context_aggregation when num_frames > 1
from keras.layers import TimeDistributed
from keras.optimizers import RMSprop

# Needs tf.divide, which is not implemented in the Keras backend
import tensorflow as tf

from posereg.objectives import elasticnet_loss_on_valid_joints
from posereg.layers import *


def sepconv_residual(x, out_size, name, kernel_size=(3, 3)):
    shortcut_name = name + '_shortcut'
    reduce_name = name + '_reduce'

    num_filters = K.int_shape(x)[-1]
    if num_filters == out_size:
        ident = x
    else:
        ident = act_conv_bn(x, out_size, (1, 1), name=shortcut_name)

    if out_size < num_filters:
        x = act_conv_bn(x, out_size, (1, 1), name=reduce_name)

    x = separable_act_conv_bn(x, out_size, kernel_size, name=name)
    x = add([ident, x])

    return x


def stem(inp):
    xi = Input(shape=K.int_shape(inp)[1:])  # Expected 256 x 256 x 3

    x = conv_bn_act(xi, 32, (3, 3), strides=(2, 2))
    x = conv_bn_act(x, 32, (3, 3))
    x = conv_bn_act(x, 64, (3, 3))

    a = conv_bn_act(x, 96, (3, 3), strides=(2, 2))
    b = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = concatenate([a, b])

    a = conv_bn_act(x, 64, (1, 1))
    a = conv_bn(a, 96, (3, 3))
    b = conv_bn_act(x, 64, (1, 1))
    b = conv_bn_act(b, 64, (5, 1))
    b = conv_bn_act(b, 64, (1, 5))
    b = conv_bn(b, 96, (3, 3))
    x = concatenate([a, b])

    a = act_conv_bn(x, 192, (3, 3), strides=(2, 2))
    b = MaxPooling2D((2, 2), strides=(2, 2))(x)
    x = concatenate([a, b])

    x = sepconv_residual(x, 3*192, name='sepconv1')

    model = Model(xi, x, name='Stem')
    x = model(inp)

    return x
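# A quick shape check, assuming the expected 256 x 256 x 3 input: the three
# stride-2 stages above downsample by a factor of 8 and the final residual
# block expands to 3*192 channels, so the stem outputs a 32 x 32 x 576
# feature map that feeds the prediction blocks.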
def build_reception_block(inp, name, ksize=(3, 3)):
    input_shape = K.int_shape(inp)[1:]
    size = input_shape[-1]

    xi = Input(shape=input_shape)
    a = sepconv_residual(xi, size, name='sepconv_l1', kernel_size=ksize)

    low1 = MaxPooling2D((2, 2))(xi)
    low1 = act_conv_bn(low1, int(size/2), (1, 1))
    low1 = sepconv_residual(low1, int(size/2), name='sepconv_l2_1',
            kernel_size=ksize)
    b = sepconv_residual(low1, int(size/2), name='sepconv_l2_2',
            kernel_size=ksize)

    c = MaxPooling2D((2, 2))(low1)
    c = sepconv_residual(c, int(size/2), name='sepconv_l3_1',
            kernel_size=ksize)
    c = sepconv_residual(c, int(size/2), name='sepconv_l3_2',
            kernel_size=ksize)
    c = sepconv_residual(c, int(size/2), name='sepconv_l3_3',
            kernel_size=ksize)
    c = UpSampling2D((2, 2))(c)

    b = add([b, c])
    b = sepconv_residual(b, size, name='sepconv_l2_3', kernel_size=ksize)
    b = UpSampling2D((2, 2))(b)
    x = add([a, b])

    model = Model(inputs=xi, outputs=x, name=name)

    return model(inp)


def build_sconv_block(inp, name=None, ksize=(3, 3)):
    input_shape = K.int_shape(inp)[1:]

    xi = Input(shape=input_shape)
    x = separable_act_conv_bn(xi, input_shape[-1], ksize)

    model = Model(inputs=xi, outputs=x, name=name)

    return model(inp)


def build_regmap_block(inp, num_maps, name=None):
    input_shape = K.int_shape(inp)[1:]

    xi = Input(shape=input_shape)
    x = act_conv(xi, num_maps, (1, 1))

    model = Model(inputs=xi, outputs=x, name=name)

    return model(inp)


def build_fremap_block(inp, num_filters, name=None):
    input_shape = K.int_shape(inp)[1:]

    xi = Input(shape=input_shape)
    x = act_conv_bn(xi, num_filters, (1, 1))

    model = Model(inputs=xi, outputs=x, name=name)

    return model(inp)


def pose_regression_context(h, num_joints, sam_s_model,
        sam_c_model, jprob_c_model, agg_model, jprob_s_model):

    # Split heatmaps for specialized and contextual information
    hs = Lambda(lambda x: x[:, :, :, :num_joints])(h)
    hc = Lambda(lambda x: x[:, :, :, num_joints:])(h)

    # Soft-argmax and joint probability for each heatmap
    ps = sam_s_model(hs)
    pc = sam_c_model(hc)
    vc = jprob_c_model(hc)

    pose = agg_model([ps, pc, vc])
    visible = jprob_s_model(hs)

    return pose, visible, hs


def pose_regression(h, sam_s_model, jprob_s_model):

    pose = sam_s_model(h)
    visible = jprob_s_model(h)

    return pose, visible, h


def build_softargmax_2d(input_shape, name=None):

    if name is None:
        name_sm = None
    else:
        name_sm = name + '_softmax'

    inp = Input(shape=input_shape)
    x = act_channel_softmax(inp, name=name_sm)

    x_x = lin_interpolation_2d(x, dim=0)
    x_y = lin_interpolation_2d(x, dim=1)
    x = concatenate([x_x, x_y])

    model = Model(inputs=inp, outputs=x, name=name)
    model.trainable = False

    return model
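# A tiny NumPy sketch of the soft-argmax computed by the model above, for a
# single hypothetical 2x2 heatmap: the spatial softmax turns the map into a
# probability distribution, and the frozen interpolation convolution takes
# its expectation over a normalized coordinate grid.
#
#     h = np.array([[0., 0.], [0., 10.]])
#     p = np.exp(h - h.max()); p /= p.sum()
#     cols = np.array([[0., 1.], [0., 1.]])  # linspace_2d(2, 2, dim=0)
#     x_coord = (p * cols).sum()             # ~1.0: the peak is on the right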
def build_joints_probability(input_shape, name=None):

    num_rows, num_cols = input_shape[0:2]
    inp = Input(shape=input_shape)

    x = MaxPooling2D((num_rows, num_cols))(inp)
    x = Activation('sigmoid')(x)

    x = Lambda(lambda x: K.squeeze(x, axis=1))(x)
    x = Lambda(lambda x: K.squeeze(x, axis=1))(x)
    x = Lambda(lambda x: K.expand_dims(x, axis=-1))(x)

    model = Model(inputs=inp, outputs=x, name=name)

    return model


def build_context_aggregation(num_joints, num_context, alpha,
        num_frames=1, name=None):

    inp = Input(shape=(num_joints * num_context, 1))
    d = Dense(num_joints, use_bias=False)

    x = Lambda(lambda x: K.squeeze(x, axis=-1))(inp)
    x = d(x)
    x = Lambda(lambda x: K.expand_dims(x, axis=-1))(x)

    # Freeze the dense layer as a block-diagonal sum: output j is the sum of
    # the num_context values belonging to joint j.
    w = d.get_weights()
    w[0].fill(0)
    for j in range(num_joints):
        start = j * num_context
        w[0][start : start + num_context, j] = 1.
    d.set_weights(w)
    d.trainable = False

    ctx_sum = Model(inputs=inp, outputs=x)
    ctx_sum.trainable = False
    if num_frames > 1:
        ctx_sum = TimeDistributed(ctx_sum,
                input_shape=(num_frames,) + K.int_shape(inp)[1:])

    # Define auxiliary layers.
    mul_alpha = Lambda(lambda x: alpha * x)
    mul_1alpha = Lambda(lambda x: (1 - alpha) * x)

    # This depends on TensorFlow because Keras does not implement divide.
    tf_div = Lambda(lambda x: tf.divide(x[0], x[1]))

    if num_frames == 1:
        # Define inputs
        ys = Input(shape=(num_joints, 2))
        yc = Input(shape=(num_joints * num_context, 2))
        pc = Input(shape=(num_joints * num_context, 1))

        # Split contextual predictions in x and y and do computations separately
        xi = Lambda(lambda x: x[:, :, 0:1])(yc)
        yi = Lambda(lambda x: x[:, :, 1:2])(yc)
    else:
        ys = Input(shape=(num_frames, num_joints, 2))
        yc = Input(shape=(num_frames, num_joints * num_context, 2))
        pc = Input(shape=(num_frames, num_joints * num_context, 1))

        # Split contextual predictions in x and y and do computations separately
        xi = Lambda(lambda x: x[:, :, :, 0:1])(yc)
        yi = Lambda(lambda x: x[:, :, :, 1:2])(yc)

    pxi = multiply([xi, pc])
    pyi = multiply([yi, pc])

    pc_sum = ctx_sum(pc)
    pxi_sum = ctx_sum(pxi)
    pyi_sum = ctx_sum(pyi)
    pc_div = Lambda(lambda x: x / num_context)(pc_sum)
    pxi_div = tf_div([pxi_sum, pc_sum])
    pyi_div = tf_div([pyi_sum, pc_sum])
    yc_div = concatenate([pxi_div, pyi_div])

    ys_alpha = mul_alpha(ys)
    yc_div_1alpha = mul_1alpha(yc_div)

    y = add([ys_alpha, yc_div_1alpha])

    model = Model(inputs=[ys, yc, pc], outputs=y, name=name)
    model.trainable = False

    return model
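# In formula form, the frozen graph above computes, per joint:
#
#     pose = alpha * y_s + (1 - alpha) * sum_i(p_i * y_c_i) / sum_i(p_i)
#
# i.e. the specialized prediction y_s is blended with the probability-weighted
# average of its num_context contextual predictions (variable names follow
# the code above: ys, yc, and pc).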
def build(input_shape, num_joints,
        num_context_per_joint=2,
        alpha=0.8,
        num_blocks=8,
        ksize=(5, 5),
        export_heatmaps=False):

    inp = Input(shape=input_shape)
    outputs = []

    num_heatmaps = (num_context_per_joint + 1) * num_joints

    x = stem(inp)

    num_rows, num_cols, num_filters = K.int_shape(x)[1:]

    # Build the soft-argmax models (no parameters) for specialized and
    # contextual maps.
    sams_input_shape = (num_rows, num_cols, num_joints)
    sam_s_model = build_softargmax_2d(sams_input_shape, name='sSAM')
    jprob_s_model = build_joints_probability(sams_input_shape, name='sjProb')

    # Build the aggregation model (no parameters)
    if num_context_per_joint > 0:
        samc_input_shape = (num_rows, num_cols, num_heatmaps - num_joints)
        sam_c_model = build_softargmax_2d(samc_input_shape, name='cSAM')
        jprob_c_model = build_joints_probability(samc_input_shape,
                name='cjProb')
        agg_model = build_context_aggregation(num_joints,
                num_context_per_joint, alpha, name='Agg')

    for bidx in range(num_blocks):
        block_shape = K.int_shape(x)[1:]
        x = build_reception_block(x, name='rBlock%d' % (bidx + 1), ksize=ksize)

        ident_map = x
        x = build_sconv_block(x, name='SepConv%d' % (bidx + 1), ksize=ksize)
        h = build_regmap_block(x, num_heatmaps, name='RegMap%d' % (bidx + 1))

        if num_context_per_joint > 0:
            pose, visible, hm = pose_regression_context(h, num_joints,
                    sam_s_model, sam_c_model, jprob_c_model, agg_model,
                    jprob_s_model)
        else:
            pose, visible, hm = pose_regression(h, sam_s_model, jprob_s_model)

        outputs.append(pose)
        outputs.append(visible)
        if export_heatmaps:
            outputs.append(hm)

        if bidx < num_blocks - 1:
            h = build_fremap_block(h, block_shape[-1],
                    name='fReMap%d' % (bidx + 1))
            x = add([ident_map, x, h])

    model = Model(inputs=inp, outputs=outputs)

    return model

--------------------------------------------------------------------------------
/posereg/objectives.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from keras import backend as K

def elasticnet_loss_on_valid_joints(y_true, y_pred):
    # Joints marked with large negative values are invalid and are masked
    # out of both tensors before computing the loss.
    idx = K.cast(K.greater(y_true, -1e6), 'float32')
    y_true = idx * y_true
    y_pred = idx * y_pred
    l1 = K.sum(K.abs(y_pred - y_true), axis=(-2, -1))
    l2 = K.sum(K.square(y_pred - y_true), axis=(-2, -1))
    return l1 + l2
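# A minimal compilation sketch (a hypothetical training setup, not shipped in
# this repository): the model built by posereg.build() has one pose and one
# visibility output per prediction block, so Keras applies the loss to every
# output. The visibility outputs would arguably deserve a binary
# cross-entropy instead; a single loss is used here only for illustration.
#
#     model = posereg.build((256, 256, 3), 16)
#     model.compile(loss=elasticnet_loss_on_valid_joints, optimizer='rmsprop')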
6 | """ 7 | num_joints = 16 8 | 9 | """Horizontal flip mapping""" 10 | map_hflip = [0, 1, 2, 3, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14] 11 | 12 | """Projections from other layouts to the PA16J standard""" 13 | map_from_mpii = [6, 7, 8, 9, 12, 13, 11, 14, 10, 15, 2, 3, 1, 4, 0, 5] 14 | 15 | """Projections of PA16J to other formats""" 16 | map_to_mpii = [14, 12, 10, 11, 13, 15, 0, 1, 2, 3, 8, 6, 4, 5, 7, 9] 17 | 18 | """Color map""" 19 | color = ['g', 'r', 'b', 'y', 'm'] 20 | cmap = [0, 0, 0, 0, 1, 2, 1, 2, 1, 2, 3, 4, 3, 4, 3, 4] 21 | links = [[0, 1], [1, 2], [2, 3], [4, 6], [6, 8], [5, 7], [7, 9], 22 | [10, 12], [12, 14], [11, 13], [13, 15]] 23 | 24 | -------------------------------------------------------------------------------- /webcan.py: -------------------------------------------------------------------------------- 1 | import queue 2 | import threading 3 | 4 | import numpy as np 5 | import scipy 6 | 7 | import pygame 8 | pygame.init() 9 | import pygame.camera 10 | pygame.camera.init() 11 | import pygame.display 12 | 13 | import matplotlib.pyplot as plt 14 | 15 | video_device = '/dev/video0' 16 | cam_res = (1280, 720) 17 | 18 | weights_file = 'reception_mpii_weights_tf_ch_last_v1.h5' 19 | TF_WEIGHTS_PATH = \ 20 | 'https://github.com/dluvizon/pose-regression/releases/download/0.1.1/' \ 21 | + weights_file 22 | md5_hash = '0f41d21e6c049ca590b520367f950f7f' 23 | cache_subdir = 'models' 24 | 25 | try: 26 | print ('Opening device ' + str(video_device) + ' with resolution ' + 27 | str(cam_res)) 28 | cam = pygame.camera.Camera(video_device, cam_res) 29 | cam.start() 30 | cam_res = cam.get_size() 31 | print ('Device started with resolution ' + str(cam_res)) 32 | except Exception as e: 33 | print ('Got an exception: ' + str(e)) 34 | sys.exit() 35 | 36 | img_size = (min(cam_res), min(cam_res)) 37 | print ('Cropping current frame size to ' + str(img_size)) 38 | 39 | hmsurf_size = (170, 170) 40 | lateral_margin = 2*hmsurf_size[1] 41 | screen_size = (img_size[0] + lateral_margin, img_size[1]) 42 | print ('Screen size: ' + str(screen_size)) 43 | 44 | win_size = (256, 256) 45 | input_shape = win_size + (3,) 46 | print ('Network input shape ' + str(input_shape)) 47 | 48 | 49 | # Load keras and posereg libs 50 | from keras.models import Model 51 | from keras.utils.data_utils import get_file 52 | 53 | import posereg 54 | from posereg import pa16j 55 | 56 | 57 | # Define the colors we will use in RGB format 58 | BLUE = ( 64, 64, 255) 59 | GREEN = ( 0, 228, 16) 60 | RED = (255, 16, 32) 61 | YELLOW = (255, 255, 0) 62 | MAGENTA = (255, 0, 255) 63 | 64 | links = pa16j.links 65 | cmap = pa16j.cmap 66 | color = [GREEN, RED, BLUE, YELLOW, MAGENTA] 67 | 68 | 69 | def get_frame(frame, skipframe=False): 70 | if skipframe: 71 | for i in range(3): 72 | """Stupid way to discard internal buffered frames, 73 | since there is no easy way to control it using pygame. 74 | """ 75 | img = cam.get_image() 76 | else: 77 | img = cam.get_image() 78 | 79 | x1 = int((cam_res[0] - img_size[0]) / 2) 80 | x2 = x1 + img_size[0] 81 | frame.blit(img, (0,0), (x1, 0, x2, img_size[1])) 82 | 83 | 84 | def surface_to_array(win, frame): 85 | pygame.transform.scale(frame, win.get_size(), win) 86 | x = pygame.surfarray.pixels3d(win).copy() 87 | x = x.transpose((1, 0, 2)).astype(np.float32) 88 | x /= 255. 89 | x -= 0.5 90 | x *= 2. 
def surface_to_array(win, frame):
    pygame.transform.scale(frame, win.get_size(), win)
    x = pygame.surfarray.pixels3d(win).copy()
    x = x.transpose((1, 0, 2)).astype(np.float32)
    x /= 255.
    x -= 0.5
    x *= 2.
    return x.reshape((1,) + x.shape)


def draw_pose(screen, pose, visible, w, h, prob_thr=0.):
    pose = pose.squeeze()
    visible = visible.squeeze()
    pose[:, 0] *= w
    pose[:, 1] *= h
    for i in links:
        if (visible[i[0]] > prob_thr) and (visible[i[1]] > prob_thr):
            c = color[cmap[i[0]]]
            pygame.draw.lines(screen, c, False, pose[i, :], 10)

def draw_heatmaps(screen, surf, hm, thr=0.5, vmin=-15, vmax=10):
    hm_idx = [
            ( 8, 0*hmsurf_size[0], 0*hmsurf_size[1]),  # R. wrist
            ( 9, 1*hmsurf_size[0], 0*hmsurf_size[1]),  # L. wrist
            ( 6, 0*hmsurf_size[0], 1*hmsurf_size[1]),  # R. elbow
            ( 7, 1*hmsurf_size[0], 1*hmsurf_size[1]),  # L. elbow
            ( 3, 0*hmsurf_size[0], 2*hmsurf_size[1]),  # Head
            ( 0, 1*hmsurf_size[0], 2*hmsurf_size[1]),  # Pelvis
            (12, 0*hmsurf_size[0], 3*hmsurf_size[1]),  # R. knee
            (13, 1*hmsurf_size[0], 3*hmsurf_size[1])]  # L. knee

    for idx in hm_idx:
        h = np.transpose(hm[:, :, idx[0]].copy(), (1, 0))
        h[h < vmin] = vmin
        h[h > vmax] = vmax
        cmap = plt.cm.jet
        norm = plt.Normalize(vmin=vmin, vmax=vmax)
        cm = np.zeros((34, 34, 3))
        cm[1:33, 1:33, :] = cmap(norm(h))[:, :, 0:3]
        cm = scipy.ndimage.zoom(cm, (5, 5, 1), order=1)
        pygame.surfarray.pixels3d(surf)[:, :, :] = np.array(255.*cm, dtype=int)
        screen.blit(surf, (idx[1] + img_size[0], idx[2]))


def thread_grab_frames(queue_frames, queue_poses):
    win = pygame.Surface(win_size)
    frame = pygame.Surface(img_size)
    hmsurf = pygame.Surface(hmsurf_size)
    screen = pygame.display.set_mode(screen_size)

    while True:
        get_frame(frame)
        x = surface_to_array(win, frame)
        queue_frames.put(x)

        screen.blit(frame, (0, 0))
        pred = queue_poses.get()

        # Unpack received data
        x = pred[-1][0]
        hm = pred[-2][0]
        v = pred[-3]
        p = pred[-4]

        draw_pose(screen, p, v, img_size[0], img_size[1], prob_thr=0.7)
        draw_heatmaps(screen, hmsurf, hm)

        pygame.display.update()


def main_thread():

    # Build the model and load the pre-trained weights on MPII
    model = posereg.build(input_shape, pa16j.num_joints, export_heatmaps=True)
    weights_path = get_file(weights_file, TF_WEIGHTS_PATH, md5_hash=md5_hash,
            cache_subdir=cache_subdir)
    model.load_weights(weights_path)

    queue_frames = queue.Queue(2)
    queue_poses = queue.Queue(2)
    proc = threading.Thread(target=thread_grab_frames,
            args=(queue_frames, queue_poses))
    proc.daemon = True
    proc.start()

    clock = pygame.time.Clock()

    show_fps_cnt = 0
    while True:
        x = queue_frames.get()
        pred = model.predict(x)
        pred.append(x)  # Append the input frame
        queue_poses.put(pred)

        clock.tick()
        show_fps_cnt += 1
        if show_fps_cnt == 10:
            show_fps_cnt = 0
            print('fps: ' + str(clock.get_fps()))

if __name__ == "__main__":
    main_thread()

--------------------------------------------------------------------------------