├── LICENSE
├── README.md
├── baselines.py
├── body_rnn_cell_extensions.py
├── body_rnn_cell_extensions_v1.py
├── core_rnn.py
├── data_utils.py
├── deltaRNN.py
├── forward_kinematics.py
├── forward_kinematics_v2.py
├── metrics.py
├── motion_rnn_lm.py
├── motion_rnn_lm_v2.py
├── motion_rnn_lm_v2_flow.py
├── motion_rnn_simple_lm.py
├── motion_rnn_simple_lm_flow.py
├── rnn.py
├── rnn_cell_extensions.py
├── rnn_cell_impl.py
├── rnn_cell_implement.py
├── rnn_cell_implement_flow.py
├── rnn_mod.py
├── translate_lm.py
├── translate_lm_v2.py
├── translate_lm_v2_flow.py
├── translate_simple_lm.py
├── translate_simple_lm_flow.py
└── viz.py

/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 Anand & Ankur
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Code and results/visualizations for the paper "A Neural Temporal Model for Human Motion Prediction", CVPR 2019
2 | 
--------------------------------------------------------------------------------
/baselines.py:
--------------------------------------------------------------------------------
1 | 
2 | """Super-simple baselines for short-term human motion prediction."""
3 | 
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 | 
8 | import numpy as np
9 | from six.moves import xrange # pylint: disable=redefined-builtin
10 | import tensorflow as tf
11 | 
12 | import translate
13 | import data_utils
14 | import seq2seq_model
15 | 
16 | 
17 | # Dummy object to create parameters for an also-dummy model
18 | class Object(object):
19 |   pass
20 | 
21 | def running_average( actions_dict, actions, k ):
22 |   """
23 |   Compute the error if we simply take the average of the last k frames.
24 | 
25 |   Args
26 |     actions_dict: Dictionary where keys are the actions, and each entry has a
27 |       tuple of (enc_in, dec_in, dec_out) poses.
28 |     actions: List of strings. The keys of actions_dict.
29 |     k: Integer. Number of frames to use for the running average.
30 | 
31 |   Returns
32 |     errs: a dictionary where, for each action, we have a 100-long list with the
33 |       error at each point in time.
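      (With the H3.6M sequences subsampled by two to 25 fps, output frame index i
      corresponds to (i + 1) * 40 ms; this is the index-to-millisecond mapping used
      by the result tables in main(), e.g. index 1 -> 80 ms, index 49 -> 2000 ms.)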
34 | """ 35 | 36 | # Get how many batches we have 37 | enc_in, dec_in, dec_out = actions_dict[ actions[0] ] 38 | 39 | n_sequences = len( enc_in ) 40 | seq_length_out = dec_out[0].shape[0] 41 | 42 | errs = dict() 43 | 44 | for action in actions: 45 | 46 | # Make space for the error 47 | errs[ action ] = np.zeros( (n_sequences, seq_length_out) ) 48 | 49 | # Get the lists for this action 50 | enc_in, dec_in, dec_out = actions_dict[action] 51 | 52 | for i in np.arange( n_sequences ): 53 | 54 | n, d = dec_out[i].shape 55 | 56 | # The last frame 57 | last_frame = dec_in[i][0, :] 58 | last_frame[0:6] = 0 59 | 60 | if k > 1: 61 | # Get the last k-1 frames 62 | last_k = enc_in[i][(-k+1):, :] 63 | assert( last_k.shape[0] == (k-1) ) 64 | 65 | # Merge and average them 66 | avg = np.mean( np.vstack( (last_k, last_frame) ), 0 ) 67 | else: 68 | avg = last_frame 69 | 70 | dec_out[i][:, 0:6] = 0 71 | idx_to_use = np.where( np.std( dec_out[i], 0 ) > 1e-4 )[0] 72 | 73 | ee = np.power( dec_out[i][:,idx_to_use] - avg[idx_to_use], 2 ) 74 | ee = np.sum( ee, 1 ) 75 | ee = np.sqrt( ee ) 76 | errs[ action ][i, :] = ee 77 | 78 | errs[action] = np.mean( errs[action], 0 ) 79 | 80 | return errs 81 | 82 | 83 | def last_buffer_frame( actions_dict, actions ): 84 | """ 85 | Compute the error if we simply take the last buffer frame as a fixed prediction 86 | 87 | Args 88 | actions_dict: Dictionary where keys are the actions, and each entry has a 89 | tuple of (enc_in, dec_in, dec_out) poses. 90 | actions: List of strings. The keys of actions_dict. 91 | 92 | 93 | Returns 94 | errs: a dictionary where, for each action, we have a 100-long list with the 95 | error at each point in time. 96 | """ 97 | 98 | # Get how many batches we have 99 | enc_in, dec_in, dec_out = actions_dict[ actions[0] ] 100 | 101 | n_sequences = len( enc_in ) 102 | seq_length_out = dec_out[0].shape[0] 103 | 104 | errs = dict() 105 | 106 | for action in actions: 107 | 108 | # Make space for the error 109 | errs[ action ] = np.zeros( (n_sequences, seq_length_out) ) 110 | 111 | # Get the lists for this action 112 | enc_in, dec_in, dec_out = actions_dict[action] 113 | 114 | for i in np.arange( n_sequences ): 115 | 116 | n, d = dec_out[i].shape 117 | 118 | # The last buffer frame 119 | last_buffer_frame = enc_in[i][-1, :] 120 | last_buffer_frame[0:6] = 0 121 | 122 | dec_out[i][:, 0:6] = 0 123 | idx_to_use = np.where( np.std( dec_out[i], 0 ) > 1e-4 )[0] 124 | 125 | ee = np.power( dec_out[i][:,idx_to_use] - last_buffer_frame[idx_to_use], 2 ) 126 | ee = np.sum( ee, 1 ) 127 | ee = np.sqrt( ee ) 128 | errs[ action ][i, :] = ee 129 | 130 | errs[action] = np.mean( errs[action], 0 ) 131 | 132 | return errs 133 | 134 | 135 | 136 | def denormalize_and_convert_to_euler( data, data_mean, data_std, dim_to_ignore, actions, one_hot ): 137 | """ 138 | Denormalizes data and converts to Euler angles 139 | (all losses are computed on Euler angles). 140 | 141 | Args 142 | data: dictionary with human poses. 143 | data_mean: d-long vector with the mean of the training data. 144 | data_std: d-long vector with the standard deviation of the training data. 145 | dim_to_ignore: dimensions to ignore because the std is too small or for other reasons. 146 | actions: list of strings with the actions in the data dictionary. 147 | one_hot: whether the data comes with one-hot encoding. 148 | 149 | Returns 150 | all_denormed: a list with nbatch entries. 
150 |     all_denormed: a list with nbatch entries. Each entry is an n-by-d matrix
151 |       that corresponds to a denormalized sequence in Euler angles
152 |   """
153 | 
154 |   all_denormed = []
155 | 
156 |   # expmap -> rotmat -> euler
157 |   for i in np.arange( data.shape[0] ):
158 |     denormed = data_utils.unNormalizeData(data[i,:,:], data_mean, data_std, dim_to_ignore, actions, one_hot )
159 | 
160 |     for j in np.arange( denormed.shape[0] ):
161 |       for k in np.arange(3,97,3):
162 |         denormed[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( denormed[j,k:k+3] ))
163 | 
164 |     all_denormed.append( denormed )
165 | 
166 |   return all_denormed
167 | 
168 | 
169 | def main():
170 | 
171 | 
172 |   actions = ["walking", "eating", "smoking", "discussion"]
173 | 
174 |   # TODO make this a runtime option
175 |   # Uncomment the lines below to evaluate on all 15 actions
176 | 
177 |   # actions.extend(["directions", "greeting", "phoning", "posing", "purchases",
178 |   #   "sitting", "sittingdown", "takingphoto", "waiting", "walkingdog", "walkingtogether"])
179 | 
180 |   # Parameters for dummy model. We only build the model to load the data.
181 |   one_hot = False
182 |   FLAGS = Object()
183 |   FLAGS.data_dir = "./data/h3.6m/dataset"
184 |   FLAGS.architecture = "tied"
185 |   FLAGS.seq_length_in = 50
186 |   FLAGS.seq_length_out = 100
187 |   FLAGS.num_layers = 1
188 |   FLAGS.size = 128
189 |   FLAGS.max_gradient_norm = 5
190 |   FLAGS.batch_size = 8
191 |   FLAGS.learning_rate = 0.005
192 |   FLAGS.learning_rate_decay_factor = 1
193 |   summaries_dir = "./log/"
194 |   FLAGS.loss_to_use = "sampling_based"
195 |   FLAGS.omit_one_hot = True  # a trailing comma here would silently turn this into a tuple
196 |   FLAGS.residual_velocities = False  # likewise, this must be a bool, not a tuple
197 |   dtype = tf.float32
198 | 
199 |   # Baselines are very simple. No need to use the GPU.
200 |   with tf.Session(config=tf.ConfigProto( device_count = {"GPU": 0})) as sess:
201 | 
202 |     model = seq2seq_model.Seq2SeqModel(
203 |         FLAGS.architecture,
204 |         FLAGS.seq_length_in,
205 |         FLAGS.seq_length_out,
206 |         FLAGS.size, # hidden layer size
207 |         FLAGS.num_layers,
208 |         FLAGS.max_gradient_norm,
209 |         FLAGS.batch_size,
210 |         FLAGS.learning_rate,
211 |         FLAGS.learning_rate_decay_factor,
212 |         summaries_dir,
213 |         FLAGS.loss_to_use,
214 |         len( actions ),
215 |         not FLAGS.omit_one_hot,
216 |         FLAGS.residual_velocities,
217 |         dtype=dtype)
218 | 
219 |     # Load the data
220 |     _, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = translate.read_all_data(actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot )
221 | 
222 |     # Get all the data, denormalize and convert it to euler angles
223 |     poses_data = {}
224 |     for action in actions:
225 |       enc_in, dec_in, dec_out = model.get_batch_srnn( test_set, action )
226 | 
227 |       enc_in = denormalize_and_convert_to_euler(enc_in, data_mean, data_std, dim_to_ignore, actions, not FLAGS.omit_one_hot )
228 |       dec_in = denormalize_and_convert_to_euler(dec_in, data_mean, data_std, dim_to_ignore, actions, not FLAGS.omit_one_hot )
229 |       dec_out = denormalize_and_convert_to_euler(dec_out, data_mean, data_std, dim_to_ignore, actions, not FLAGS.omit_one_hot )
230 | 
231 |       poses_data[action] = (enc_in, dec_in, dec_out)
232 | 
233 |     # Compute baseline errors
234 |     errs_constant_frame = running_average( poses_data, actions, 1 )
235 |     running_average_2 = running_average( poses_data, actions, 2 )
236 |     running_average_4 = running_average( poses_data, actions, 4 )
237 |     last_buffer_frame_const = last_buffer_frame( poses_data, actions)
238 | 
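    # Frame index -> milliseconds in the tables below: with poses subsampled by two
    # (25 fps, i.e. 40 ms per frame), index i corresponds to (i + 1) * 40 ms, so
    # indices 1, 3, 7, 9, 13, 24, 49 map to 80, 160, 320, 400, 560, 1000, 2000 ms.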
1) ===") 241 | print("{0: <16} | {1:4d} | {2:4d} | {3:4d} | {4:4d} | {5:4d} | {6:4d} | {7:4d}".format("milliseconds", 80, 160, 320, 400, 560, 1000, 2000)) 242 | for action in actions: 243 | print("{0: <16} | {1:.2f} | {2:.2f} | {3:.2f} | {4:.2f} | {5:.2f} | {6:.2f} | {7:.2f}".format( action, errs_constant_frame[action][1], errs_constant_frame[action][3], 244 | errs_constant_frame[action][7], errs_constant_frame[action][9], errs_constant_frame[action][13], errs_constant_frame[action][24], errs_constant_frame[action][49] )) 245 | 246 | print() 247 | print("=== Runnning avg. 2 ===") 248 | print("{0: <16} | {1:4d} | {2:4d} | {3:4d} | {4:4d} | {5:4d} | {6:4d} | {7:4d}".format("milliseconds", 80, 160, 320, 400, 560, 1000, 2000)) 249 | for action in actions: 250 | print("{0: <16} | {1:.2f} | {2:.2f} | {3:.2f} | {4:.2f} | {5:.2f} | {6:.2f} | {7:.2f}".format( action, running_average_2[action][1], running_average_2[action][3], 251 | running_average_2[action][7], running_average_2[action][9], running_average_2[action][13], running_average_2[action][24], running_average_2[action][49] )) 252 | 253 | print() 254 | print("=== Runnning avg. 4 ===") 255 | print("{0: <16} | {1:4d} | {2:4d} | {3:4d} | {4:4d} | {5:4d} | {6:4d} | {7:4d}".format("milliseconds", 80, 160, 320, 400, 560, 1000, 2000)) 256 | for action in actions: 257 | print("{0: <16} | {1:.2f} | {2:.2f} | {3:.2f} | {4:.2f} | {5:.2f} | {6:.2f} | {7:.2f}".format( action, running_average_4[action][1], running_average_4[action][3], 258 | running_average_4[action][7], running_average_4[action][9], running_average_4[action][13], running_average_4[action][24], running_average_4[action][49] )) 259 | 260 | print() 261 | print("=== Last buffer frame ===") 262 | print("{0: <16} | {1:4d} | {2:4d} | {3:4d} | {4:4d} | {5:4d} | {6:4d} | {7:4d}".format("milliseconds", 80, 160, 320, 400, 560, 1000, 2000)) 263 | for action in actions: 264 | print("{0: <16} | {1:.2f} | {2:.2f} | {3:.2f} | {4:.2f} | {5:.2f} | {6:.2f} | {7:.2f}".format( action, last_buffer_frame_const[action][1], last_buffer_frame_const[action][3], 265 | last_buffer_frame_const[action][7], last_buffer_frame_const[action][9], last_buffer_frame_const[action][13], last_buffer_frame_const[action][24], last_buffer_frame_const[action][49] )) 266 | 267 | 268 | if __name__ == "__main__": 269 | main() 270 | -------------------------------------------------------------------------------- /body_rnn_cell_extensions.py: -------------------------------------------------------------------------------- 1 | 2 | """ Extensions to TF RNN class by una_dinosaria""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import tensorflow as tf 9 | 10 | #from tensorflow.contrib.rnn.python.ops.core_rnn_cell import RNNCell 11 | from rnn_cell_implement import RNNCell # modified body cell definitions 12 | #from deltaRNN import RNNCell # only for delta-RNN 13 | #from rnn_cell_implement import MultiRNNCell 14 | import hard_att 15 | import queue 16 | 17 | # The import for LSTMStateTuple changes in TF >= 1.2.0 18 | from pkg_resources import parse_version as pv 19 | if pv(tf.__version__) >= pv('1.2.0'): 20 | from tensorflow.contrib.rnn import LSTMStateTuple 21 | else: 22 | from tensorflow.contrib.rnn.python.ops.core_rnn_cell import LSTMStateTuple 23 | del pv 24 | 25 | from tensorflow.python.ops import variable_scope as vs 26 | 27 | import collections 28 | import math 29 | 30 | class ResidualWrapper(RNNCell): 31 | """Operator adding residual connections to 
31 |   """Operator adding residual connections to a given cell."""
32 | 
33 |   def __init__(self, cell):
34 |     """Create a cell with added residual connection.
35 | 
36 |     Args:
37 |       cell: an RNNCell. The input is added to the output.
38 | 
39 |     Raises:
40 |       TypeError: if cell is not an RNNCell.
41 |     """
42 |     if not isinstance(cell, RNNCell):
43 |       raise TypeError("The parameter cell is not a RNNCell.")
44 | 
45 |     self._cell = cell
46 | 
47 |   @property
48 |   def state_size(self):
49 |     return self._cell.state_size
50 | 
51 |   @property
52 |   def output_size(self):
53 |     return self._cell.output_size
54 | 
55 |   def __call__(self, inputs, state, context, scope=None): # modified
56 |     """Run the cell and add a residual connection."""
57 | 
58 |     # Run the rnn as usual
59 |     output, new_state = self._cell(inputs, state, context, scope) # modified
60 | 
61 |     # Add the residual connection
62 |     output = tf.add(output, inputs)
63 | 
64 |     return output, new_state
65 | 
66 | class ResidualWrapperv1(RNNCell):
67 |   """Operator adding residual connections to a given cell."""
68 | 
69 |   def __init__(self, cell, output_size):
70 |     """Create a cell with added residual connection.
71 | 
72 |     Args:
73 |       cell: an RNNCell. The output is a learned interpolation of the cell output and the input.
74 | 
75 |     Raises:
76 |       TypeError: if cell is not an RNNCell.
77 |     """
78 |     if not isinstance(cell, RNNCell):
79 |       raise TypeError("The parameter cell is not a RNNCell.")
80 | 
81 |     self._cell = cell
82 |     self._output_size = output_size
83 | 
84 |     self.r = tf.get_variable("r_interp", [self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
85 | 
86 |   @property
87 |   def state_size(self):
88 |     return self._cell.state_size
89 | 
90 |   @property
91 |   def output_size(self):
92 |     return self._cell.output_size
93 | 
94 |   def __call__(self, inputs, state, context, scope=None): # modified
95 |     """Run the cell and interpolate its output with the input."""
96 | 
97 |     # Run the rnn as usual
98 |     output, new_state = self._cell(inputs, state, context, scope) # modified
99 | 
100 |     # perform residual_v1 interpolation op
101 |     output = (1.0 - self.r) * output + self.r * inputs
102 | 
103 |     return output, new_state
104 | 
105 | 
106 | class ResidualWrapperv2(RNNCell):
107 |   """Operator adding residual connections to a given cell."""
108 | 
109 |   def __init__(self, cell, output_size):
110 |     """Create a cell with added residual connection.
111 | 
112 |     Args:
113 |       cell: an RNNCell. The output is a learned interpolation of the cell output and a linear transform of the input.
114 | 
115 |     Raises:
116 |       TypeError: if cell is not an RNNCell.
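    Note: unlike the plain ResidualWrapper above, this variant learns a
    per-dimension gate r and emits (1 - r) * cell_output + r * (W_res * inputs + b_res),
    i.e. a learned interpolation between the recurrent output and a linear map of
    the input (see __call__ below).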
117 | """ 118 | if not isinstance(cell, RNNCell): 119 | raise TypeError("The parameter cell is not a RNNCell.") 120 | 121 | self._cell = cell 122 | self._output_size = output_size 123 | 124 | self.r = tf.get_variable("r_interp", [self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 125 | self.W_res = tf.get_variable("W_res", [self._output_size, self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 126 | self.b_res = tf.get_variable("b_res", [self._output_size], dtype=tf.float32, initializer=tf.constant_initializer(0.1)) 127 | 128 | 129 | @property 130 | def state_size(self): 131 | return self._cell.state_size 132 | 133 | @property 134 | def output_size(self): 135 | return self._cell.output_size 136 | 137 | def __call__(self, inputs, state, context, scope=None): # modified 138 | """Run the cell and add a residual connection.""" 139 | 140 | # Run the rnn as usual 141 | output, new_state = self._cell(inputs, state, context, scope) # modified 142 | 143 | # perform residual_v2 interpolation op 144 | output = (1.0 - self.r) * output + self.r * (tf.matmul(inputs, self.W_res) + self.b_res) 145 | 146 | return output, new_state 147 | 148 | 149 | class LinearSpaceDecoderWrapper(RNNCell): # modified 150 | """Operator adding a linear encoder to an RNN cell""" 151 | 152 | def __init__(self, cell, output_size, is_attention, num_attn_units, num_actions, memory_length): 153 | """Create a cell with with a linear encoder in space. 154 | 155 | Args: 156 | cell: an RNNCell. The input is passed through a linear layer. 157 | 158 | Raises: 159 | TypeError: if cell is not an RNNCell. 160 | """ 161 | if not isinstance(cell, RNNCell): # modified 162 | raise TypeError("The parameter cell is not a RNNCell.") 163 | 164 | self._cell = cell 165 | self.is_attention = is_attention 166 | self.num_attn_units = num_attn_units 167 | self.num_actions = num_actions 168 | self.memory_length = memory_length 169 | 170 | print( 'output_size = {0}'.format(output_size) ) 171 | print( ' state_size = {0}'.format(self._cell.state_size) ) 172 | 173 | # Tuple if multi-rnn 174 | if isinstance(self._cell.state_size,tuple): 175 | 176 | # Fine if GRU... 
177 |       insize = self._cell.state_size[-1]
178 | 
179 |       # LSTMStateTuple if LSTM
180 |       if isinstance( insize, LSTMStateTuple ):
181 |         insize = insize.h
182 | 
183 |     else:
184 |       # Fine if not multi-rnn
185 |       insize = self._cell.state_size
186 | 
187 |     # output projection params
188 |     self.w_out = tf.get_variable("proj_w_out", [insize, output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
189 |     self.b_out = tf.get_variable("proj_b_out", [output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
190 | 
191 |     if self.is_attention: # flag to indicate whether we're using an attention-based LM
192 |       # init attention params
193 |       self.num_attn_units = num_attn_units
194 |       self.W_1_attn = tf.get_variable("W_1_attn", [insize+self.num_actions, self.num_attn_units], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
195 |       self.W_2_attn = tf.get_variable("W_2_attn", [insize+self.num_actions, self.num_attn_units], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
196 |       self.v_a = tf.get_variable("v_a_attn", [1, self.num_attn_units], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
197 |       self.memory_length = memory_length
198 |       self.w_out_c_t = tf.get_variable("w_out_c_t", [insize+self.num_actions, output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
199 |       self.attn_memory = queue.Queue(self.memory_length)
200 |       #self.call_counter = 0
201 | 
202 |     self.linear_output_size = output_size
203 | 
204 | 
205 |   @property
206 |   def state_size(self):
207 |     return self._cell.state_size
208 | 
209 |   @property
210 |   def output_size(self):
211 |     return self.linear_output_size
212 | 
213 |   def __call__(self, inputs, state, context, scope=None):
214 |     """Run the cell, then project its output through the linear decoder."""
215 | 
216 |     #self.call_counter = self.call_counter + 1 # temp fix
217 | 
218 |     # Run the rnn as usual
219 |     output, new_state = self._cell(inputs, state, context, scope)
220 | 
221 |     if self.is_attention and self.attn_memory.full():
222 |       # keep a rolling memory of the previous memory_length states (h_enc)
223 |       self.attn_memory.get()
224 |       self.attn_memory.put(tf.concat([new_state, context], axis=1))
225 | 
226 |     elif self.is_attention and (not self.attn_memory.full()):
227 |       self.attn_memory.put(tf.concat([new_state, context], axis=1))
228 | 
229 |     if self.is_attention: #and self.call_counter>50: # some flag to indicate when to use attention
230 |       # convert attn_memory -> list
231 |       list_attn_memory = list(self.attn_memory.queue)
232 | 
233 |       # apply attention and include c_t in the decoding to get y_hat
234 |       alpha, c_t = hard_att.bahdanau_attention(tf.concat([state, context], axis=1), list_attn_memory, self.v_a, self.W_1_attn, self.W_2_attn, self.memory_length)
235 |       output = tf.matmul(output, self.w_out) + tf.matmul(c_t, self.w_out_c_t) + self.b_out
236 | 
237 |     if not self.is_attention: #) or (self.is_attention and self.call_counter <= 50):
238 |       # Apply the multiplication to everything (when no attention is used to decode)
239 |       output = tf.matmul(output, self.w_out) + self.b_out
240 | 
241 |     # setting the counter back after 150 timesteps when attention is being used
242 |     #if self.is_attention and self.call_counter == 150:
243 |     #  self.call_counter = 0
244 | 
245 |     return output, new_state
246 | 
--------------------------------------------------------------------------------
/body_rnn_cell_extensions_v1.py:
--------------------------------------------------------------------------------
1 | 
2 | """ Extensions to TF RNN class by una_dinosaria"""
3 | 
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 | 
8 | import tensorflow as tf
9 | 
10 | #from tensorflow.contrib.rnn.python.ops.core_rnn_cell import RNNCell
11 | from rnn_cell_implement import RNNCell # modified body cell definitions
12 | #from deltaRNN import RNNCell # only for delta-RNN
13 | #from rnn_cell_implement import MultiRNNCell
14 | import hard_att
15 | import queue
16 | 
17 | # The import for LSTMStateTuple changes in TF >= 1.2.0
18 | from pkg_resources import parse_version as pv
19 | if pv(tf.__version__) >= pv('1.2.0'):
20 |   from tensorflow.contrib.rnn import LSTMStateTuple
21 | else:
22 |   from tensorflow.contrib.rnn.python.ops.core_rnn_cell import LSTMStateTuple
23 | del pv
24 | 
25 | from tensorflow.python.ops import variable_scope as vs
26 | 
27 | import collections
28 | import math
29 | 
30 | class ResidualWrapper(RNNCell):
31 |   """Operator adding residual connections to a given cell."""
32 | 
33 |   def __init__(self, cell):
34 |     """Create a cell with added residual connection.
35 | 
36 |     Args:
37 |       cell: an RNNCell. The input is added to the output.
38 | 
39 |     Raises:
40 |       TypeError: if cell is not an RNNCell.
41 |     """
42 |     if not isinstance(cell, RNNCell):
43 |       raise TypeError("The parameter cell is not a RNNCell.")
44 | 
45 |     self._cell = cell
46 | 
47 |   @property
48 |   def state_size(self):
49 |     return self._cell.state_size
50 | 
51 |   @property
52 |   def output_size(self):
53 |     return self._cell.output_size
54 | 
55 |   def __call__(self, inputs, state, context, scope=None): # modified
56 |     """Run the cell and add a residual connection."""
57 | 
58 |     # Run the rnn as usual
59 |     output, new_state = self._cell(inputs, state, context, scope) # modified
60 | 
61 |     # Add the residual connection
62 |     output = tf.add(output, inputs)
63 | 
64 |     return output, new_state
65 | 
66 | class ResidualWrapperv1(RNNCell):
67 |   """Operator adding residual connections to a given cell."""
68 | 
69 |   def __init__(self, cell, output_size):
70 |     """Create a cell with added residual connection.
71 | 
72 |     Args:
73 |       cell: an RNNCell. The output is a learned interpolation of the cell output and the input.
74 | 
75 |     Raises:
76 |       TypeError: if cell is not an RNNCell.
77 |     """
78 |     if not isinstance(cell, RNNCell):
79 |       raise TypeError("The parameter cell is not a RNNCell.")
80 | 
81 |     self._cell = cell
82 |     self._output_size = output_size
83 | 
84 |     self.r = tf.get_variable("r_interp", [self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
85 | 
86 |   @property
87 |   def state_size(self):
88 |     return self._cell.state_size
89 | 
90 |   @property
91 |   def output_size(self):
92 |     return self._cell.output_size
93 | 
94 |   def __call__(self, inputs, state, context, scope=None): # modified
95 |     """Run the cell and interpolate its output with the input."""
96 | 
97 |     # Run the rnn as usual
98 |     output, new_state = self._cell(inputs, state, context, scope) # modified
99 | 
100 |     # perform residual_v1 interpolation op
101 |     output = (1.0 - self.r) * output + self.r * inputs
102 | 
103 |     return output, new_state
104 | 
105 | 
106 | class ResidualWrapperv2(RNNCell):
107 |   """Operator adding residual connections to a given cell."""
108 | 
109 |   def __init__(self, cell, output_size):
110 |     """Create a cell with added residual connection.
111 | 
112 |     Args:
113 |       cell: an RNNCell. The output is a learned interpolation of the cell output and a linear transform of the input.
114 | 
115 |     Raises:
116 |       TypeError: if cell is not an RNNCell.
117 | """ 118 | if not isinstance(cell, RNNCell): 119 | raise TypeError("The parameter cell is not a RNNCell.") 120 | 121 | self._cell = cell 122 | self._output_size = output_size 123 | 124 | self.r = tf.get_variable("r_interp", [self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 125 | self.W_res = tf.get_variable("W_res", [self._output_size, self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 126 | self.b_res = tf.get_variable("b_res", [self._output_size], dtype=tf.float32, initializer=tf.constant_initializer(0.1)) 127 | 128 | 129 | @property 130 | def state_size(self): 131 | return self._cell.state_size 132 | 133 | @property 134 | def output_size(self): 135 | return self._cell.output_size 136 | 137 | def __call__(self, inputs, state, context, scope=None): # modified 138 | """Run the cell and add a residual connection.""" 139 | 140 | # Run the rnn as usual 141 | output, new_state = self._cell(inputs, state, context, scope) # modified 142 | 143 | # perform residual_v2 interpolation op 144 | output = (1.0 - self.r) * output + self.r * (tf.matmul(inputs, self.W_res) + self.b_res) 145 | 146 | return output, new_state 147 | 148 | 149 | class LinearSpaceDecoderWrapper(RNNCell): # modified 150 | """Operator adding a linear encoder to an RNN cell""" 151 | 152 | def __init__(self, cell, output_size): 153 | """Create a cell with with a linear encoder in space. 154 | 155 | Args: 156 | cell: an RNNCell. The input is passed through a linear layer. 157 | 158 | Raises: 159 | TypeError: if cell is not an RNNCell. 160 | """ 161 | if not isinstance(cell, RNNCell): # modified 162 | raise TypeError("The parameter cell is not a RNNCell.") 163 | 164 | self._cell = cell 165 | 166 | print( 'output_size = {0}'.format(output_size) ) 167 | print( ' state_size = {0}'.format(self._cell.state_size) ) 168 | 169 | # Tuple if multi-rnn 170 | if isinstance(self._cell.state_size,tuple): 171 | 172 | # Fine if GRU... 173 | insize = self._cell.state_size[-1] 174 | 175 | # LSTMStateTuple if LSTM 176 | if isinstance( insize, LSTMStateTuple ): 177 | insize = insize.h 178 | 179 | else: 180 | # Fine if not multi-rnn 181 | insize = self._cell.state_size 182 | 183 | # output projection params 184 | self.w_out = tf.get_variable("proj_w_out", [insize, output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 185 | self.b_out = tf.get_variable("proj_b_out", [output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 186 | self.linear_output_size = output_size 187 | 188 | 189 | @property 190 | def state_size(self): 191 | return self._cell.state_size 192 | 193 | @property 194 | def output_size(self): 195 | return self.linear_output_size 196 | 197 | def __call__(self, inputs, state, context, scope=None): 198 | """Use a linear layer and pass the output to the cell.""" 199 | 200 | # Run the rnn as usual 201 | output, new_state = self._cell(inputs, state, context, scope) 202 | 203 | output = tf.matmul(output, self.w_out) + self.b_out 204 | 205 | return output, new_state 206 | -------------------------------------------------------------------------------- /core_rnn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
--------------------------------------------------------------------------------
/core_rnn.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """RNN helpers for TensorFlow models."""
17 | 
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | 
22 | from tensorflow.contrib.rnn.python.ops import core_rnn_cell
23 | 
24 | import rnn_cell_implement # has modified RNNcell definitions
25 | 
26 | from tensorflow.python.framework import ops
27 | from tensorflow.python.framework import tensor_shape
28 | from tensorflow.python.ops import array_ops
29 | from tensorflow.python.ops import math_ops
30 | from tensorflow.python.ops import rnn
31 | from tensorflow.python.ops import rnn_cell_impl
32 | from tensorflow.python.ops import variable_scope as vs
33 | from tensorflow.python.util import nest
34 | 
35 | 
36 | # pylint: disable=protected-access
37 | #_state_size_with_prefix = rnn_cell_impl._state_size_with_prefix
38 | _state_size_with_prefix = rnn_cell_implement._state_size # modified
39 | _infer_state_dtype = rnn._infer_state_dtype
40 | _reverse_seq = rnn._reverse_seq
41 | _rnn_step = rnn._rnn_step
42 | # pylint: enable=protected-access
43 | 
44 | 
45 | def static_rnn(cell, inputs, context, initial_state=None, dtype=None,
46 |                sequence_length=None, scope=None):
47 |   """Creates a recurrent neural network specified by RNNCell `cell`.
48 |   The simplest form of RNN network generated is:
49 |   ```python
50 |     state = cell.zero_state(...)
51 |     outputs = []
52 |     for input_, context_ in zip(inputs, context):
53 |       output, state = cell(input_, state, context_)
54 |       outputs.append(output)
55 |     return (outputs, state)
56 |   ```
57 |   However, a few other options are available:
58 |   An initial state can be provided.
59 |   If the sequence_length vector is provided, dynamic calculation is performed.
60 |   This method of calculation does not compute the RNN steps past the maximum
61 |   sequence length of the minibatch (thus saving computational time),
62 |   and properly propagates the state at an example's sequence length
63 |   to the final state output.
64 |   The dynamic calculation performed is, at time `t` for batch row `b`,
65 |   ```python
66 |     (output, state)(b, t) =
67 |       (t >= sequence_length(b))
68 |         ? (zeros(cell.output_size), states(b, sequence_length(b) - 1))
69 |         : cell(input(b, t), state(b, t - 1))
70 |   ```
71 |   Args:
72 |     cell: An instance of RNNCell.
73 |     inputs: A length T list of inputs, each a `Tensor` of shape
74 |       `[batch_size, input_size]`, or a nested tuple of such elements.
    context: A length T list of per-timestep context tensors, consumed in
      lockstep with `inputs` (required by the modified cell definitions).
75 |     initial_state: (optional) An initial state for the RNN.
76 |       If `cell.state_size` is an integer, this must be
77 |       a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
78 |       If `cell.state_size` is a tuple, this should be a tuple of
79 |       tensors having shapes `[batch_size, s] for s in cell.state_size`.
80 |     dtype: (optional) The data type for the initial state and expected output.
81 |       Required if initial_state is not provided or RNN state has a heterogeneous
82 |       dtype.
83 |     sequence_length: Specifies the length of each sequence in inputs.
84 |       An int32 or int64 vector (tensor) size `[batch_size]`, values in `[0, T)`.
85 |     scope: VariableScope for the created subgraph; defaults to "rnn".
86 |   Returns:
87 |     A pair (outputs, state) where:
88 |     - outputs is a length T list of outputs (one for each input), or a nested
89 |       tuple of such elements.
90 |     - state is the final state
91 |   Raises:
92 |     TypeError: If `cell` is not an instance of RNNCell.
93 |     ValueError: If `inputs` is `None` or an empty list, or if the input depth
94 |       (column size) cannot be inferred from inputs via shape inference.
95 |   """
96 | 
97 |   if not isinstance(cell, rnn_cell_implement.RNNCell): # checking instance in modified cell def file
98 |     raise TypeError("cell must be an instance of RNNCell")
99 |   if not nest.is_sequence(inputs):
100 |     raise TypeError("inputs must be a sequence")
101 |   if not inputs:
102 |     raise ValueError("inputs must not be empty")
103 | 
104 |   outputs = []
105 |   # Create a new scope in which the caching device is either
106 |   # determined by the parent scope, or is set to place the cached
107 |   # Variable using the same placement as for the rest of the RNN.
108 |   with vs.variable_scope(scope or "rnn") as varscope:
109 |     if varscope.caching_device is None:
110 |       varscope.set_caching_device(lambda op: op.device)
111 | 
112 |     # Obtain the first sequence of the input
113 |     first_input = inputs
114 |     while nest.is_sequence(first_input):
115 |       first_input = first_input[0]
116 | 
117 |     # Temporarily avoid EmbeddingWrapper and seq2seq badness
118 |     # TODO(lukaszkaiser): remove EmbeddingWrapper
119 |     if first_input.get_shape().ndims != 1:
120 | 
121 |       input_shape = first_input.get_shape().with_rank_at_least(2)
122 |       fixed_batch_size = input_shape[0]
123 | 
124 |       flat_inputs = nest.flatten(inputs)
125 |       for flat_input in flat_inputs:
126 |         input_shape = flat_input.get_shape().with_rank_at_least(2)
127 |         batch_size, input_size = input_shape[0], input_shape[1:]
128 |         fixed_batch_size.merge_with(batch_size)
129 |         for i, size in enumerate(input_size):
130 |           if size.value is None:
131 |             raise ValueError(
132 |                 "Input size (dimension %d of inputs) must be accessible via "
133 |                 "shape inference, but saw value None." % i)
134 |     else:
135 |       fixed_batch_size = first_input.get_shape().with_rank_at_least(1)[0]
136 | 
137 |     if fixed_batch_size.value:
138 |       batch_size = fixed_batch_size.value
139 |     else:
140 |       batch_size = array_ops.shape(first_input)[0]
141 |     if initial_state is not None:
142 |       state = initial_state
143 |     else:
144 |       if not dtype:
145 |         raise ValueError("If no initial_state is provided, "
146 |                          "dtype must be specified")
147 |       state = cell.zero_state(batch_size, dtype)
148 | 
149 |     if sequence_length is not None: # Prepare variables
150 |       sequence_length = ops.convert_to_tensor(
151 |           sequence_length, name="sequence_length")
152 |       if sequence_length.get_shape().ndims not in (None, 1):
153 |         raise ValueError(
154 |             "sequence_length must be a vector of length batch_size")
155 |       def _create_zero_output(output_size):
156 |         # convert int to TensorShape if necessary
157 |         size = _state_size_with_prefix(output_size, prefix=[batch_size])
158 |         output = array_ops.zeros(
159 |             array_ops.stack(size), _infer_state_dtype(dtype, state))
160 |         shape = _state_size_with_prefix(
161 |             output_size, prefix=[fixed_batch_size.value])
162 |         output.set_shape(tensor_shape.TensorShape(shape))
163 |         return output
164 | 
165 |       output_size = cell.output_size
166 |       flat_output_size = nest.flatten(output_size)
167 |       flat_zero_output = tuple(
168 |           _create_zero_output(size) for size in flat_output_size)
169 |       zero_output = nest.pack_sequence_as(structure=output_size,
170 |                                           flat_sequence=flat_zero_output)
171 | 
172 |       sequence_length = math_ops.to_int32(sequence_length)
173 |       min_sequence_length = math_ops.reduce_min(sequence_length)
174 |       max_sequence_length = math_ops.reduce_max(sequence_length)
175 | 
176 |     for time, (input_, ctxt_) in enumerate(zip(inputs, context)): # modified to include context
177 |       if time > 0: varscope.reuse_variables()
178 |       # pylint: disable=cell-var-from-loop
179 |       call_cell = lambda: cell(input_, state, ctxt_) # call to modified RNNcell
180 |       # pylint: enable=cell-var-from-loop
181 |       if sequence_length is not None:
182 |         (output, state) = _rnn_step(
183 |             time=time,
184 |             sequence_length=sequence_length,
185 |             min_sequence_length=min_sequence_length,
186 |             max_sequence_length=max_sequence_length,
187 |             zero_output=zero_output,
188 |             state=state,
189 |             call_cell=call_cell,
190 |             state_size=cell.state_size)
191 |       else:
192 |         (output, state) = call_cell()
193 | 
194 |       outputs.append(output)
195 | 
196 |   return (outputs, state)
197 | 
198 | 
199 | def static_state_saving_rnn(cell, inputs, context, state_saver, state_name,
200 |                             sequence_length=None, scope=None): # modified to include context
201 |   """RNN that accepts a state saver for time-truncated RNN calculation.
202 |   Args:
203 |     cell: An instance of `RNNCell`.
204 |     inputs: A length T list of inputs, each a `Tensor` of shape
205 |       `[batch_size, input_size]`.
    context: A length T list of per-timestep context tensors (see `static_rnn`).
206 |     state_saver: A state saver object with methods `state` and `save_state`.
207 |     state_name: Python string or tuple of strings. The name to use with the
208 |       state_saver. If the cell returns tuples of states (i.e.,
209 |       `cell.state_size` is a tuple) then `state_name` should be a tuple of
210 |       strings having the same length as `cell.state_size`. Otherwise it should
211 |       be a single string.
212 |     sequence_length: (optional) An int32/int64 vector size [batch_size].
213 |       See the documentation for rnn() for more details about sequence_length.
214 |     scope: VariableScope for the created subgraph; defaults to "rnn".
215 |   Returns:
216 |     A pair (outputs, state) where:
217 |       outputs is a length T list of outputs (one for each input)
218 |       state is the final state
219 |   Raises:
220 |     TypeError: If `cell` is not an instance of RNNCell.
221 |     ValueError: If `inputs` is `None` or an empty list, or if the arity and
222 |       type of `state_name` does not match that of `cell.state_size`.
223 |   """
224 |   state_size = cell.state_size
225 |   state_is_tuple = nest.is_sequence(state_size)
226 |   state_name_tuple = nest.is_sequence(state_name)
227 | 
228 |   if state_is_tuple != state_name_tuple:
229 |     raise ValueError(
230 |         "state_name should be the same type as cell.state_size. "
231 |         "state_name: %s, cell.state_size: %s"
232 |         % (str(state_name), str(state_size)))
233 | 
234 |   if state_is_tuple:
235 |     state_name_flat = nest.flatten(state_name)
236 |     state_size_flat = nest.flatten(state_size)
237 | 
238 |     if len(state_name_flat) != len(state_size_flat):
239 |       raise ValueError("#elems(state_name) != #elems(state_size): %d vs. %d"
240 |                        % (len(state_name_flat), len(state_size_flat)))
241 | 
242 |     initial_state = nest.pack_sequence_as(
243 |         structure=state_size,
244 |         flat_sequence=[state_saver.state(s) for s in state_name_flat])
245 |   else:
246 |     initial_state = state_saver.state(state_name)
247 | 
248 |   (outputs, state) = static_rnn(cell, inputs, context, initial_state=initial_state,
249 |                                 sequence_length=sequence_length, scope=scope) # modified to pass context
250 | 
251 |   if state_is_tuple:
252 |     flat_state = nest.flatten(state)
253 |     state_name = nest.flatten(state_name)
254 |     save_state = [state_saver.save_state(name, substate)
255 |                   for name, substate in zip(state_name, flat_state)]
256 |   else:
257 |     save_state = [state_saver.save_state(state_name, state)]
258 | 
259 |   with ops.control_dependencies(save_state):
260 |     last_output = outputs[-1]
261 |     flat_last_output = nest.flatten(last_output)
262 |     flat_last_output = [
263 |         array_ops.identity(output) for output in flat_last_output]
264 |     outputs[-1] = nest.pack_sequence_as(structure=last_output,
265 |                                         flat_sequence=flat_last_output)
266 | 
267 |   return (outputs, state)
268 | 
269 | 
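# Example usage of the modified static_rnn (a sketch; names, shapes and sizes are
# illustrative, not taken from this repo). One context tensor is consumed per
# timestep, in lockstep with `inputs`:
#   inputs = [tf.placeholder(tf.float32, [batch_size, input_dim]) for _ in range(T)]
#   context = [tf.placeholder(tf.float32, [batch_size, context_dim]) for _ in range(T)]
#   outputs, final_state = static_rnn(cell, inputs, context, dtype=tf.float32)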
270 | def static_bidirectional_rnn(cell_fw, cell_bw, inputs, context, # modified to include context
271 |                              initial_state_fw=None, initial_state_bw=None,
272 |                              dtype=None, sequence_length=None, scope=None):
273 |   """Creates a bidirectional recurrent neural network.
274 |   Similar to the unidirectional case above (rnn) but takes input and builds
275 |   independent forward and backward RNNs with the final forward and backward
276 |   outputs depth-concatenated, such that the output will have the format
277 |   [time][batch][cell_fw.output_size + cell_bw.output_size]. The input_size of
278 |   forward and backward cell must match. The initial state for both directions
279 |   is zero by default (but can be set optionally) and no intermediate states are
280 |   ever returned -- the network is fully unrolled for the given (passed in)
281 |   length(s) of the sequence(s) or completely unrolled if length(s) is not given.
282 |   Args:
283 |     cell_fw: An instance of RNNCell, to be used for forward direction.
284 |     cell_bw: An instance of RNNCell, to be used for backward direction.
285 |     inputs: A length T list of inputs, each a tensor of shape
286 |       [batch_size, input_size], or a nested tuple of such elements.
    context: A length T list of per-timestep context tensors (see `static_rnn`);
      it is reversed alongside the inputs for the backward pass.
287 |     initial_state_fw: (optional) An initial state for the forward RNN.
288 |       This must be a tensor of appropriate type and shape
289 |       `[batch_size, cell_fw.state_size]`.
290 |       If `cell_fw.state_size` is a tuple, this should be a tuple of
291 |       tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
292 |     initial_state_bw: (optional) Same as for `initial_state_fw`, but using
293 |       the corresponding properties of `cell_bw`.
294 |     dtype: (optional) The data type for the initial state. Required if
295 |       either of the initial states are not provided.
296 |     sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
297 |       containing the actual lengths for each of the sequences.
298 |     scope: VariableScope for the created subgraph; defaults to
299 |       "bidirectional_rnn"
300 |   Returns:
301 |     A tuple (outputs, output_state_fw, output_state_bw) where:
302 |       outputs is a length `T` list of outputs (one for each input), which
303 |         are depth-concatenated forward and backward outputs.
304 |       output_state_fw is the final state of the forward rnn.
305 |       output_state_bw is the final state of the backward rnn.
306 |   Raises:
307 |     TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
308 |     ValueError: If inputs is None or an empty list.
309 |   """
310 | 
311 |   if not isinstance(cell_fw, rnn_cell_implement.RNNCell): # modified, matching static_rnn's check
312 |     raise TypeError("cell_fw must be an instance of RNNCell")
313 |   if not isinstance(cell_bw, rnn_cell_implement.RNNCell): # modified, matching static_rnn's check
314 |     raise TypeError("cell_bw must be an instance of RNNCell")
315 |   if not nest.is_sequence(inputs):
316 |     raise TypeError("inputs must be a sequence")
317 |   if not inputs:
318 |     raise ValueError("inputs must not be empty")
319 | 
320 |   with vs.variable_scope(scope or "bidirectional_rnn"):
321 |     # Forward direction
322 |     with vs.variable_scope("fw") as fw_scope:
323 |       output_fw, output_state_fw = static_rnn(
324 |           cell_fw, inputs, context, initial_state_fw, dtype,
325 |           sequence_length, scope=fw_scope) # modified to pass context
326 | 
327 |     # Backward direction
328 |     with vs.variable_scope("bw") as bw_scope:
329 |       reversed_inputs = _reverse_seq(inputs, sequence_length)
      reversed_context = _reverse_seq(context, sequence_length) # keep context aligned with the reversed inputs
330 |       tmp, output_state_bw = static_rnn(
331 |           cell_bw, reversed_inputs, reversed_context, initial_state_bw,
332 |           dtype, sequence_length, scope=bw_scope) # modified to pass context
333 | 
334 |     output_bw = _reverse_seq(tmp, sequence_length)
335 |     # Concat each of the forward/backward outputs
336 |     flat_output_fw = nest.flatten(output_fw)
337 |     flat_output_bw = nest.flatten(output_bw)
338 | 
339 |     flat_outputs = tuple(
340 |         array_ops.concat([fw, bw], 1)
341 |         for fw, bw in zip(flat_output_fw, flat_output_bw))
342 | 
343 |     outputs = nest.pack_sequence_as(structure=output_fw,
344 |                                     flat_sequence=flat_outputs)
345 | 
346 |   return (outputs, output_state_fw, output_state_bw)
347 | 
348 | 
--------------------------------------------------------------------------------
/data_utils.py:
--------------------------------------------------------------------------------
1 | 
2 | """Functions that help with data processing for human3.6m"""
3 | 
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 | 
8 | import numpy as np
9 | from six.moves import xrange # pylint: disable=redefined-builtin
10 | import copy
11 | 
12 | import itertools
13 | 
14 | def rotmat2euler( R ):
15 |   """
16 |   Converts a rotation matrix to Euler angles
17 |   Matlab port to python for evaluation purposes
18 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/RotMat2Euler.m#L1
19 | 
20 |   Args
21 |     R: a 3x3 rotation matrix
22 |   Returns
23 |     eul: a 3x1 Euler angle representation of R
24 |   """
25 |   if R[0,2] == 1 or R[0,2] == -1:
26 |     # special case
27 |     E3 = 0 # set arbitrarily
28 |     dlta = np.arctan2( R[0,1], R[0,2] );
29 | 
30 |     if R[0,2] == -1:
31 |       E2 = np.pi/2;
32 |       E1 = E3 + dlta;
33 |     else:
34 |       E2 = -np.pi/2;
35 |       E1 = -E3 + dlta;
36 | 
37 |   else:
38 |     E2 = -np.arcsin( R[0,2] )
39 |     E1 = np.arctan2( R[1,2]/np.cos(E2), R[2,2]/np.cos(E2) )
40 |     E3 = np.arctan2( R[0,1]/np.cos(E2), R[0,0]/np.cos(E2) )
41 | 
42 |   eul = np.array([E1, E2, E3]);
43 |   return eul
44 | 
45 | 
46 | def quat2expmap(q):
47 |   """
48 |   Converts a quaternion to an exponential map
49 |   Matlab port to python for evaluation purposes
50 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/quat2expmap.m#L1
51 | 
52 |   Args
53 |     q: 1x4 quaternion
54 |   Returns
55 |     r: 1x3 exponential map
56 |   Raises
57 |     ValueError if the l2 norm of the quaternion is not close to 1
58 |   """
59 |   if (np.abs(np.linalg.norm(q)-1)>1e-3):
60 |     raise ValueError("quat2expmap: input quaternion is not norm 1")
61 | 
62 |   sinhalftheta = np.linalg.norm(q[1:])
63 |   coshalftheta = q[0]
64 | 
65 |   r0 = np.divide( q[1:], (np.linalg.norm(q[1:]) + np.finfo(np.float32).eps));
66 |   theta = 2 * np.arctan2( sinhalftheta, coshalftheta )
67 |   theta = np.mod( theta + 2*np.pi, 2*np.pi )
68 | 
69 |   if theta > np.pi:
70 |     theta = 2 * np.pi - theta
71 |     r0 = -r0
72 | 
73 |   r = r0 * theta
74 |   return r
75 | 
76 | def rotmat2quat(R):
77 |   """
78 |   Converts a rotation matrix to a quaternion
79 |   Matlab port to python for evaluation purposes
80 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/rotmat2quat.m#L4
81 | 
82 |   Args
83 |     R: 3x3 rotation matrix
84 |   Returns
85 |     q: 1x4 quaternion
86 |   """
87 |   rotdiff = R - R.T;
88 | 
89 |   r = np.zeros(3)
90 |   r[0] = -rotdiff[1,2]
91 |   r[1] = rotdiff[0,2]
92 |   r[2] = -rotdiff[0,1]
93 |   sintheta = np.linalg.norm(r) / 2;
94 |   r0 = np.divide(r, np.linalg.norm(r) + np.finfo(np.float32).eps );
95 | 
96 |   costheta = (np.trace(R)-1) / 2;
97 | 
98 |   theta = np.arctan2( sintheta, costheta );
99 | 
100 |   q = np.zeros(4)
101 |   q[0] = np.cos(theta/2)
102 |   q[1:] = r0*np.sin(theta/2)
103 |   return q
104 | 
105 | def rotmat2expmap(R):
106 |   return quat2expmap( rotmat2quat(R) );
107 | 
108 | def expmap2rotmat(r):
109 |   """
110 |   Converts an exponential map angle to a rotation matrix
111 |   Matlab port to python for evaluation purposes
112 |   I believe this is also called Rodrigues' formula
113 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/expmap2rotmat.m
114 | 
115 |   Args
116 |     r: 1x3 exponential map
117 |   Returns
118 |     R: 3x3 rotation matrix
119 |   """
120 |   theta = np.linalg.norm( r )
121 |   r0 = np.divide( r, theta + np.finfo(np.float32).eps )
122 |   r0x = np.array([0, -r0[2], r0[1], 0, 0, -r0[0], 0, 0, 0]).reshape(3,3)
123 |   r0x = r0x - r0x.T
124 |   R = np.eye(3,3) + np.sin(theta)*r0x + (1-np.cos(theta))*(r0x).dot(r0x);
125 |   return R
126 | 
127 | 
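# Example (a sketch of how these converters chain): evaluation converts each
# 3-vector of exponential-map angles to Euler angles via
#   eul = rotmat2euler( expmap2rotmat( r ) )
# exactly as done in baselines.py's denormalize_and_convert_to_euler.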
128 | def unNormalizeData(normalizedData, data_mean, data_std, dimensions_to_ignore, actions, one_hot ):
129 |   """Borrowed from SRNN code. Un-normalizes a matrix back into its original representation.
130 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/generateMotionData.py#L12
131 | 
132 |   Args
133 |     normalizedData: nxd matrix with normalized data
134 |     data_mean: vector of mean used to normalize the data
135 |     data_std: vector of standard deviation used to normalize the data
136 |     dimensions_to_ignore: vector with dimensions not used by the model
137 |     actions: list of strings with the encoded actions
138 |     one_hot: whether the data comes with one-hot encoding
139 |   Returns
140 |     origData: the data mapped back to its original, un-normalized representation
141 |   """
142 |   T = normalizedData.shape[0]
143 |   D = data_mean.shape[0]
144 | 
145 |   origData = np.zeros((T, D), dtype=np.float32)
146 |   dimensions_to_use = []
147 |   for i in range(D):
148 |     if i in dimensions_to_ignore:
149 |       continue
150 |     dimensions_to_use.append(i)
151 |   dimensions_to_use = np.array(dimensions_to_use)
152 | 
153 |   if one_hot:
154 |     origData[:, dimensions_to_use] = normalizedData[:, :-len(actions)]
155 |   else:
156 |     origData[:, dimensions_to_use] = normalizedData
157 | 
158 |   # potentially inefficient, but only done once per experiment
159 |   stdMat = data_std.reshape((1, D))
160 |   stdMat = np.repeat(stdMat, T, axis=0)
161 |   meanMat = data_mean.reshape((1, D))
162 |   meanMat = np.repeat(meanMat, T, axis=0)
163 |   origData = np.multiply(origData, stdMat) + meanMat
164 |   return origData
165 | 
166 | 
167 | def revert_output_format(poses, data_mean, data_std, dim_to_ignore, actions, one_hot):
168 |   """
169 |   Converts the output of the neural network to a format that is easier to
170 |   manipulate, e.g. for conversion to another format or for visualization
171 | 
172 |   Args
173 |     poses: The output from the TF model. A list with (seq_length) entries,
174 |       each with a (batch_size, dim) output
175 |   Returns
176 |     poses_out: A tensor of size (batch_size, seq_length, dim) output. Each
177 |       batch is an n-by-d sequence of poses.
178 |   """
179 |   seq_len = len(poses)
180 |   if seq_len == 0:
181 |     return []
182 | 
183 |   batch_size, dim = poses[0].shape
184 | 
185 |   poses_out = np.concatenate(poses)
186 |   poses_out = np.reshape(poses_out, (seq_len, batch_size, dim))
187 |   poses_out = np.transpose(poses_out, [1, 0, 2])
188 | 
189 |   poses_out_list = []
190 |   for i in xrange(poses_out.shape[0]):
191 |     poses_out_list.append(unNormalizeData(poses_out[i, :, :], data_mean, data_std, dim_to_ignore, actions, one_hot))
192 | 
193 |   return poses_out_list
194 | 
195 | 
196 | def readCSVasFloat(filename):
197 |   """
198 |   Borrowed from SRNN code. Reads a csv and returns a float matrix.
199 |   https://github.com/asheshjain399/NeuralModels/blob/master/neuralmodels/utils.py#L34
200 | 
201 |   Args
202 |     filename: string. Path to the csv file
203 |   Returns
204 |     returnArray: the read data in a float32 matrix
205 |   """
206 |   returnArray = []
207 |   lines = open(filename).readlines()
208 |   for line in lines:
209 |     line = line.strip().split(',')
210 |     if len(line) > 0:
211 |       returnArray.append(np.array([np.float32(x) for x in line]))
212 | 
213 |   returnArray = np.array(returnArray)
214 |   return returnArray
215 | 
216 | 
217 | def load_data(path_to_dataset, subjects, actions, one_hot):
218 |   """
219 |   Borrowed from SRNN code. This is how the SRNN code reads the provided .txt files
220 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/processdata.py#L270
221 | 
222 |   Args
223 |     path_to_dataset: string. directory where the data resides
224 |     subjects: list of numbers. The subjects to load
225 |     actions: list of strings. The actions to load
226 |     one_hot: Whether to add a one-hot encoding to the data
227 |   Returns
228 |     trainData: dictionary with k:v
229 |       k=(subject, action, subaction, 'even'), v=(nxd) un-normalized data
230 |     completeData: nxd matrix with all the data. Used for normalization stats
231 |   """
232 |   nactions = len( actions )
233 | 
234 |   trainData = {}
235 |   completeData = []
236 |   total_frames = 0
237 |   for subj in subjects:
238 |     for action_idx in np.arange(len(actions)):
239 | 
240 |       action = actions[ action_idx ]
241 | 
242 |       for subact in [1, 2]: # subactions
243 | 
244 |         print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact))
245 | 
246 |         filename = '{0}/S{1}/{2}_{3}.txt'.format( path_to_dataset, subj, action, subact)
247 |         action_sequence = readCSVasFloat(filename)
248 | 
249 |         n, d = action_sequence.shape
250 |         even_list = range(0, n, 2)
251 | 
252 |         if one_hot:
253 |           # Add a one-hot encoding at the end of the representation
254 |           the_sequence = np.zeros( (len(even_list), d + nactions), dtype=float )
255 |           the_sequence[ :, 0:d ] = action_sequence[even_list, :]
256 |           the_sequence[ :, d+action_idx ] = 1
257 |           trainData[(subj, action, subact, 'even')] = the_sequence
258 | 
259 |         else:
260 |           trainData[(subj, action, subact, 'even')] = action_sequence[even_list, :]
261 | 
262 | 
263 |         if len(completeData) == 0:
264 |           completeData = copy.deepcopy(action_sequence)
265 |         else:
266 |           completeData = np.append(completeData, action_sequence, axis=0)
267 | 
268 |   return trainData, completeData
269 | 
270 | 
271 | def normalize_data( data, data_mean, data_std, dim_to_use, actions, one_hot ):
272 |   """
273 |   Normalize input data by removing unused dimensions, subtracting the mean and
274 |   dividing by the standard deviation
275 | 
276 |   Args
277 |     data: nx99 matrix with data to normalize
278 |     data_mean: vector of mean used to normalize the data
279 |     data_std: vector of standard deviation used to normalize the data
280 |     dim_to_use: vector with dimensions used by the model
281 |     actions: list of strings with the encoded actions
282 |     one_hot: whether the data comes with one-hot encoding
283 |   Returns
284 |     data_out: the passed data matrix, but normalized
285 |   """
286 |   data_out = {}
287 |   nactions = len(actions)
288 | 
289 |   if not one_hot:
290 |     # No one-hot encoding... no need to do anything special
291 |     for key in data.keys():
292 |       data_out[ key ] = np.divide( (data[key] - data_mean), data_std )
293 |       data_out[ key ] = data_out[ key ][ :, dim_to_use ] # comment this line out if you want to model all_dims
294 | 
295 |   else:
296 |     # TODO hard-coding 99 dimensions for un-normalized human poses
297 |     for key in data.keys():
298 |       data_out[ key ] = np.divide( (data[key][:, 0:99] - data_mean), data_std )
299 |       data_out[ key ] = data_out[ key ][ :, dim_to_use ] # comment this line out if you want to model all_dims
300 |       data_out[ key ] = np.hstack( (data_out[key], data[key][:,-nactions:]) )
301 | 
302 |   return data_out
303 | 
304 | 
305 | def normalization_stats(completeData):
306 |   """
307 |   Also borrowed from SRNN code. Computes mean, stdev and dimensions to ignore.
308 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/processdata.py#L33
309 | 
310 |   Args
311 |     completeData: nx99 matrix with data to normalize
312 |   Returns
313 |     data_mean: vector of mean used to normalize the data
314 |     data_std: vector of standard deviation used to normalize the data
315 |     dimensions_to_ignore: vector with dimensions not used by the model
316 |     dimensions_to_use: vector with dimensions used by the model
317 |   """
318 |   data_mean = np.mean(completeData, axis=0)
319 |   data_std = np.std(completeData, axis=0)
320 | 
321 |   dimensions_to_ignore = []
322 |   dimensions_to_use = []
323 | 
324 |   dimensions_to_ignore.extend( list(np.where(data_std < 1e-4)[0]) )
325 |   dimensions_to_use.extend( list(np.where(data_std >= 1e-4)[0]) )
326 | 
327 |   data_std[dimensions_to_ignore] = 1.0 # comment this line out to avoid modifying the std of ignored dims
328 | 
329 |   return data_mean, data_std, dimensions_to_ignore, dimensions_to_use
330 | 
331 | 
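# Example usage (a sketch; the exact call sites live in the training scripts, which
# are assumed here): stats are computed once on the concatenated training data and
# then applied per sequence:
#   data_mean, data_std, dim_to_ignore, dim_to_use = normalization_stats(completeData)
#   train_norm = normalize_data(trainData, data_mean, data_std, dim_to_use, actions, one_hot)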
332 | def body_part_features():
333 |   """
334 |   function to return feature_idx ranges of the dims_to_use vector for different body parts,
335 |   e.g. torso, left_arm, right_arm, left_leg, right_leg.
336 |   Outputs: dict which contains start_idx:end_idx for each of the above mentioned body parts
337 |     key: 'torso', 'right_arm', 'left_arm', 'right_leg', 'left_leg'
338 |     value: list of idxs of the dims_to_use vector relevant to that body part
339 |   """
340 |   dims_to_use = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85, 86]
341 |   np_dims_to_use = np.asarray(dims_to_use)
342 | 
343 |   # defining the ranges of features for different body parts
344 |   node_feature_ranges = {}
345 |   node_feature_ranges['torso'] = ((0,5),(36,50))
346 |   node_feature_ranges['right_arm'] = ((75,98),)
347 |   node_feature_ranges['left_arm'] = ((51,74),)
348 |   node_feature_ranges['right_leg'] = ((6,20),)
349 |   node_feature_ranges['left_leg'] = ((21,35),)
350 | 
351 |   body_part_dims = {}
352 | 
353 |   for key in node_feature_ranges.keys():
354 |     # resetting the list which stores idxs of body parts
355 |     part_dims = []
356 |     for value in node_feature_ranges[key]:
357 |       # find indices in dims_to_use which fall in the specified body ranges
358 |       idxs = np.where( (np_dims_to_use >= value[0]) & (np_dims_to_use <= value[1]) )
359 | 
360 |       # convert back to tuple
361 |       #idxs = np.ndarray.tolist(idxs[0])
362 | 
363 |       # collect and store them in a list
364 |       part_dims.append(idxs[0])
365 | 
366 |     merged_part_dims = list(itertools.chain(*part_dims))
367 |     merged_part_dims = np.asarray(merged_part_dims)
368 |     # assign dims found
369 |     body_part_dims[key] = merged_part_dims
370 | 
371 |   return body_part_dims
372 | 
373 | def pearson_corr_coef(X, Y):
374 |   """
375 |   function to return Pearson's Correlation Coefficient between two samples X, Y
376 |   Inputs:
377 |     X - n x D (n = samples, D = feature_dims)
378 |     Y - same shape as X
379 |   Outputs:
380 |     r - Pearson's Corr Coeff
381 |   """
382 |   r = np.sum( np.mean(X - np.mean(X,0),1) * np.mean(Y - np.mean(Y,0),1) ) / ( np.mean(np.std(X,0)) * np.mean(np.std(Y,0)) )
383 | 
384 |   return r
385 | 
386 | def KL_multi_var(X_mean, X_cov, Y_mean, Y_cov):
387 | 
388 |   #function to return the multi-variate KL divergence between two Gaussian RVs
389 |   #Inputs:
390 |   #X_mean = mean vector of 1st RV
391 |   #X_cov = covariance matrix of 1st RV
392 |   #Y_mean = mean vector of 2nd RV
393 |   #Y_cov = covariance matrix of 2nd RV
394 | 
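  # Closed form computed below:
  #   KL( N(X_mean, X_cov) || N(Y_mean, Y_cov) )
  #     = 0.5 * log( det(Y_cov) / det(X_cov) )
  #       + 0.5 * trace( inv(Y_cov) * [ (X_mean - Y_mean)(X_mean - Y_mean)^T + X_cov - Y_cov ] )
  # which folds the usual -d, trace and quadratic terms of the Gaussian KL into a single trace.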
np.linalg.inv(X_cov) 396 | inv_Y_cov = np.linalg.inv(Y_cov) 397 | last_term = np.matmul((X_mean - Y_mean), (X_mean - Y_mean).T) + X_cov - Y_cov 398 | 399 | kl_xy = 0.5*np.log(np.linalg.det(np.matmul(Y_cov, inv_X_cov))) + 0.5*np.trace(np.matmul(inv_Y_cov, last_term)) 400 | return kl_xy 401 | 402 | -------------------------------------------------------------------------------- /deltaRNN.py: -------------------------------------------------------------------------------- 1 | #""" 2 | #Author :- Ankur Mali 3 | #""" 4 | 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | import numpy as np 9 | #from tensorflow.python.ops.rnn_cell import RNNCell 10 | #from rnn_cell_impl import RNNCell 11 | from rnn_cell_implement import RNNCell 12 | 13 | class DeltaRNNCell(RNNCell): 14 | #""" 15 | #Delta RNN - Differential Framework. 16 | #Alexander G. Ororbia II, Tomas Mikolov and David Reitter, 17 | #"Learning Simpler Language Models with the 18 | # Delta Recurrent Neural Network Framework" 19 | #""" 20 | 21 | def __init__(self, num_units, apply_layer_norm=False): 22 | self._num_units = num_units 23 | self._apply_layer_norm = apply_layer_norm 24 | if self._apply_layer_norm: 25 | self._layer_norm = tf.contrib.layers.layer_norm 26 | 27 | @property 28 | def input_size(self): 29 | return self._num_units 30 | 31 | @property 32 | def output_size(self): 33 | return self._num_units 34 | 35 | @property 36 | def state_size(self): 37 | return self._num_units 38 | 39 | def _outer_function(self, inner_function_output, 40 | past_hidden_state, activation=tf.nn.relu, 41 | wx_parameterization_gate=True, scope=None): 42 | #"""Check Equation 3 in Delta RNN paper 43 | # for basic understanding and to relate our code with papers maths. 44 | #""" 45 | 46 | assert inner_function_output.get_shape().as_list() == \ 47 | past_hidden_state.get_shape().as_list() 48 | 49 | with tf.variable_scope(scope or type(self).__name__): 50 | with tf.variable_scope("OuterFunction"): 51 | r_bias = tf.get_variable( 52 | "outer_function_gate", 53 | [self._num_units], 54 | dtype=tf.float32, initializer=tf.zeros_initializer) 55 | 56 | # Equation 5 in Alex(DRNN paper) 57 | if wx_parameterization_gate: 58 | r = self._W_x_inputs + r_bias 59 | else: 60 | r = r_bias 61 | 62 | gate = tf.nn.sigmoid(r) 63 | output = activation((1.0 - gate) * inner_function_output + gate * past_hidden_state) 64 | 65 | return output 66 | # End of outer function 67 | 68 | # Inner function 69 | def _inner_function(self, inputs, past_hidden_state, 70 | activation=tf.nn.tanh, scope=None): 71 | #second order function as described equation 11 in delta rnn paper 72 | #This is used in inner function 73 | 74 | with tf.variable_scope(scope or type(self).__name__): 75 | with tf.variable_scope("InnerFunction"): 76 | with tf.variable_scope("Vh"): 77 | V_h = _linear(past_hidden_state, self._num_units, True) 78 | 79 | with tf.variable_scope("Wx"): 80 | self._W_x_inputs = _linear(inputs, self._num_units, True) 81 | 82 | alpha = tf.get_variable( 83 | "alpha", [self._num_units], dtype=tf.float32, 84 | initializer=tf.constant_initializer(2.0)) 85 | # alpha value 2.0 works better than 1.0 86 | beta_one = tf.get_variable( 87 | "beta_one", [self._num_units], dtype=tf.float32, 88 | initializer=tf.constant_initializer(1.0)) 89 | 90 | beta_two = tf.get_variable( 91 | "beta_two", [self._num_units], dtype=tf.float32, 92 | initializer=tf.constant_initializer(1.0)) 93 | 94 | z_t_bias = tf.get_variable( 95 | "z_t_bias", [self._num_units], dtype=tf.float32, 96 | initializer=tf.constant_initializer(0.0)) 
97 | 98 | # 2nd order calculation 99 | #You can change activation function but before get familiar with gating operations and mathematical notations 100 | d_1_t = alpha * V_h * self._W_x_inputs 101 | d_2_t = beta_one * V_h + beta_two * self._W_x_inputs 102 | 103 | if self._apply_layer_norm: 104 | d_1_t = self._layer_norm(d_1_t) 105 | d_2_t = self._layer_norm(d_2_t) 106 | 107 | z_t = activation(d_1_t + d_2_t + z_t_bias) 108 | 109 | return z_t 110 | 111 | def __call__(self, inputs, state, scope=None): 112 | inner_function_output = self._inner_function(inputs, state) 113 | output = self._outer_function(inner_function_output, state) 114 | 115 | 116 | return output, output 117 | 118 | 119 | 120 | class DeltaRNNCellBody(RNNCell): 121 | # 122 | #Delta RNN - Differential Framework. 123 | #Alexander G. Ororbia II, Tomas Mikolov and David Reitter, 124 | #"Learning Simpler Language Models with the 125 | # Delta Recurrent Neural Network Framework" 126 | #""" 127 | 128 | def __init__(self, num_units, apply_layer_norm=False): 129 | self._num_units = num_units 130 | self._apply_layer_norm = apply_layer_norm 131 | if self._apply_layer_norm: 132 | self._layer_norm = tf.contrib.layers.layer_norm 133 | 134 | @property 135 | def input_size(self): 136 | return self._num_units 137 | 138 | @property 139 | def output_size(self): 140 | return self._num_units 141 | 142 | @property 143 | def state_size(self): 144 | return self._num_units 145 | 146 | def _outer_function(self, inner_function_output, 147 | past_hidden_state, activation=tf.nn.relu, 148 | wx_parameterization_gate=True, scope=None): 149 | #"""Check Equation 3 in Delta RNN paper 150 | # for basic understanding and to relate our code with papers maths. 151 | #""" 152 | 153 | assert inner_function_output.get_shape().as_list() == \ 154 | past_hidden_state.get_shape().as_list() 155 | 156 | with tf.variable_scope(scope or type(self).__name__): 157 | with tf.variable_scope("OuterFunction"): 158 | r_bias = tf.get_variable( 159 | "outer_function_gate", 160 | [self._num_units], 161 | dtype=tf.float32, initializer=tf.zeros_initializer) 162 | 163 | # Equation 5 in Alex(DRNN paper) 164 | if wx_parameterization_gate: 165 | r = self._W_x_inputs + r_bias 166 | else: 167 | r = r_bias 168 | 169 | gate = tf.nn.sigmoid(r) 170 | output = activation((1.0 - gate) * inner_function_output + gate * past_hidden_state) 171 | 172 | return output 173 | # """ End of outer function """ 174 | 175 | # """ Inner function """ 176 | def _inner_function(self, inputs, past_hidden_state, context, activation=tf.nn.tanh, scope=None): # modified 177 | #"""second order function as described equation 11 in delta rnn paper 178 | #This is used in inner function 179 | #""" 180 | with tf.variable_scope(scope or type(self).__name__): 181 | with tf.variable_scope("InnerFunction"): 182 | with tf.variable_scope("Vh"): 183 | V_h = _linear(past_hidden_state, self._num_units, True) 184 | 185 | with tf.variable_scope("Qm"): # modified 186 | Q_m = _linear(context, self._num_units, True) 187 | 188 | with tf.variable_scope("Wx"): 189 | self._W_x_inputs = _linear(inputs, self._num_units, True) 190 | 191 | alpha = tf.get_variable( 192 | "alpha", [self._num_units], dtype=tf.float32, 193 | initializer=tf.constant_initializer(2.0)) 194 | #""" alpha value 2.0 works better than 1.0""" 195 | beta_one = tf.get_variable( 196 | "beta_one", [self._num_units], dtype=tf.float32, 197 | initializer=tf.constant_initializer(1.0)) 198 | 199 | beta_two = tf.get_variable( 200 | "beta_two", [self._num_units], dtype=tf.float32, 201 | 
initializer=tf.constant_initializer(1.0)) 202 | 203 | z_t_bias = tf.get_variable( 204 | "z_t_bias", [self._num_units], dtype=tf.float32, 205 | initializer=tf.constant_initializer(0.0)) 206 | 207 | # 2nd order calculation 208 | #You can change activation function but before get familiar with gating operations and mathematical notations 209 | d_1_t = alpha * V_h * ( self._W_x_inputs + Q_m ) # modified 210 | d_2_t = beta_one * V_h + beta_two * ( self._W_x_inputs + Q_m ) # modified 211 | 212 | if self._apply_layer_norm: 213 | d_1_t = self._layer_norm(d_1_t) 214 | d_2_t = self._layer_norm(d_2_t) 215 | 216 | z_t = activation(d_1_t + d_2_t + z_t_bias) 217 | 218 | return z_t 219 | 220 | def __call__(self, inputs, state, context, scope=None): 221 | inner_function_output = self._inner_function(inputs, state, context) 222 | output = self._outer_function(inner_function_output, state) 223 | 224 | 225 | return output, output 226 | 227 | 228 | class DeltaRNNCellBodyFlow(RNNCell): 229 | # 230 | #Delta RNN - Differential Framework. 231 | #Alexander G. Ororbia II, Tomas Mikolov and David Reitter, 232 | #"Learning Simpler Language Models with the 233 | # Delta Recurrent Neural Network Framework" 234 | #""" 235 | 236 | def __init__(self, num_units, apply_layer_norm=False): 237 | self._num_units = num_units 238 | self._apply_layer_norm = apply_layer_norm 239 | if self._apply_layer_norm: 240 | self._layer_norm = tf.contrib.layers.layer_norm 241 | 242 | @property 243 | def input_size(self): 244 | return self._num_units 245 | 246 | @property 247 | def output_size(self): 248 | return self._num_units 249 | 250 | @property 251 | def state_size(self): 252 | return self._num_units 253 | 254 | def _outer_function(self, inputs, inner_function_output, 255 | past_hidden_state, activation=tf.nn.relu, 256 | wx_parameterization_gate=True, scope=None): 257 | #"""Check Equation 3 in Delta RNN paper 258 | # for basic understanding and to relate our code with papers maths. 
259 | #""" 260 | 261 | assert inner_function_output.get_shape().as_list() == \ 262 | past_hidden_state.get_shape().as_list() 263 | 264 | with tf.variable_scope(scope or type(self).__name__): 265 | with tf.variable_scope("OuterFunction"): 266 | r_bias = tf.get_variable("outer_function_vel_bias", [self._num_units], dtype=tf.float32, initializer=tf.zeros_initializer) 267 | W_vel = tf.get_variable("outer_function_W_vel", [54, self._num_units ], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 268 | 269 | # Equation 5 in Alex(DRNN paper) 270 | if wx_parameterization_gate: 271 | #r = self._W_x_inputs + r_bias 272 | r = tf.matmul(inputs[:,54:108], W_vel) + r_bias # modified 273 | else: 274 | r = r_bias 275 | 276 | gate = tf.nn.sigmoid(r) 277 | output = activation((1.0 - gate) * inner_function_output + gate * past_hidden_state) 278 | 279 | return output 280 | # """ End of outer function """ 281 | 282 | # """ Inner function """ 283 | def _inner_function(self, inputs, past_hidden_state, context, activation=tf.nn.tanh, scope=None): # modified 284 | #"""second order function as described equation 11 in delta rnn paper 285 | #This is used in inner function 286 | #""" 287 | with tf.variable_scope(scope or type(self).__name__): 288 | with tf.variable_scope("InnerFunction"): 289 | with tf.variable_scope("Vh"): 290 | V_h = _linear(past_hidden_state, self._num_units, True) 291 | 292 | with tf.variable_scope("Qm"): # modified 293 | Q_m = _linear(context, self._num_units, True) 294 | 295 | with tf.variable_scope("Wx"): 296 | self._W_x_inputs = _linear(inputs[:,0:54], self._num_units, True) 297 | 298 | alpha = tf.get_variable( 299 | "alpha", [self._num_units], dtype=tf.float32, 300 | initializer=tf.constant_initializer(2.0)) 301 | #""" alpha value 2.0 works better than 1.0""" 302 | beta_one = tf.get_variable( 303 | "beta_one", [self._num_units], dtype=tf.float32, 304 | initializer=tf.constant_initializer(1.0)) 305 | 306 | beta_two = tf.get_variable( 307 | "beta_two", [self._num_units], dtype=tf.float32, 308 | initializer=tf.constant_initializer(1.0)) 309 | 310 | z_t_bias = tf.get_variable( 311 | "z_t_bias", [self._num_units], dtype=tf.float32, 312 | initializer=tf.constant_initializer(0.0)) 313 | 314 | # 2nd order calculation 315 | #You can change activation function but before get familiar with gating operations and mathematical notations 316 | d_1_t = alpha * V_h * ( self._W_x_inputs + Q_m ) # modified 317 | d_2_t = beta_one * V_h + beta_two * ( self._W_x_inputs + Q_m ) # modified 318 | 319 | if self._apply_layer_norm: 320 | d_1_t = self._layer_norm(d_1_t) 321 | d_2_t = self._layer_norm(d_2_t) 322 | 323 | z_t = activation(d_1_t + d_2_t + z_t_bias) 324 | 325 | return z_t 326 | 327 | def __call__(self, inputs, state, context, scope=None): 328 | inner_function_output = self._inner_function(inputs, state, context) 329 | output = self._outer_function(inputs, inner_function_output, state) 330 | 331 | 332 | return output, output 333 | 334 | 335 | def _linear(args, output_size, bias, bias_start=0.0, scope=None): 336 | #"""Linear mapping """ 337 | if args is None or (isinstance(args, (list, tuple)) and not args): 338 | raise ValueError("`args` must be specified, please check definition for input variables") 339 | if not isinstance(args, (list, tuple)): 340 | args = [args] 341 | 342 | # dimension 1 cell size calculation. 
343 | total_arg_size = 0 344 | shapes = [a.get_shape().as_list() for a in args] 345 | for shape in shapes: 346 | if len(shape) != 2: 347 | raise ValueError( 348 | "Linear is expecting 2Dimensional Arguments: %s" % str(shapes)) 349 | if not shape[1]: 350 | raise ValueError( 351 | "Linear expects shape[1] of arguments: %s" % str(shapes)) 352 | else: 353 | total_arg_size += shape[1] 354 | 355 | with tf.variable_scope(scope or "Linear"): 356 | matrix = tf.get_variable("Matrix", [total_arg_size, output_size]) 357 | if len(args) == 1: 358 | res = tf.matmul(args[0], matrix) 359 | else: 360 | res = tf.matmul(tf.concat(1, args), matrix) 361 | if not bias: 362 | return res 363 | bias_term = tf.get_variable( 364 | "Bias", [output_size], 365 | initializer=tf.constant_initializer(bias_start)) 366 | return res + bias_term 367 | -------------------------------------------------------------------------------- /forward_kinematics.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import h5py 5 | import matplotlib 6 | import matplotlib.pyplot as plt 7 | import matplotlib.animation as animation 8 | from mpl_toolkits.mplot3d import Axes3D 9 | import viz 10 | import time 11 | import copy 12 | import data_utils 13 | import cv2 14 | from PIL import Image 15 | 16 | def fkl( angles, parent, offset, rotInd, expmapInd ): 17 | """ 18 | Convert joint angles and bone lenghts into the 3d points of a person. 19 | Based on expmap2xyz.m, available at 20 | https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/exp2xyz.m 21 | 22 | Args 23 | angles: 99-long vector with 3d position and 3d joint angles in expmap format 24 | parent: 32-long vector with parent-child relationships in the kinematic tree 25 | offset: 96-long vector with bone lenghts 26 | rotInd: 32-long list with indices into angles 27 | expmapInd: 32-long list with indices into expmap angles 28 | Returns 29 | xyz: 32x3 3d points that represent a person in 3d space 30 | """ 31 | 32 | assert len(angles) == 99 33 | 34 | # Structure that indicates parents for each joint 35 | njoints = 32 36 | xyzStruct = [dict() for x in range(njoints)] 37 | 38 | for i in np.arange( njoints ): 39 | 40 | if not rotInd[i] : # If the list is empty 41 | xangle, yangle, zangle = 0, 0, 0 42 | else: 43 | xangle = angles[ rotInd[i][0]-1 ] 44 | yangle = angles[ rotInd[i][1]-1 ] 45 | zangle = angles[ rotInd[i][2]-1 ] 46 | 47 | r = angles[ expmapInd[i] ] 48 | 49 | thisRotation = data_utils.expmap2rotmat(r) 50 | thisPosition = np.array([xangle, yangle, zangle]) 51 | 52 | if parent[i] == -1: # Root node 53 | xyzStruct[i]['rotation'] = thisRotation 54 | xyzStruct[i]['xyz'] = np.reshape(offset[i,:], (1,3)) + thisPosition 55 | else: 56 | xyzStruct[i]['xyz'] = (offset[i,:] + thisPosition).dot( xyzStruct[ parent[i] ]['rotation'] ) + xyzStruct[ parent[i] ]['xyz'] 57 | xyzStruct[i]['rotation'] = thisRotation.dot( xyzStruct[ parent[i] ]['rotation'] ) 58 | 59 | xyz = [xyzStruct[i]['xyz'] for i in range(njoints)] 60 | xyz = np.array( xyz ).squeeze() 61 | xyz = xyz[:,[0,2,1]] 62 | # xyz = xyz[:,[2,0,1]] 63 | 64 | 65 | return np.reshape( xyz, [-1] ) 66 | 67 | def revert_coordinate_space(channels, R0, T0): 68 | """ 69 | Bring a series of poses to a canonical form so they are facing the camera when they start. 
70 | Adapted from 71 | https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/dataParser/Utils/revertCoordinateSpace.m 72 | 73 | Args 74 | channels: n-by-99 matrix of poses 75 | R0: 3x3 rotation for the first frame 76 | T0: 1x3 position for the first frame 77 | Returns 78 | channels_rec: The passed poses, but the first has T0 and R0, and the 79 | rest of the sequence is modified accordingly. 80 | """ 81 | n, d = channels.shape 82 | 83 | channels_rec = copy.copy(channels) 84 | R_prev = R0 85 | T_prev = T0 86 | rootRotInd = np.arange(3,6) 87 | 88 | # Loop through the passed posses 89 | for ii in range(n): 90 | R_diff = data_utils.expmap2rotmat( channels[ii, rootRotInd] ) 91 | R = R_diff.dot( R_prev ) 92 | 93 | channels_rec[ii, rootRotInd] = data_utils.rotmat2expmap(R) 94 | T = T_prev + ((R_prev.T).dot( np.reshape(channels[ii,:3],[3,1]))).reshape(-1) 95 | channels_rec[ii,:3] = T 96 | T_prev = T 97 | R_prev = R 98 | 99 | return channels_rec 100 | 101 | 102 | def _some_variables(): 103 | """ 104 | We define some variables that are useful to run the kinematic tree 105 | 106 | Args 107 | None 108 | Returns 109 | parent: 32-long vector with parent-child relationships in the kinematic tree 110 | offset: 96-long vector with bone lenghts 111 | rotInd: 32-long list with indices into angles 112 | expmapInd: 32-long list with indices into expmap angles 113 | """ 114 | 115 | parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9,10, 1,12,13,14,15,13, 116 | 17,18,19,20,21,20,23,13,25,26,27,28,29,28,31])-1 117 | 118 | offset = np.array([0.000000,0.000000,0.000000,-132.948591,0.000000,0.000000,0.000000,-442.894612,0.000000,0.000000,-454.206447,0.000000,0.000000,0.000000,162.767078,0.000000,0.000000,74.999437,132.948826,0.000000,0.000000,0.000000,-442.894413,0.000000,0.000000,-454.206590,0.000000,0.000000,0.000000,162.767426,0.000000,0.000000,74.999948,0.000000,0.100000,0.000000,0.000000,233.383263,0.000000,0.000000,257.077681,0.000000,0.000000,121.134938,0.000000,0.000000,115.002227,0.000000,0.000000,257.077681,0.000000,0.000000,151.034226,0.000000,0.000000,278.882773,0.000000,0.000000,251.733451,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,99.999627,0.000000,100.000188,0.000000,0.000000,0.000000,0.000000,0.000000,257.077681,0.000000,0.000000,151.031437,0.000000,0.000000,278.892924,0.000000,0.000000,251.728680,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,99.999888,0.000000,137.499922,0.000000,0.000000,0.000000,0.000000]) 119 | offset = offset.reshape(-1,3) 120 | 121 | rotInd = [[5, 6, 4], 122 | [8, 9, 7], 123 | [11, 12, 10], 124 | [14, 15, 13], 125 | [17, 18, 16], 126 | [], 127 | [20, 21, 19], 128 | [23, 24, 22], 129 | [26, 27, 25], 130 | [29, 30, 28], 131 | [], 132 | [32, 33, 31], 133 | [35, 36, 34], 134 | [38, 39, 37], 135 | [41, 42, 40], 136 | [], 137 | [44, 45, 43], 138 | [47, 48, 46], 139 | [50, 51, 49], 140 | [53, 54, 52], 141 | [56, 57, 55], 142 | [], 143 | [59, 60, 58], 144 | [], 145 | [62, 63, 61], 146 | [65, 66, 64], 147 | [68, 69, 67], 148 | [71, 72, 70], 149 | [74, 75, 73], 150 | [], 151 | [77, 78, 76], 152 | []] 153 | 154 | expmapInd = np.split(np.arange(4,100)-1,32) 155 | 156 | return parent, offset, rotInd, expmapInd 157 | 158 | def main(): 159 | 160 | # Load all the data 161 | parent, offset, rotInd, expmapInd = _some_variables() 162 | action = 'eating' 163 | test_set_sequence = '5' 164 | # numpy implementation 165 | with h5py.File( 'samples.h5', 'r' ) as h5f: 166 | expmap_pred = h5f['expmap/preds/' + action + '_' + 
test_set_sequence][:] 167 | expmap_gt = h5f['expmap/gt/' + action + '_' + test_set_sequence ][:] 168 | 169 | nframes_gt, nframes_pred = expmap_gt.shape[0], expmap_pred.shape[0] 170 | 171 | # Put them together and revert the coordinate space 172 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt, expmap_pred)), np.eye(3), np.zeros(3) ) 173 | expmap_gt = expmap_all[:nframes_gt,:] 174 | expmap_pred = expmap_all[nframes_gt:,:] 175 | 176 | # Compute 3d points for each frame 177 | xyz_gt, xyz_pred = np.zeros((nframes_gt, 96)), np.zeros((nframes_pred, 96)) 178 | for i in range( nframes_gt ): 179 | xyz_gt[i,:] = fkl( expmap_gt[i,:], parent, offset, rotInd, expmapInd ) 180 | for i in range( nframes_pred ): 181 | xyz_pred[i,:] = fkl( expmap_pred[i,:], parent, offset, rotInd, expmapInd ) 182 | 183 | # setting up stuff to save video 184 | FFMpegWriter = animation.writers['ffmpeg'] 185 | metadata = dict(title= action + '_' + test_set_sequence, artist='Matplotlib', comment='Movie support!') 186 | writer = FFMpegWriter(fps=25, codec="libx264", bitrate=-1, metadata=metadata) 187 | 188 | # === Plot and animate === 189 | fig = plt.figure() 190 | gt_ax = fig.add_subplot(1, 2, 1, projection='3d') 191 | pred_ax = fig.add_subplot(1, 2, 2, projection='3d') 192 | ob_gt = viz.Ax3DPose(gt_ax) 193 | ob_pred = viz.Ax3DPose(pred_ax) 194 | 195 | # setting viewing angle 196 | gt_ax.view_init(azim=135) 197 | pred_ax.view_init(azim=45) 198 | 199 | with writer.saving(fig, action + "_" + test_set_sequence +".mp4", 100): 200 | 201 | # Plot the conditioning ground truth 202 | for i in range(nframes_gt): 203 | ob_gt.update( xyz_gt[i,:] ) 204 | #plt.show(block=False) 205 | fig.canvas.draw() 206 | #plt.pause(0.001) 207 | #writer.grab_frame() 208 | 209 | # Plot the prediction 210 | #for i in range(nframes_pred): 211 | ob_pred.update( xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 212 | plt.show(block=False) 213 | fig.canvas.draw() 214 | plt.pause(0.001) 215 | writer.grab_frame() 216 | 217 | if __name__ == '__main__': 218 | main() 219 | -------------------------------------------------------------------------------- /forward_kinematics_v2.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import h5py 5 | import matplotlib 6 | import matplotlib.pyplot as plt 7 | import matplotlib.animation as animation 8 | from mpl_toolkits.mplot3d import Axes3D 9 | import viz 10 | import time 11 | import copy 12 | import data_utils 13 | import cv2 14 | from PIL import Image 15 | 16 | def fkl( angles, parent, offset, rotInd, expmapInd ): 17 | """ 18 | Convert joint angles and bone lenghts into the 3d points of a person. 
19 | Based on expmap2xyz.m, available at 20 | https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/exp2xyz.m 21 | 22 | Args 23 | angles: 99-long vector with 3d position and 3d joint angles in expmap format 24 | parent: 32-long vector with parent-child relationships in the kinematic tree 25 | offset: 96-long vector with bone lenghts 26 | rotInd: 32-long list with indices into angles 27 | expmapInd: 32-long list with indices into expmap angles 28 | Returns 29 | xyz: 32x3 3d points that represent a person in 3d space 30 | """ 31 | 32 | assert len(angles) == 99 33 | 34 | # Structure that indicates parents for each joint 35 | njoints = 32 36 | xyzStruct = [dict() for x in range(njoints)] 37 | 38 | for i in np.arange( njoints ): 39 | 40 | if not rotInd[i] : # If the list is empty 41 | xangle, yangle, zangle = 0, 0, 0 42 | else: 43 | xangle = angles[ rotInd[i][0]-1 ] 44 | yangle = angles[ rotInd[i][1]-1 ] 45 | zangle = angles[ rotInd[i][2]-1 ] 46 | 47 | r = angles[ expmapInd[i] ] 48 | 49 | thisRotation = data_utils.expmap2rotmat(r) 50 | thisPosition = np.array([xangle, yangle, zangle]) 51 | 52 | if parent[i] == -1: # Root node 53 | xyzStruct[i]['rotation'] = thisRotation 54 | xyzStruct[i]['xyz'] = np.reshape(offset[i,:], (1,3)) + thisPosition 55 | else: 56 | xyzStruct[i]['xyz'] = (offset[i,:] + thisPosition).dot( xyzStruct[ parent[i] ]['rotation'] ) + xyzStruct[ parent[i] ]['xyz'] 57 | xyzStruct[i]['rotation'] = thisRotation.dot( xyzStruct[ parent[i] ]['rotation'] ) 58 | 59 | xyz = [xyzStruct[i]['xyz'] for i in range(njoints)] 60 | xyz = np.array( xyz ).squeeze() 61 | xyz = xyz[:,[0,2,1]] 62 | # xyz = xyz[:,[2,0,1]] 63 | 64 | 65 | return np.reshape( xyz, [-1] ) 66 | 67 | def revert_coordinate_space(channels, R0, T0): 68 | """ 69 | Bring a series of poses to a canonical form so they are facing the camera when they start. 70 | Adapted from 71 | https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/dataParser/Utils/revertCoordinateSpace.m 72 | 73 | Args 74 | channels: n-by-99 matrix of poses 75 | R0: 3x3 rotation for the first frame 76 | T0: 1x3 position for the first frame 77 | Returns 78 | channels_rec: The passed poses, but the first has T0 and R0, and the 79 | rest of the sequence is modified accordingly. 
80 | """ 81 | n, d = channels.shape 82 | 83 | channels_rec = copy.copy(channels) 84 | R_prev = R0 85 | T_prev = T0 86 | rootRotInd = np.arange(3,6) 87 | 88 | # Loop through the passed posses 89 | for ii in range(n): 90 | R_diff = data_utils.expmap2rotmat( channels[ii, rootRotInd] ) 91 | R = R_diff.dot( R_prev ) 92 | 93 | channels_rec[ii, rootRotInd] = data_utils.rotmat2expmap(R) 94 | T = T_prev + ((R_prev.T).dot( np.reshape(channels[ii,:3],[3,1]))).reshape(-1) 95 | channels_rec[ii,:3] = T 96 | T_prev = T 97 | R_prev = R 98 | 99 | return channels_rec 100 | 101 | 102 | def _some_variables(): 103 | """ 104 | We define some variables that are useful to run the kinematic tree 105 | 106 | Args 107 | None 108 | Returns 109 | parent: 32-long vector with parent-child relationships in the kinematic tree 110 | offset: 96-long vector with bone lenghts 111 | rotInd: 32-long list with indices into angles 112 | expmapInd: 32-long list with indices into expmap angles 113 | """ 114 | 115 | parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9,10, 1,12,13,14,15,13, 116 | 17,18,19,20,21,20,23,13,25,26,27,28,29,28,31])-1 117 | 118 | offset = np.array([0.000000,0.000000,0.000000,-132.948591,0.000000,0.000000,0.000000,-442.894612,0.000000,0.000000,-454.206447,0.000000,0.000000,0.000000,162.767078,0.000000,0.000000,74.999437,132.948826,0.000000,0.000000,0.000000,-442.894413,0.000000,0.000000,-454.206590,0.000000,0.000000,0.000000,162.767426,0.000000,0.000000,74.999948,0.000000,0.100000,0.000000,0.000000,233.383263,0.000000,0.000000,257.077681,0.000000,0.000000,121.134938,0.000000,0.000000,115.002227,0.000000,0.000000,257.077681,0.000000,0.000000,151.034226,0.000000,0.000000,278.882773,0.000000,0.000000,251.733451,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,99.999627,0.000000,100.000188,0.000000,0.000000,0.000000,0.000000,0.000000,257.077681,0.000000,0.000000,151.031437,0.000000,0.000000,278.892924,0.000000,0.000000,251.728680,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,99.999888,0.000000,137.499922,0.000000,0.000000,0.000000,0.000000]) 119 | offset = offset.reshape(-1,3) 120 | 121 | rotInd = [[5, 6, 4], 122 | [8, 9, 7], 123 | [11, 12, 10], 124 | [14, 15, 13], 125 | [17, 18, 16], 126 | [], 127 | [20, 21, 19], 128 | [23, 24, 22], 129 | [26, 27, 25], 130 | [29, 30, 28], 131 | [], 132 | [32, 33, 31], 133 | [35, 36, 34], 134 | [38, 39, 37], 135 | [41, 42, 40], 136 | [], 137 | [44, 45, 43], 138 | [47, 48, 46], 139 | [50, 51, 49], 140 | [53, 54, 52], 141 | [56, 57, 55], 142 | [], 143 | [59, 60, 58], 144 | [], 145 | [62, 63, 61], 146 | [65, 66, 64], 147 | [68, 69, 67], 148 | [71, 72, 70], 149 | [74, 75, 73], 150 | [], 151 | [77, 78, 76], 152 | []] 153 | 154 | expmapInd = np.split(np.arange(4,100)-1,32) 155 | 156 | return parent, offset, rotInd, expmapInd 157 | 158 | def read_data(gt_sequences, pred_sequences): 159 | 160 | euler_gt_sequences = np.zeros((100, 99)) 161 | euler_pred_sequences = np.zeros((100, 99)) 162 | 163 | # converting back to euler angles 164 | for j in np.arange( gt_sequences.shape[1] ): 165 | for k in np.arange(3,97,3): 166 | euler_gt_sequences[j, k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat( gt_sequences[j, k:k+3] )) 167 | euler_pred_sequences[j, k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat( pred_sequences[j, k:k+3] )) 168 | 169 | euler_gt_sequences[:,0:6] = 0 170 | euler_pred_sequences[:,0:6] = 0 171 | 172 | return euler_gt_sequences, euler_pred_sequences 173 | 174 | def compute_metrics(euler_gt_sequences, euler_pred_sequences): 175 | 176 | # computing 1) 
fourier coeffs 2)power of fft 3) normalizing power of fft dim-wise 4) cumsum over freq. 5) EMD 177 | gt_fourier_coeffs = np.zeros(euler_gt_sequences.shape) 178 | pred_fourier_coeffs = np.zeros(euler_pred_sequences.shape) 179 | 180 | # power vars 181 | gt_power = np.zeros((gt_fourier_coeffs.shape)) 182 | pred_power = np.zeros((gt_fourier_coeffs.shape)) 183 | 184 | # normalizing power vars 185 | gt_norm_power = np.zeros(gt_fourier_coeffs.shape) 186 | pred_norm_power = np.zeros(gt_fourier_coeffs.shape) 187 | 188 | cdf_gt_power = np.zeros(gt_norm_power.shape) 189 | cdf_pred_power = np.zeros(pred_norm_power.shape) 190 | 191 | emd = np.zeros(cdf_pred_power.shape[1]) 192 | 193 | # used to store powers of feature_dims and sequences used for avg later 194 | seq_feature_power = np.zeros(euler_gt_sequences.shape[1]) 195 | power_weighted_emd = 0 196 | 197 | for d in range(euler_gt_sequences.shape[1]): 198 | gt_fourier_coeffs[:,d] = np.fft.fft(euler_gt_sequences[:,d]) # slice is 1D array 199 | pred_fourier_coeffs[:,d] = np.fft.fft(euler_pred_sequences[:,d]) 200 | 201 | # computing power of fft per sequence per dim 202 | gt_power[:,d] = np.square(np.absolute(gt_fourier_coeffs[:,d])) 203 | pred_power[:,d] = np.square(np.absolute(pred_fourier_coeffs[:,d])) 204 | 205 | # matching power of gt and pred sequences 206 | gt_total_power = np.sum(gt_power[:,d]) 207 | pred_total_power = np.sum(pred_power[:,d]) 208 | 209 | # computing seq_power and feature_dims power 210 | seq_feature_power[d] = gt_total_power 211 | 212 | # normalizing power per sequence per dim 213 | if gt_total_power != 0: 214 | gt_norm_power[:,d] = gt_power[:,d] / gt_total_power 215 | 216 | if pred_total_power !=0: 217 | pred_norm_power[:,d] = pred_power[:,d] / pred_total_power 218 | 219 | # computing cumsum over freq 220 | cdf_gt_power[:,d] = np.cumsum(gt_norm_power[:,d]) # slice is 1D 221 | cdf_pred_power[:,d] = np.cumsum(pred_norm_power[:,d]) 222 | 223 | # computing EMD 224 | emd[d] = np.linalg.norm((cdf_pred_power[:,d] - cdf_gt_power[:,d]), ord=1) 225 | 226 | # computing weighted emd (by sequence and feature powers) 227 | power_weighted_emd = np.average(emd, weights=seq_feature_power) 228 | 229 | return power_weighted_emd 230 | 231 | 232 | def main(): 233 | 234 | # Load all the data 235 | parent, offset, rotInd, expmapInd = _some_variables() 236 | 237 | # short-term models 238 | with h5py.File( '../final_exp_samples/short-term/jul_unsup_sa/walking_samples.h5', 'r' ) as h5f5: 239 | jul_unsup_sa_expmap_pred = h5f5['expmap/preds/walking_6'][:] 240 | expmap_gt_5 = h5f5['expmap/gt/walking_6'][:] 241 | 242 | with h5py.File( '../final_exp_samples/short-term/pgru_skip_1/walking_samples_v2.h5', 'r' ) as h5f6: 243 | pgru_skip_1_expmap_pred = h5f6['expmap/preds/walking_6'][:] 244 | expmap_gt_6 = h5f6['expmap/gt/walking_6'][:] 245 | 246 | # load mocap gt and PGRU-d model predictions 247 | with h5py.File( '../final_exp_samples/long-term/pgru-d/walking_samples_v2.h5', 'r' ) as h5f1: 248 | pgru_d_expmap_pred = h5f1['expmap/preds/walking_6'][:] 249 | expmap_gt_1 = h5f1['expmap/gt/walking_6'][:] 250 | 251 | with h5py.File( '../final_exp_samples/long-term/gru-d/walking_samples.h5', 'r' ) as h5f2: 252 | gru_d_expmap_pred = h5f2['expmap/preds/walking_6'][:] 253 | expmap_gt_2 = h5f2['expmap/gt/walking_6'][:] 254 | 255 | with h5py.File( '../final_exp_samples/long-term/pgru-a/walking_samples.h5', 'r' ) as h5f3: 256 | pgru_a_expmap_pred = h5f3['expmap/preds/walking_6'][:] 257 | expmap_gt_3 = h5f3['expmap/gt/walking_6'][:] 258 | 259 | with h5py.File( 
'../final_exp_samples/long-term/julietta/walking_samples.h5', 'r' ) as h5f4: 260 | jul_long_expmap_pred = h5f4['expmap/preds/walking_6'][:] 261 | expmap_gt_4 = h5f4['expmap/gt/walking_6'][:] 262 | 263 | nframes_gt, nframes_pred = expmap_gt_1.shape[0], pgru_d_expmap_pred.shape[0] 264 | 265 | # computing NPSS metric for all models 266 | #euler_gt_5_seq, euler_jul_unsup_sa_seq = read_data(jul_unsup_sa_expmap_pred, expmap_gt_5) 267 | #euler_gt_6_seq, euler_pgru_skip_1_seq = read_data(pgru_skip_1_expmap_pred, expmap_gt_6) 268 | 269 | #euler_gt_1_seq, euler_pgru_d_seq = read_data(pgru_d_expmap_pred, expmap_gt_1) 270 | #euler_gt_2_seq, euler_gru_d_seq = read_data(gru_d_expmap_pred, expmap_gt_2) 271 | #euler_gt_3_seq, euler_pgru_a_seq = read_data(pgru_a_expmap_pred, expmap_gt_3) 272 | #euler_gt_4_seq, euler_jul_long_seq = read_data(jul_long_expmap_pred, expmap_gt_4) 273 | 274 | #jul_unsup_sa_emd = compute_metrics(euler_gt_5_seq, euler_jul_unsup_sa_seq) 275 | #pgru_skip_1_emd = compute_metrics(euler_gt_6_seq, euler_pgru_skip_1_seq) 276 | 277 | #pgru_d_emd = compute_metrics(euler_gt_1_seq, euler_pgru_d_seq) 278 | #gru_d_emd = compute_metrics(euler_gt_2_seq, euler_gru_d_seq) 279 | #pgru_a_emd = compute_metrics(euler_gt_3_seq, euler_pgru_a_seq) 280 | #jul_long_emd = compute_metrics(euler_gt_4_seq, euler_jul_long_seq) 281 | 282 | # Put them together and revert the coordinate space 283 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt_1, pgru_d_expmap_pred)), np.eye(3), np.zeros(3) ) 284 | expmap_gt = expmap_all[:nframes_gt,:] 285 | pgru_d_expmap_pred = expmap_all[nframes_gt:,:] 286 | 287 | # gru-d revert co-ord space 288 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt_2, gru_d_expmap_pred)), np.eye(3), np.zeros(3) ) 289 | gru_d_expmap_pred = expmap_all[nframes_gt:,:] 290 | 291 | # pgru-ac revert co-ord space 292 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt_3, pgru_a_expmap_pred)), np.eye(3), np.zeros(3) ) 293 | pgru_a_expmap_pred = expmap_all[nframes_gt:,:] 294 | 295 | # julietta-long revert co-ord space 296 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt_4, jul_long_expmap_pred)), np.eye(3), np.zeros(3) ) 297 | jul_long_expmap_pred = expmap_all[nframes_gt:,:] 298 | 299 | # jul_unsup_sa revert co-ord space 300 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt_5, jul_unsup_sa_expmap_pred)), np.eye(3), np.zeros(3) ) 301 | jul_unsup_sa_expmap_pred = expmap_all[nframes_gt:,:] 302 | 303 | # pgru_skip_1 revert co-ord space 304 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt_6, pgru_skip_1_expmap_pred)), np.eye(3), np.zeros(3) ) 305 | pgru_skip_1_expmap_pred = expmap_all[nframes_gt:,:] 306 | 307 | 308 | # Compute 3d points for each frame 309 | xyz_gt, pgru_d_xyz_pred = np.zeros((nframes_gt, 96)), np.zeros((nframes_pred, 96)) 310 | gru_d_xyz_pred = np.zeros((nframes_gt, 96)) 311 | pgru_a_xyz_pred = np.zeros((nframes_gt, 96)) 312 | jul_long_xyz_pred = np.zeros((nframes_gt, 96)) 313 | 314 | jul_unsup_sa_xyz_pred = np.zeros((nframes_gt, 96)) 315 | pgru_skip_1_xyz_pred = np.zeros((nframes_gt, 96)) 316 | 317 | # ground-truth xyz frames 318 | for i in range( nframes_gt ): 319 | xyz_gt[i,:] = fkl( expmap_gt[i,:], parent, offset, rotInd, expmapInd ) 320 | 321 | # pgru-d xyz frames 322 | for i in range( nframes_pred ): 323 | pgru_d_xyz_pred[i,:] = fkl( pgru_d_expmap_pred[i,:], parent, offset, rotInd, expmapInd ) 324 | 325 | # gru-d xyz frames 326 | for i in range( nframes_pred ): 327 | gru_d_xyz_pred[i,:] = fkl( gru_d_expmap_pred[i,:], parent, 
offset, rotInd, expmapInd ) 328 | 329 | # gru-ac xyz frames 330 | for i in range( nframes_pred ): 331 | pgru_a_xyz_pred[i,:] = fkl( pgru_a_expmap_pred[i,:], parent, offset, rotInd, expmapInd ) 332 | 333 | # jul-long xyz frames 334 | for i in range( nframes_pred ): 335 | jul_long_xyz_pred[i,:] = fkl( jul_long_expmap_pred[i,:], parent, offset, rotInd, expmapInd ) 336 | 337 | # jul-unsup-sa xyz frames 338 | for i in range( nframes_pred ): 339 | jul_unsup_sa_xyz_pred[i,:] = fkl( jul_unsup_sa_expmap_pred[i,:], parent, offset, rotInd, expmapInd ) 340 | 341 | # pgru-skip-1 xyz frames 342 | for i in range( nframes_pred ): 343 | pgru_skip_1_xyz_pred[i,:] = fkl( pgru_skip_1_expmap_pred[i,:], parent, offset, rotInd, expmapInd ) 344 | 345 | # setting up stuff to save video 346 | FFMpegWriter = animation.writers['ffmpeg'] 347 | metadata = dict(title='Walking Sequence 6', artist='Matplotlib', comment='Movie support!') 348 | writer = FFMpegWriter(fps=12, codec="libx264", bitrate=-1, metadata=metadata) 349 | 350 | # === Plot and animate === 351 | fig = plt.figure(figsize=(22.0,11.0)) 352 | fig.suptitle("Walking Sequence 6") 353 | fig.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95, wspace=None, hspace=None) 354 | gt_ax = fig.add_subplot(3, 3, 2, projection='3d') 355 | jul_unsup_sa_pred_ax = fig.add_subplot(3, 3, 4, projection='3d') 356 | pgru_skip_1_pred_ax = fig.add_subplot(3, 3, 5, projection='3d') 357 | jul_long_pred_ax = fig.add_subplot(3, 3, 6, projection='3d') 358 | pgru_a_pred_ax = fig.add_subplot(3, 3, 7, projection='3d') 359 | gru_d_pred_ax = fig.add_subplot(3, 3, 8, projection='3d') 360 | pgru_d_pred_ax = fig.add_subplot(3, 3, 9, projection='3d') 361 | 362 | # setting viewing angle 363 | gt_ax.view_init(azim=135) 364 | jul_unsup_sa_pred_ax.view_init(azim=45) 365 | pgru_skip_1_pred_ax.view_init(azim=45) 366 | jul_long_pred_ax.view_init(azim=45) 367 | pgru_a_pred_ax.view_init(azim=45) 368 | gru_d_pred_ax.view_init(azim=45) 369 | pgru_d_pred_ax.view_init(azim=45) 370 | 371 | font = {'family': 'serif', 372 | 'color': 'black', 373 | 'weight': 'normal', 374 | 'size': 12, 375 | } 376 | 377 | # titles and legends for subplots 378 | gt_ax.set_title("Ground-Truth") 379 | 380 | #jul_unsup_sa_emd_str = '$\mathrm{NPSS}=%.3f$'%(jul_unsup_sa_emd) 381 | jul_unsup_sa_pred_ax.set_title("A") 382 | #jul_unsup_sa_pred_ax.text2D(0.35,0.80, jul_unsup_sa_emd_str, fontdict=font, transform=jul_unsup_sa_pred_ax.transAxes) 383 | 384 | #pgru_skip_1_emd_str = '$\mathrm{NPSS}=%.3f$'%(pgru_skip_1_emd) 385 | pgru_skip_1_pred_ax.set_title("B") 386 | #pgru_skip_1_pred_ax.text2D(0.35,0.80, pgru_skip_1_emd_str, fontdict=font, transform=pgru_skip_1_pred_ax.transAxes) 387 | 388 | #jul_long_emd_str = '$\mathrm{NPSS}=%.3f$'%(jul_long_emd) 389 | jul_long_pred_ax.set_title("C") 390 | #jul_long_pred_ax.text2D(0.35,0.80, jul_long_emd_str, fontdict=font, transform=jul_long_pred_ax.transAxes) 391 | 392 | #pgru_a_emd_str = '$\mathrm{NPSS}=%.3f$'%(pgru_a_emd) 393 | pgru_a_pred_ax.set_title("D") 394 | #pgru_a_pred_ax.text2D(0.35,0.80, pgru_a_emd_str, fontdict=font, transform=pgru_a_pred_ax.transAxes) 395 | 396 | #gru_d_emd_str = '$\mathrm{NPSS}=%.3f$'%(gru_d_emd) 397 | gru_d_pred_ax.set_title("E") 398 | #gru_d_pred_ax.text2D(0.35, 0.80, gru_d_emd_str, fontdict=font, transform=gru_d_pred_ax.transAxes) 399 | 400 | #pgru_d_emd_str = '$\mathrm{NPSS}=%.3f$'%(pgru_d_emd) 401 | pgru_d_pred_ax.set_title("F") 402 | #pgru_d_pred_ax.text2D(0.35, 0.80, pgru_d_emd_str, fontdict=font, transform=pgru_d_pred_ax.transAxes) 403 | 404 | ob_gt = 
viz.Ax3DPose(gt_ax) 405 | jul_unsup_sa_ob_pred = viz.Ax3DPose(jul_unsup_sa_pred_ax) 406 | pgru_skip_1_ob_pred = viz.Ax3DPose(pgru_skip_1_pred_ax) 407 | jul_long_ob_pred = viz.Ax3DPose(jul_long_pred_ax) 408 | pgru_a_ob_pred = viz.Ax3DPose(pgru_a_pred_ax) 409 | gru_d_ob_pred = viz.Ax3DPose(gru_d_pred_ax) 410 | pgru_d_ob_pred = viz.Ax3DPose(pgru_d_pred_ax) 411 | 412 | with writer.saving(fig, "walking_seq_6.mp4", 100): 413 | 414 | for i in range(nframes_gt): 415 | # Plot the conditioning ground truth 416 | ob_gt.update( xyz_gt[i,:] ) 417 | fig.canvas.draw() 418 | 419 | # Plot the jul-unsup-sa prediction 420 | jul_unsup_sa_ob_pred.update( jul_unsup_sa_xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 421 | plt.show(block=False) 422 | fig.canvas.draw() 423 | 424 | # Plot the pgru-skip-1 prediction 425 | pgru_skip_1_ob_pred.update( pgru_skip_1_xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 426 | plt.show(block=False) 427 | fig.canvas.draw() 428 | 429 | # Plot the jul-long prediction 430 | jul_long_ob_pred.update( jul_long_xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 431 | plt.show(block=False) 432 | fig.canvas.draw() 433 | 434 | # Plot the pgru-ac prediction 435 | pgru_a_ob_pred.update( pgru_a_xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 436 | plt.show(block=False) 437 | fig.canvas.draw() 438 | 439 | # Plot the gru-d prediction 440 | gru_d_ob_pred.update( gru_d_xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 441 | plt.show(block=False) 442 | fig.canvas.draw() 443 | 444 | # Plot the pgru-ac prediction 445 | pgru_d_ob_pred.update( pgru_d_xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 446 | plt.show(block=False) 447 | fig.canvas.draw() 448 | writer.grab_frame() 449 | 450 | if __name__ == '__main__': 451 | main() 452 | -------------------------------------------------------------------------------- /metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jul 27 01:41:56 2018 5 | 6 | @author: anand 7 | """ 8 | 9 | import h5py 10 | import numpy as np 11 | import sklearn.metrics.pairwise as metrics 12 | import matplotlib.pyplot as plt 13 | import matplotlib.style as style 14 | import data_utils 15 | import forward_kinematics 16 | 17 | def read_data(fname): 18 | 19 | hf = h5py.File(fname,'r') 20 | action = 'discussion' 21 | 22 | gt_sequences = np.zeros((8, 100, 99)) 23 | pred_sequences = np.zeros((8, 100, 99)) 24 | 25 | euler_gt_sequences = np.zeros((8, 100, 99)) 26 | euler_pred_sequences = np.zeros((8, 100, 99)) 27 | 28 | error_hf = hf.get('mean_'+ action + '_error/') 29 | errors = np.array(error_hf) 30 | 31 | for i in range(8): 32 | gt_fname = 'expmap/gt/' + action + '_' + str(i) 33 | n1 = np.array(hf.get(gt_fname)) 34 | gt_sequences[i,:,:] = n1 35 | 36 | pred_fname = 'expmap/preds/' + action + '_' + str(i) 37 | n2 = np.array(hf.get(pred_fname)) 38 | pred_sequences[i,:,:] = n2 39 | 40 | # converting back to euler angles 41 | for j in np.arange( gt_sequences.shape[1] ): 42 | for k in np.arange(3,97,3): 43 | euler_gt_sequences[i, j, k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat( gt_sequences[i, j, k:k+3] )) 44 | euler_pred_sequences[i, j, k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat( pred_sequences[i, j, k:k+3] )) 45 | 46 | euler_gt_sequences[i,:,0:6] = 0 47 | euler_pred_sequences[i,:,0:6] = 0 48 | 49 | return euler_gt_sequences, euler_pred_sequences, errors 50 | 51 | def compute_metrics(euler_gt_sequences, euler_pred_sequences): 52 
| 53 | # computing 1) fourier coeffs 2)power of fft 3) normalizing power of fft dim-wise 4) cumsum over freq. 5) EMD 54 | gt_fourier_coeffs = np.zeros(euler_gt_sequences.shape) 55 | pred_fourier_coeffs = np.zeros(euler_pred_sequences.shape) 56 | 57 | # power vars 58 | gt_power = np.zeros((gt_fourier_coeffs.shape)) 59 | pred_power = np.zeros((gt_fourier_coeffs.shape)) 60 | 61 | # normalizing power vars 62 | gt_norm_power = np.zeros(gt_fourier_coeffs.shape) 63 | pred_norm_power = np.zeros(gt_fourier_coeffs.shape) 64 | 65 | cdf_gt_power = np.zeros(gt_norm_power.shape) 66 | cdf_pred_power = np.zeros(pred_norm_power.shape) 67 | 68 | emd = np.zeros(cdf_pred_power.shape[0:3:2]) 69 | 70 | # used to store powers of feature_dims and sequences used for avg later 71 | seq_feature_power = np.zeros(euler_gt_sequences.shape[0:3:2]) 72 | power_weighted_emd = 0 73 | 74 | for s in range(euler_gt_sequences.shape[0]): 75 | 76 | for d in range(euler_gt_sequences.shape[2]): 77 | gt_fourier_coeffs[s,:,d] = np.fft.fft(euler_gt_sequences[s,:,d]) # slice is 1D array 78 | pred_fourier_coeffs[s,:,d] = np.fft.fft(euler_pred_sequences[s,:,d]) 79 | 80 | # computing power of fft per sequence per dim 81 | gt_power[s,:,d] = np.square(np.absolute(gt_fourier_coeffs[s,:,d])) 82 | pred_power[s,:,d] = np.square(np.absolute(pred_fourier_coeffs[s,:,d])) 83 | 84 | # matching power of gt and pred sequences 85 | gt_total_power = np.sum(gt_power[s,:,d]) 86 | pred_total_power = np.sum(pred_power[s,:,d]) 87 | #power_diff = gt_total_power - pred_total_power 88 | 89 | # adding power diff to zero freq of pred seq 90 | #pred_power[s,0,d] = pred_power[s,0,d] + power_diff 91 | 92 | # computing seq_power and feature_dims power 93 | seq_feature_power[s,d] = gt_total_power 94 | 95 | # normalizing power per sequence per dim 96 | if gt_total_power != 0: 97 | gt_norm_power[s,:,d] = gt_power[s,:,d] / gt_total_power 98 | 99 | if pred_total_power !=0: 100 | pred_norm_power[s,:,d] = pred_power[s,:,d] / pred_total_power 101 | 102 | # computing cumsum over freq 103 | cdf_gt_power[s,:,d] = np.cumsum(gt_norm_power[s,:,d]) # slice is 1D 104 | cdf_pred_power[s,:,d] = np.cumsum(pred_norm_power[s,:,d]) 105 | 106 | # computing EMD 107 | emd[s,d] = np.linalg.norm((cdf_pred_power[s,:,d] - cdf_gt_power[s,:,d]), ord=1) 108 | 109 | # computing weighted emd (by sequence and feature powers) 110 | power_weighted_emd = np.average(emd, weights=seq_feature_power) 111 | 112 | return power_weighted_emd 113 | 114 | 115 | # read data from all models 116 | #gru_nl_nd_gt_sequence, gru_nl_nd_pred_sequence, gru_nl_nd_errors = read_data('../multi_exp_samples/long-term/simple_gru_no_plan_no_deriv/discussion_samples_v2.h5') 117 | #pgru_d_nl_gt_sequence, pgru_d_nl_pred_sequence, pgru_d_nl_errors = read_data('../final_exp_samples/long-term/full_pgru_no_loss/discussion_samples_v2.h5') 118 | #no_plan_gt_sequence, no_plan_pred_sequence, no_plan_errors = read_data('../final_exp_samples/long-term/gru-d/discussion_samples.h5') 119 | #plan_gt_sequence, plan_pred_sequence, plan_errors = read_data('../final_exp_samples/long-term/pgru-d/discussion_samples.h5') 120 | #multi_base_gt, multi_base_pred, multi_base_errors = read_data('../multi_action_samples/samples_1_layer_attn_drop_0.2_5k.h5') 121 | 122 | # compute metrics for all models 123 | multi_base_npss = compute_metrics(multi_base_gt, multi_base_pred) 124 | #gru_nl_nd_emd = compute_metrics(gru_nl_nd_gt_sequence, gru_nl_nd_pred_sequence) 125 | #pgru_d_nl_emd = compute_metrics(pgru_d_nl_gt_sequence, pgru_d_nl_pred_sequence) 126 | #no_plan_emd 
= compute_metrics(no_plan_gt_sequence, no_plan_pred_sequence) 127 | #plan_emd = compute_metrics(plan_gt_sequence, plan_pred_sequence) 128 | #auto_cond_emd = compute_metrics(auto_cond_gt_sequence, auto_cond_pred_sequence) 129 | #jul_emd = compute_metrics(jul_gt_sequence, jul_pred_sequence) 130 | #skip_1_emd = compute_metrics(skip_1_gt_sequence, skip_1_pred_sequence) 131 | #jul_unsup_emd = compute_metrics(jul_unsup_gt_sequence, jul_unsup_pred_sequence) 132 | 133 | -------------------------------------------------------------------------------- /rnn_cell_extensions.py: -------------------------------------------------------------------------------- 1 | 2 | """ Extensions to TF RNN class by una_dinosaria""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import tensorflow as tf 9 | 10 | #from tensorflow.contrib.rnn.python.ops.core_rnn_cell import RNNCell 11 | from rnn_cell_impl import RNNCell # cell definitions with layer_norm 12 | 13 | 14 | # The import for LSTMStateTuple changes in TF >= 1.2.0 15 | from pkg_resources import parse_version as pv 16 | if pv(tf.__version__) >= pv('1.2.0'): 17 | from tensorflow.contrib.rnn import LSTMStateTuple 18 | else: 19 | from tensorflow.contrib.rnn.python.ops.core_rnn_cell import LSTMStateTuple 20 | del pv 21 | 22 | from tensorflow.python.ops import variable_scope as vs 23 | 24 | import collections 25 | import math 26 | 27 | class ResidualWrapper(RNNCell): 28 | """Operator adding residual connections to a given cell.""" 29 | 30 | def __init__(self, cell): 31 | """Create a cell with added residual connection. 32 | 33 | Args: 34 | cell: an RNNCell. The input is added to the output. 35 | 36 | Raises: 37 | TypeError: if cell is not an RNNCell. 38 | """ 39 | if not isinstance(cell, RNNCell): 40 | raise TypeError("The parameter cell is not a RNNCell.") 41 | 42 | self._cell = cell 43 | 44 | @property 45 | def state_size(self): 46 | return self._cell.state_size 47 | 48 | @property 49 | def output_size(self): 50 | return self._cell.output_size 51 | 52 | def __call__(self, inputs, state, scope=None): 53 | """Run the cell and add a residual connection.""" 54 | 55 | # Run the rnn as usual 56 | output, new_state = self._cell(inputs, state, scope) 57 | 58 | # Add the residual connection 59 | output = tf.add(output, inputs) 60 | 61 | return output, new_state 62 | 63 | 64 | class ResidualWrapperv1(RNNCell): 65 | """Operator adding residual connections to a given cell.""" 66 | 67 | def __init__(self, cell, output_size): 68 | """Create a cell with added residual connection. 69 | 70 | Args: 71 | cell: an RNNCell. The input is added to the output. 72 | 73 | Raises: 74 | TypeError: if cell is not an RNNCell. 
75 | """ 76 | if not isinstance(cell, RNNCell): 77 | raise TypeError("The parameter cell is not a RNNCell.") 78 | 79 | self._cell = cell 80 | self._output_size = output_size 81 | 82 | self.r = tf.get_variable("r_interp", [self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 83 | 84 | @property 85 | def state_size(self): 86 | return self._cell.state_size 87 | 88 | @property 89 | def output_size(self): 90 | return self._cell.output_size 91 | 92 | def __call__(self, inputs, state, scope=None): # modified 93 | """Run the cell and add a residual connection.""" 94 | 95 | # Run the rnn as usual 96 | output, new_state = self._cell(inputs, state, scope) # modified 97 | 98 | # perform residual_v1 interpolation op 99 | output = (1.0 - self.r) * output + self.r * inputs 100 | 101 | return output, new_state 102 | 103 | 104 | class ResidualWrapperv2(RNNCell): 105 | """Operator adding residual connections to a given cell.""" 106 | 107 | def __init__(self, cell, output_size): 108 | """Create a cell with added residual connection. 109 | 110 | Args: 111 | cell: an RNNCell. The input is added to the output. 112 | 113 | Raises: 114 | TypeError: if cell is not an RNNCell. 115 | """ 116 | if not isinstance(cell, RNNCell): 117 | raise TypeError("The parameter cell is not a RNNCell.") 118 | 119 | self._cell = cell 120 | self._output_size = output_size 121 | 122 | self.r = tf.get_variable("r_interp", [self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 123 | self.W_res = tf.get_variable("W_res", [self._output_size, self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 124 | self.b_res = tf.get_variable("b_res", [self._output_size], dtype=tf.float32, initializer=tf.constant_initializer(0.1)) 125 | 126 | @property 127 | def state_size(self): 128 | return self._cell.state_size 129 | 130 | @property 131 | def output_size(self): 132 | return self._cell.output_size 133 | 134 | def __call__(self, inputs, state, scope=None): # modified 135 | """Run the cell and add a residual connection.""" 136 | 137 | # Run the rnn as usual 138 | output, new_state = self._cell(inputs, state, scope) # modified 139 | 140 | # perform residual_v2 interpolation op 141 | output = (1.0 - self.r) * output + self.r * (tf.matmul(inputs, self.W_res) + self.b_res) 142 | 143 | return output, new_state 144 | 145 | 146 | 147 | 148 | class LinearSpaceDecoderWrapper(RNNCell): 149 | """Operator adding a linear encoder to an RNN cell""" 150 | 151 | def __init__(self, cell, output_size): 152 | """Create a cell with with a linear encoder in space. 153 | 154 | Args: 155 | cell: an RNNCell. The input is passed through a linear layer. 156 | 157 | Raises: 158 | TypeError: if cell is not an RNNCell. 159 | """ 160 | if not isinstance(cell, RNNCell): 161 | raise TypeError("The parameter cell is not a RNNCell.") 162 | 163 | self._cell = cell 164 | 165 | print( 'output_size = {0}'.format(output_size) ) 166 | print( ' state_size = {0}'.format(self._cell.state_size) ) 167 | 168 | # Tuple if multi-rnn 169 | if isinstance(self._cell.state_size,tuple): 170 | 171 | # Fine if GRU... 
172 | insize = self._cell.state_size[-1] 173 | 174 | # LSTMStateTuple if LSTM 175 | if isinstance( insize, LSTMStateTuple ): 176 | insize = insize.h 177 | 178 | else: 179 | # Fine if not multi-rnn 180 | insize = self._cell.state_size 181 | 182 | self.w_out = tf.get_variable("proj_w_out", 183 | [insize, output_size], 184 | dtype=tf.float32, 185 | initializer=tf.random_uniform_initializer(minval=-0.04, maxval=0.04)) 186 | self.b_out = tf.get_variable("proj_b_out", [output_size], 187 | dtype=tf.float32, 188 | initializer=tf.random_uniform_initializer(minval=-0.04, maxval=0.04)) 189 | 190 | self.linear_output_size = output_size 191 | 192 | 193 | @property 194 | def state_size(self): 195 | return self._cell.state_size 196 | 197 | @property 198 | def output_size(self): 199 | return self.linear_output_size 200 | 201 | def __call__(self, inputs, state, scope=None): 202 | """Use a linear layer and pass the output to the cell.""" 203 | 204 | # Run the rnn as usual 205 | output, new_state = self._cell(inputs, state, scope) 206 | 207 | # Apply the multiplication to everything 208 | output = tf.matmul(output, self.w_out) + self.b_out 209 | 210 | return output, new_state 211 | -------------------------------------------------------------------------------- /translate_lm.py: -------------------------------------------------------------------------------- 1 | 2 | """Simple code for training an RNN for motion prediction.""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import math 9 | import os 10 | import random 11 | import sys 12 | import time 13 | import h5py 14 | 15 | import numpy as np 16 | from six.moves import xrange # pylint: disable=redefined-builtin 17 | import tensorflow as tf 18 | 19 | import data_utils 20 | import seq2seq_model 21 | import motion_rnn_lm 22 | 23 | # Learning 24 | tf.app.flags.DEFINE_float("learning_rate", .0002, "Learning rate.") 25 | tf.app.flags.DEFINE_float("learning_rate_decay_factor", 0.8, "Learning rate is multiplied by this much. 1 means no decay.") 26 | tf.app.flags.DEFINE_integer("learning_rate_step", 2000, "Every this many steps, do decay.") 27 | tf.app.flags.DEFINE_float("max_gradient_norm", 1, "Clip gradients to this norm.") 28 | tf.app.flags.DEFINE_integer("batch_size", 32, "Batch size to use during training.") 29 | tf.app.flags.DEFINE_integer("iterations", int(2e4), "Iterations to train for.") 30 | # Architecture 31 | tf.app.flags.DEFINE_string("architecture", "tied", "Seq2seq architecture to use: [basic, tied].") 32 | tf.app.flags.DEFINE_string("loop_type", "closed", "loop type to use: [open, closed].") 33 | tf.app.flags.DEFINE_integer("body_size", 128, "Size of each body rnn model layer.") 34 | tf.app.flags.DEFINE_string("body_cell", "gru", "RNN cell type of body rnn : [elman, lstm, gru]") 35 | tf.app.flags.DEFINE_integer("plan_size", 128, "Size of each plan rnn model layer.") 36 | tf.app.flags.DEFINE_string("plan_cell", "gru", "RNN cell type of body rnn : [elman, lstm, gru]") 37 | tf.app.flags.DEFINE_integer("num_layers", 1, "Number of layers in the model.") 38 | tf.app.flags.DEFINE_integer("seq_length_in", 50, "Number of frames to feed into the encoder. 25 fps") 39 | tf.app.flags.DEFINE_integer("seq_length_out", 25, "Number of frames that the decoder has to predict. 
25fps") 40 | tf.app.flags.DEFINE_boolean("omit_one_hot", False, "Whether to remove one-hot encoding from the data") 41 | tf.app.flags.DEFINE_boolean("residual_velocities", True, "Add a residual connection that effectively models velocities") 42 | # Directories 43 | tf.app.flags.DEFINE_string("data_dir", os.path.normpath("./data/h3.6m/dataset"), "Data directory") 44 | tf.app.flags.DEFINE_string("train_dir", os.path.normpath("./experiments/"), "Training directory.") 45 | 46 | tf.app.flags.DEFINE_string("action","all", "The action to train on. all means all the actions, all_periodic means walking, eating and smoking") 47 | tf.app.flags.DEFINE_string("loss_to_use","sampling_based", "The type of loss to use, supervised or sampling_based") 48 | 49 | tf.app.flags.DEFINE_integer("test_every", 1000, "How often to compute error on the test set.") 50 | tf.app.flags.DEFINE_integer("save_every", 1000, "How often to compute error on the test set.") 51 | tf.app.flags.DEFINE_boolean("sample", False, "Set to True for sampling.") 52 | tf.app.flags.DEFINE_boolean("use_cpu", False, "Whether to use the CPU") 53 | tf.app.flags.DEFINE_integer("load", 0, "Try to load a previous checkpoint.") 54 | 55 | FLAGS = tf.app.flags.FLAGS 56 | 57 | train_dir = os.path.normpath(os.path.join( FLAGS.train_dir, FLAGS.action, 58 | 'out_{0}'.format(FLAGS.seq_length_out), 59 | 'iterations_{0}'.format(FLAGS.iterations), 60 | FLAGS.architecture, 61 | 'loop_type_{0}'.format(FLAGS.loop_type), 62 | FLAGS.loss_to_use, 63 | 'omit_one_hot' if FLAGS.omit_one_hot else 'one_hot', 64 | 'depth_{0}'.format(FLAGS.num_layers), 65 | 'plan_cell_{0}'.format(FLAGS.plan_cell), 66 | 'plan_size_{0}'.format(FLAGS.plan_size), 67 | 'body_cell_{0}'.format(FLAGS.body_cell), 68 | 'body_size_{0}'.format(FLAGS.body_size), 69 | 'lr_{0}'.format(FLAGS.learning_rate), 70 | 'residual_vel' if FLAGS.residual_velocities else 'not_residual_vel')) 71 | 72 | summaries_dir = os.path.normpath(os.path.join( train_dir, "log" )) # Directory for TB summaries 73 | 74 | def create_model(session, actions, sampling=False): 75 | """Create translation model and initialize or load parameters in session.""" 76 | 77 | model = motion_rnn_lm.MotionRNNModelLM( 78 | FLAGS.architecture, 79 | FLAGS.loop_type, 80 | FLAGS.seq_length_in if not sampling else 50, 81 | FLAGS.seq_length_out if not sampling else 100, 82 | FLAGS.body_size, 83 | FLAGS.body_cell, 84 | FLAGS.plan_size, 85 | FLAGS.plan_cell, 86 | FLAGS.num_layers, 87 | FLAGS.max_gradient_norm, 88 | FLAGS.batch_size, 89 | FLAGS.learning_rate, 90 | FLAGS.learning_rate_decay_factor, 91 | summaries_dir, 92 | FLAGS.loss_to_use if not sampling else "sampling_based", 93 | len( actions ), 94 | not FLAGS.omit_one_hot, 95 | FLAGS.residual_velocities, 96 | dtype=tf.float32) 97 | 98 | if FLAGS.load <= 0: 99 | print("Creating model with fresh parameters.") 100 | session.run(tf.global_variables_initializer()) 101 | return model 102 | 103 | ckpt = tf.train.get_checkpoint_state( train_dir, latest_filename="checkpoint") 104 | print( "train_dir", train_dir ) 105 | 106 | if ckpt and ckpt.model_checkpoint_path: 107 | # Check if the specific checkpoint exists 108 | if FLAGS.load > 0: 109 | if os.path.isfile(os.path.join(train_dir,"checkpoint-{0}.index".format(FLAGS.load))): 110 | ckpt_name = os.path.normpath(os.path.join( os.path.join(train_dir,"checkpoint-{0}".format(FLAGS.load)) )) 111 | else: 112 | raise ValueError("Asked to load checkpoint {0}, but it does not seem to exist".format(FLAGS.load)) 113 | else: 114 | ckpt_name = os.path.basename( 
ckpt.model_checkpoint_path ) 115 | 116 | print("Loading model {0}".format( ckpt_name )) 117 | model.saver.restore( session, ckpt.model_checkpoint_path ) 118 | return model 119 | else: 120 | print("Could not find checkpoint. Aborting.") 121 | raise( ValueError, "Checkpoint {0} does not seem to exist".format( ckpt.model_checkpoint_path ) ) 122 | 123 | return model 124 | 125 | 126 | def train(): 127 | """Train a seq2seq model on human motion""" 128 | 129 | actions = define_actions( FLAGS.action ) 130 | 131 | number_of_actions = len( actions ) 132 | 133 | train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = read_all_data( actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot ) 134 | 135 | # Limit TF to take a fraction of the GPU memory 136 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95) 137 | device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1} 138 | 139 | with tf.Session(config=tf.ConfigProto( gpu_options=gpu_options, device_count = device_count )) as sess: 140 | 141 | # === Create the model === 142 | print("Creating %d layers of %d units for plan RNN." % (FLAGS.num_layers, FLAGS.plan_size)) 143 | print("Creating %d layers of %d units for body RNN." % (FLAGS.num_layers, FLAGS.body_size)) 144 | 145 | model = create_model( sess, actions ) 146 | model.train_writer.add_graph( sess.graph ) 147 | print( "Model created" ) 148 | 149 | # === Read and denormalize the gt with srnn's seeds, as we'll need them 150 | # many times for evaluation in Euler Angles === 151 | srnn_gts_euler = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot ) # modified 152 | 153 | #=== This is the training loop === 154 | step_time, loss, val_loss = 0.0, 0.0, 0.0 155 | current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1 156 | previous_losses = [] 157 | 158 | step_time, loss = 0, 0 159 | 160 | for _ in xrange( FLAGS.iterations ): 161 | 162 | start_time = time.time() 163 | 164 | # === Training step === 165 | encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs = model.get_batch( train_set, FLAGS.omit_one_hot ) # modified 166 | _, step_loss, loss_summary, lr_summary = model.step( sess, encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs, False ) 167 | model.train_writer.add_summary( loss_summary, current_step ) 168 | model.train_writer.add_summary( lr_summary, current_step ) 169 | 170 | if current_step % 10 == 0: 171 | print("step {0:04d}; step_loss: {1:.4f}".format(current_step, step_loss )) 172 | 173 | step_time += (time.time() - start_time) / FLAGS.test_every 174 | loss += step_loss / FLAGS.test_every 175 | current_step += 1 176 | 177 | # === step decay === 178 | #if current_step % FLAGS.learning_rate_step == 0: 179 | # sess.run(model.learning_rate_decay_op) 180 | 181 | # Once in a while, we save checkpoint, print statistics, and run evals. 
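# --- Editor's note (illustrative, not part of the original file): the block
# below reports Euler-angle error at 80, 160, 320, 400, 560 and 1000 ms. The
# data is sampled at 25 fps, so each predicted frame covers 40 ms and the
# horizon T ms sits at 0-based frame index T/40 - 1 -- hence the hard-coded
# indices [1, 3, 7, 9, 13, 24] used in the pretty-printing further down:
#
#   def ms_to_frame_idx(ms, fps=25):   # hypothetical helper, for clarity only
#       frame_ms = 1000 // fps         # 40 ms per frame at 25 fps
#       return ms // frame_ms - 1      # 80 -> 1, 160 -> 3, ..., 1000 -> 24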
182 | if current_step % FLAGS.test_every == 0: 183 | 184 | # === Validation with randomly chosen seeds === 185 | forward_only = True 186 | 187 | encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs = model.get_batch( test_set, FLAGS.omit_one_hot ) # modified 188 | step_loss, loss_summary = model.step(sess, encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs, forward_only) 189 | val_loss = step_loss # Loss book-keeping 190 | 191 | model.test_writer.add_summary(loss_summary, current_step) 192 | 193 | print() 194 | print("{0: <16} |".format("milliseconds"), end="") 195 | for ms in [80, 160, 320, 400, 560, 1000]: 196 | print(" {0:5d} |".format(ms), end="") 197 | print() 198 | 199 | # === Validation with srnn's seeds === 200 | for action in actions: 201 | 202 | # Evaluate the model on the test batches 203 | encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs = model.get_batch_srnn( test_set, action ) 204 | srnn_loss, srnn_poses, _ = model.step(sess, encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs, True, True) 205 | 206 | # Denormalize the output 207 | srnn_pred_expmap = data_utils.revert_output_format( srnn_poses, data_mean, data_std, dim_to_ignore, actions, FLAGS.omit_one_hot ) # modified 208 | 209 | # Save the errors here 210 | mean_errors = np.zeros( (len(srnn_pred_expmap), srnn_pred_expmap[0].shape[0]) ) 211 | 212 | # Training is done in exponential map, but the error is reported in 213 | # Euler angles, as in previous work. 214 | # See https://github.com/asheshjain399/RNNexp/issues/6#issuecomment-247769197 215 | N_SEQUENCE_TEST = 8 216 | for i in np.arange(N_SEQUENCE_TEST): 217 | eulerchannels_pred = srnn_pred_expmap[i] 218 | 219 | # Convert from exponential map to Euler angles 220 | for j in np.arange( eulerchannels_pred.shape[0] ): 221 | for k in np.arange(3,97,3): 222 | eulerchannels_pred[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( eulerchannels_pred[j,k:k+3] )) 223 | 224 | # The global translation (first 3 entries) and global rotation 225 | # (next 3 entries) are also not considered in the error, so they 226 | # are set to zero. 227 | # See https://github.com/asheshjain399/RNNexp/issues/6#issuecomment-249404882 228 | gt_i=np.copy(srnn_gts_euler[action][i]) 229 | gt_i[:,0:6] = 0 230 | 231 | # Now compute the l2 error.
The following is numpy port of the error 232 | # function provided by Ashesh Jain (in matlab), available at 233 | # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/dataParser/Utils/motionGenerationError.m#L40-L54 234 | idx_to_use = np.where( np.std( gt_i, 0 ) > 1e-4 )[0] 235 | 236 | euc_error = np.power( gt_i[:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2) 237 | euc_error = np.sum(euc_error, 1) 238 | euc_error = np.sqrt( euc_error ) 239 | mean_errors[i,:] = euc_error 240 | 241 | # This is simply the mean error over the N_SEQUENCE_TEST examples 242 | mean_mean_errors = np.mean( mean_errors, 0 ) 243 | 244 | # Pretty print of the results for 80, 160, 320, 400, 560 and 1000 ms 245 | print("{0: <16} |".format(action), end="") 246 | for ms in [1,3,7,9,13,24]: 247 | if FLAGS.seq_length_out >= ms+1: 248 | print(" {0:.3f} |".format( mean_mean_errors[ms] ), end="") 249 | else: 250 | print(" n/a |", end="") 251 | print() 252 | 253 | # Ugly massive if-then to log the error to tensorboard :shrug: 254 | if action == "walking": 255 | summaries = sess.run( 256 | [model.walking_err80_summary, 257 | model.walking_err160_summary, 258 | model.walking_err320_summary, 259 | model.walking_err400_summary, 260 | model.walking_err560_summary, 261 | model.walking_err1000_summary], 262 | {model.walking_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 263 | model.walking_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 264 | model.walking_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 265 | model.walking_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 266 | model.walking_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 267 | model.walking_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 268 | elif action == "eating": 269 | summaries = sess.run( 270 | [model.eating_err80_summary, 271 | model.eating_err160_summary, 272 | model.eating_err320_summary, 273 | model.eating_err400_summary, 274 | model.eating_err560_summary, 275 | model.eating_err1000_summary], 276 | {model.eating_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 277 | model.eating_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 278 | model.eating_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 279 | model.eating_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 280 | model.eating_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 281 | model.eating_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 282 | elif action == "smoking": 283 | summaries = sess.run( 284 | [model.smoking_err80_summary, 285 | model.smoking_err160_summary, 286 | model.smoking_err320_summary, 287 | model.smoking_err400_summary, 288 | model.smoking_err560_summary, 289 | model.smoking_err1000_summary], 290 | {model.smoking_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 291 | model.smoking_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 292 | model.smoking_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 293 | model.smoking_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 294 | model.smoking_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 295 | model.smoking_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 296 | elif action == "discussion": 297 | summaries = sess.run( 298 | 
[model.discussion_err80_summary, 299 | model.discussion_err160_summary, 300 | model.discussion_err320_summary, 301 | model.discussion_err400_summary, 302 | model.discussion_err560_summary, 303 | model.discussion_err1000_summary], 304 | {model.discussion_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 305 | model.discussion_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 306 | model.discussion_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 307 | model.discussion_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 308 | model.discussion_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 309 | model.discussion_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 310 | elif action == "directions": 311 | summaries = sess.run( 312 | [model.directions_err80_summary, 313 | model.directions_err160_summary, 314 | model.directions_err320_summary, 315 | model.directions_err400_summary, 316 | model.directions_err560_summary, 317 | model.directions_err1000_summary], 318 | {model.directions_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 319 | model.directions_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 320 | model.directions_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 321 | model.directions_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 322 | model.directions_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 323 | model.directions_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 324 | elif action == "greeting": 325 | summaries = sess.run( 326 | [model.greeting_err80_summary, 327 | model.greeting_err160_summary, 328 | model.greeting_err320_summary, 329 | model.greeting_err400_summary, 330 | model.greeting_err560_summary, 331 | model.greeting_err1000_summary], 332 | {model.greeting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 333 | model.greeting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 334 | model.greeting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 335 | model.greeting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 336 | model.greeting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 337 | model.greeting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 338 | elif action == "phoning": 339 | summaries = sess.run( 340 | [model.phoning_err80_summary, 341 | model.phoning_err160_summary, 342 | model.phoning_err320_summary, 343 | model.phoning_err400_summary, 344 | model.phoning_err560_summary, 345 | model.phoning_err1000_summary], 346 | {model.phoning_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 347 | model.phoning_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 348 | model.phoning_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 349 | model.phoning_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 350 | model.phoning_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 351 | model.phoning_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 352 | elif action == "posing": 353 | summaries = sess.run( 354 | [model.posing_err80_summary, 355 | model.posing_err160_summary, 356 | model.posing_err320_summary, 357 | model.posing_err400_summary, 358 | model.posing_err560_summary, 359 | model.posing_err1000_summary], 360 | 
{model.posing_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 361 | model.posing_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 362 | model.posing_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 363 | model.posing_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 364 | model.posing_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 365 | model.posing_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 366 | elif action == "purchases": 367 | summaries = sess.run( 368 | [model.purchases_err80_summary, 369 | model.purchases_err160_summary, 370 | model.purchases_err320_summary, 371 | model.purchases_err400_summary, 372 | model.purchases_err560_summary, 373 | model.purchases_err1000_summary], 374 | {model.purchases_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 375 | model.purchases_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 376 | model.purchases_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 377 | model.purchases_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 378 | model.purchases_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 379 | model.purchases_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 380 | elif action == "sitting": 381 | summaries = sess.run( 382 | [model.sitting_err80_summary, 383 | model.sitting_err160_summary, 384 | model.sitting_err320_summary, 385 | model.sitting_err400_summary, 386 | model.sitting_err560_summary, 387 | model.sitting_err1000_summary], 388 | {model.sitting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 389 | model.sitting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 390 | model.sitting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 391 | model.sitting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 392 | model.sitting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 393 | model.sitting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 394 | elif action == "sittingdown": 395 | summaries = sess.run( 396 | [model.sittingdown_err80_summary, 397 | model.sittingdown_err160_summary, 398 | model.sittingdown_err320_summary, 399 | model.sittingdown_err400_summary, 400 | model.sittingdown_err560_summary, 401 | model.sittingdown_err1000_summary], 402 | {model.sittingdown_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 403 | model.sittingdown_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 404 | model.sittingdown_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 405 | model.sittingdown_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 406 | model.sittingdown_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 407 | model.sittingdown_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 408 | elif action == "takingphoto": 409 | summaries = sess.run( 410 | [model.takingphoto_err80_summary, 411 | model.takingphoto_err160_summary, 412 | model.takingphoto_err320_summary, 413 | model.takingphoto_err400_summary, 414 | model.takingphoto_err560_summary, 415 | model.takingphoto_err1000_summary], 416 | {model.takingphoto_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 417 | model.takingphoto_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 418 | 
model.takingphoto_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 419 | model.takingphoto_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 420 | model.takingphoto_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 421 | model.takingphoto_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 422 | elif action == "waiting": 423 | summaries = sess.run( 424 | [model.waiting_err80_summary, 425 | model.waiting_err160_summary, 426 | model.waiting_err320_summary, 427 | model.waiting_err400_summary, 428 | model.waiting_err560_summary, 429 | model.waiting_err1000_summary], 430 | {model.waiting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 431 | model.waiting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 432 | model.waiting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 433 | model.waiting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 434 | model.waiting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 435 | model.waiting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 436 | elif action == "walkingdog": 437 | summaries = sess.run( 438 | [model.walkingdog_err80_summary, 439 | model.walkingdog_err160_summary, 440 | model.walkingdog_err320_summary, 441 | model.walkingdog_err400_summary, 442 | model.walkingdog_err560_summary, 443 | model.walkingdog_err1000_summary], 444 | {model.walkingdog_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 445 | model.walkingdog_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 446 | model.walkingdog_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 447 | model.walkingdog_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 448 | model.walkingdog_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 449 | model.walkingdog_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 450 | elif action == "walkingtogether": 451 | summaries = sess.run( 452 | [model.walkingtogether_err80_summary, 453 | model.walkingtogether_err160_summary, 454 | model.walkingtogether_err320_summary, 455 | model.walkingtogether_err400_summary, 456 | model.walkingtogether_err560_summary, 457 | model.walkingtogether_err1000_summary], 458 | {model.walkingtogether_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 459 | model.walkingtogether_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 460 | model.walkingtogether_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 461 | model.walkingtogether_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 462 | model.walkingtogether_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 463 | model.walkingtogether_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 464 | 465 | for i in np.arange(len( summaries )): 466 | model.test_writer.add_summary(summaries[i], current_step) 467 | 468 | 469 | print() 470 | print("============================\n" 471 | "Global step: %d\n" 472 | "Learning rate: %.6f\n" 473 | "Step-time (ms): %.4f\n" 474 | "Train loss avg: %.4f\n" 475 | "--------------------------\n" 476 | "Val loss: %.4f\n" 477 | "srnn loss: %.4f\n" 478 | "============================" % (model.global_step.eval(), 479 | model.learning_rate.eval(), step_time*1000, loss, 480 | val_loss, srnn_loss)) 481 | print() 482 | 483 | previous_losses.append(loss) 484 | 
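# --- Editor's aside (a sketch, not the authors' code): the per-action if/elif
# ladder above feeds the same six "<action>_err<ms>" placeholders and fetches
# the matching "<action>_err<ms>_summary" ops for every action. Assuming, as
# with this file's defaults, that seq_length_out covers all six horizons, the
# ladder could be collapsed with attribute lookups:
#
#   horizons = [80, 160, 320, 400, 560, 1000]
#   feeds = {getattr(model, "%s_err%d" % (action, ms)): mean_mean_errors[ms // 40 - 1]
#            for ms in horizons}
#   summaries = sess.run([getattr(model, "%s_err%d_summary" % (action, ms))
#                         for ms in horizons], feeds)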
485 | # Save the model 486 | if current_step % FLAGS.save_every == 0: 487 | print( "Saving the model..." ); start_time = time.time() 488 | model.saver.save(sess, os.path.normpath(os.path.join(train_dir, 'checkpoint')), global_step=current_step ) 489 | print( "done in {0:.2f} ms".format( (time.time() - start_time)*1000) ) 490 | 491 | # Reset global time and loss 492 | step_time, loss = 0, 0 493 | 494 | sys.stdout.flush() 495 | 496 | 497 | def get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, one_hot, to_euler=True ): 498 | """ 499 | Get the ground truths for srnn's sequences, and convert to Euler angles. 500 | (the error is always computed in Euler angles). 501 | 502 | Args 503 | actions: a list of actions to get ground truths for. 504 | model: training model we are using (we only use the "get_batch" method). 505 | test_set: dictionary with normalized training data. 506 | data_mean: d-long vector with the mean of the training data. 507 | data_std: d-long vector with the standard deviation of the training data. 508 | dim_to_ignore: dimensions that we are not using to train/predict. 509 | one_hot: whether the data comes with one-hot encoding indicating action. 510 | to_euler: whether to convert the angles to Euler format or keep them in exponential map 511 | 512 | Returns 513 | srnn_gts_euler: a dictionary where the keys are actions, and the values 514 | are the ground-truth, denormalized expected outputs of srnn's seeds. 515 | """ 516 | srnn_gts_euler = {} 517 | 518 | for action in actions: 519 | 520 | srnn_gt_euler = [] 521 | _,_, _, _, srnn_expmap = model.get_batch_srnn( test_set, action ) # modified 522 | 523 | # expmap -> rotmat -> euler 524 | for i in np.arange( srnn_expmap.shape[0] ): 525 | denormed = data_utils.unNormalizeData(srnn_expmap[i,:,:], data_mean, data_std, dim_to_ignore, actions, one_hot ) 526 | 527 | if to_euler: 528 | for j in np.arange( denormed.shape[0] ): 529 | for k in np.arange(3,97,3): 530 | denormed[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( denormed[j,k:k+3] )) 531 | 532 | srnn_gt_euler.append( denormed ) 533 | 534 | # Put back in the dictionary 535 | srnn_gts_euler[action] = srnn_gt_euler 536 | 537 | return srnn_gts_euler 538 | 539 | 540 | def sample(): 541 | """Sample predictions for srnn's seeds""" 542 | 543 | if FLAGS.load <= 0: 544 | raise ValueError("Must give an iteration to read parameters from") 545 | 546 | actions = define_actions( FLAGS.action ) 547 | 548 | # Use the CPU if asked to 549 | device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1} 550 | with tf.Session(config=tf.ConfigProto( device_count = device_count )) as sess: 551 | 552 | # === Create the model === 553 | print("Creating %d layers of %d units for plan RNN." % (FLAGS.num_layers, FLAGS.plan_size)) 554 | print("Creating %d layers of %d units for body RNN."
% (FLAGS.num_layers, FLAGS.body_size)) 555 | sampling = True 556 | model = create_model(sess, actions, sampling) 557 | print("Model created") 558 | 559 | # Load all the data 560 | train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = read_all_data(actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot ) 561 | 562 | # === Read and denormalize the gt with srnn's seeds, as we'll need them 563 | # many times for evaluation in Euler Angles === 564 | srnn_gts_expmap = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot, to_euler=False ) # modified 565 | srnn_gts_euler = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot ) # modified 566 | 567 | # Clean and create a new h5 file of samples 568 | SAMPLES_FNAME = 'samples.h5' 569 | try: 570 | os.remove( SAMPLES_FNAME ) 571 | except OSError: 572 | pass 573 | 574 | # Predict and save for each action 575 | for action in actions: 576 | 577 | # Make prediction with srnn' seeds 578 | encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs = model.get_batch_srnn( test_set, action ) 579 | forward_only = True 580 | srnn_seeds = True 581 | srnn_loss, srnn_poses, _ = model.step(sess, encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs, forward_only, srnn_seeds) 582 | 583 | # denormalizes too 584 | srnn_pred_expmap = data_utils.revert_output_format( srnn_poses, data_mean, data_std, dim_to_ignore, actions, FLAGS.omit_one_hot ) # modified 585 | 586 | # Save the conditioning seeds 587 | 588 | # Save the samples 589 | with h5py.File( SAMPLES_FNAME, 'a' ) as hf: 590 | for i in np.arange(8): 591 | # Save conditioning ground truth 592 | node_name = 'expmap/gt/{1}_{0}'.format(i, action) 593 | hf.create_dataset( node_name, data=srnn_gts_expmap[action][i] ) 594 | # Save prediction 595 | node_name = 'expmap/preds/{1}_{0}'.format(i, action) 596 | hf.create_dataset( node_name, data=srnn_pred_expmap[i] ) 597 | 598 | # Compute and save the errors here 599 | mean_errors = np.zeros( (len(srnn_pred_expmap), srnn_pred_expmap[0].shape[0]) ) 600 | 601 | for i in np.arange(8): 602 | 603 | eulerchannels_pred = srnn_pred_expmap[i] 604 | 605 | for j in np.arange( eulerchannels_pred.shape[0] ): 606 | for k in np.arange(3,97,3): 607 | eulerchannels_pred[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( eulerchannels_pred[j,k:k+3] )) 608 | 609 | eulerchannels_pred[:,0:6] = 0 610 | 611 | # Pick only the dimensions with sufficient standard deviation. Others are ignored. 612 | idx_to_use = np.where( np.std( eulerchannels_pred, 0 ) > 1e-4 )[0] 613 | 614 | euc_error = np.power( srnn_gts_euler[action][i][:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2) 615 | euc_error = np.sum(euc_error, 1) 616 | euc_error = np.sqrt( euc_error ) 617 | mean_errors[i,:] = euc_error 618 | 619 | mean_mean_errors = np.mean( mean_errors, 0 ) 620 | print( action ) 621 | print( ','.join(map(str, mean_mean_errors.tolist() )) ) 622 | 623 | with h5py.File( SAMPLES_FNAME, 'a' ) as hf: 624 | node_name = 'mean_{0}_error'.format( action ) 625 | hf.create_dataset( node_name, data=mean_mean_errors ) 626 | 627 | return 628 | 629 | 630 | def define_actions( action ): 631 | """ 632 | Define the list of actions we are using. 633 | 634 | Args 635 | action: String with the passed action. 
Could be "all" 636 | Returns 637 | actions: List of strings of actions 638 | Raises 639 | ValueError if the action is not included in H3.6M 640 | """ 641 | 642 | actions = ["walking", "eating", "smoking", "discussion", "directions", 643 | "greeting", "phoning", "posing", "purchases", "sitting", 644 | "sittingdown", "takingphoto", "waiting", "walkingdog", 645 | "walkingtogether"] 646 | 647 | if action in actions: 648 | return [action] 649 | 650 | if action == "all": 651 | return actions 652 | 653 | if action == "all_srnn": 654 | return ["walking", "eating", "smoking", "discussion"] 655 | 656 | raise( ValueError, "Unrecognized action: %d" % action ) 657 | 658 | 659 | def read_all_data( actions, seq_length_in, seq_length_out, data_dir, one_hot ): 660 | """ 661 | Loads data for training/testing and normalizes it. 662 | 663 | Args 664 | actions: list of strings (actions) to load 665 | seq_length_in: number of frames to use in the burn-in sequence 666 | seq_length_out: number of frames to use in the output sequence 667 | data_dir: directory to load the data from 668 | one_hot: whether to use one-hot encoding per action 669 | Returns 670 | train_set: dictionary with normalized training data 671 | test_set: dictionary with test data 672 | data_mean: d-long vector with the mean of the training data 673 | data_std: d-long vector with the standard dev of the training data 674 | dim_to_ignore: dimensions that are not used becaused stdev is too small 675 | dim_to_use: dimensions that we are actually using in the model 676 | """ 677 | 678 | # === Read training data === 679 | print ("Reading training data (seq_len_in: {0}, seq_len_out {1}).".format( 680 | seq_length_in, seq_length_out)) 681 | 682 | train_subject_ids = [1,6,7,8,9,11] 683 | test_subject_ids = [5] 684 | 685 | train_set, complete_train = data_utils.load_data( data_dir, train_subject_ids, actions, one_hot ) 686 | test_set, complete_test = data_utils.load_data( data_dir, test_subject_ids, actions, one_hot ) 687 | 688 | # Compute normalization stats 689 | data_mean, data_std, dim_to_ignore, dim_to_use = data_utils.normalization_stats(complete_train) 690 | 691 | # MODIFIED 692 | #print(*dim_to_use) 693 | 694 | # Normalize -- subtract mean, divide by stdev 695 | train_set = data_utils.normalize_data( train_set, data_mean, data_std, dim_to_use, actions, one_hot ) 696 | test_set = data_utils.normalize_data( test_set, data_mean, data_std, dim_to_use, actions, one_hot ) 697 | print("done reading data.") 698 | 699 | return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use 700 | 701 | 702 | def main(_): 703 | if FLAGS.sample: 704 | sample() 705 | else: 706 | train() 707 | 708 | if __name__ == "__main__": 709 | tf.app.run() 710 | -------------------------------------------------------------------------------- /translate_simple_lm.py: -------------------------------------------------------------------------------- 1 | 2 | """Simple code for training an RNN for motion prediction.""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import math 9 | import os 10 | import random 11 | import sys 12 | import time 13 | import h5py 14 | 15 | import numpy as np 16 | from six.moves import xrange # pylint: disable=redefined-builtin 17 | import tensorflow as tf 18 | 19 | import data_utils 20 | import seq2seq_model 21 | import motion_rnn_simple_lm 22 | 23 | print("Setting seed.") 24 | np.random.seed(42) 25 | 26 | # Learning 27 | 
tf.app.flags.DEFINE_float("learning_rate", 0.0001, "Learning rate.") 28 | tf.app.flags.DEFINE_float("learning_rate_decay_factor", 0.6, "Learning rate is multiplied by this much. 1 means no decay.") 29 | tf.app.flags.DEFINE_integer("learning_rate_step", 1500, "Every this many steps, do decay.") 30 | tf.app.flags.DEFINE_float("max_gradient_norm", 1, "Clip gradients to this norm.") 31 | tf.app.flags.DEFINE_integer("batch_size", 32, "Batch size to use during training.") 32 | tf.app.flags.DEFINE_integer("iterations", int(1e4), "Iterations to train for.") 33 | # Architecture 34 | tf.app.flags.DEFINE_string("architecture", "tied", "Seq2seq architecture to use: [basic, tied].") 35 | tf.app.flags.DEFINE_string("loop_type", "open", "loop type to use: [open, closed].") 36 | tf.app.flags.DEFINE_integer("body_size", 512, "Size of each body rnn model layer.") 37 | tf.app.flags.DEFINE_string("body_cell", "gru", "RNN cell type of body rnn : [elman, lstm, gru]") 38 | tf.app.flags.DEFINE_integer("num_layers", 2, "Number of layers in the model.") 39 | tf.app.flags.DEFINE_integer("seq_length_in", 50, "Number of frames to feed into the encoder. 25 fps") 40 | tf.app.flags.DEFINE_integer("seq_length_out", 100, "Number of frames that the decoder has to predict. 25fps") 41 | tf.app.flags.DEFINE_boolean("omit_one_hot", False, "Whether to remove one-hot encoding from the data") 42 | tf.app.flags.DEFINE_boolean("residual_velocities", False, "Add a residual connection that effectively models velocities") 43 | # Directories 44 | tf.app.flags.DEFINE_string("data_dir", os.path.normpath("./data/h3.6m/dataset"), "Data directory") 45 | tf.app.flags.DEFINE_string("train_dir", os.path.normpath("./final_exp_samples/simple_gru_no_plan_no_deriv"), "Training directory.") 46 | 47 | tf.app.flags.DEFINE_string("action","all", "The action to train on. 
all means all the actions, all_periodic means walking, eating and smoking") 48 | tf.app.flags.DEFINE_string("loss_to_use","sampling_based", "The type of loss to use, supervised or sampling_based") 49 | 50 | tf.app.flags.DEFINE_integer("test_every", 1000, "How often to compute error on the test set.") 51 | tf.app.flags.DEFINE_integer("save_every", 5000, "How often to compute error on the test set.") 52 | tf.app.flags.DEFINE_boolean("sample", False, "Set to True for sampling.") 53 | tf.app.flags.DEFINE_boolean("use_cpu", False, "Whether to use the CPU") 54 | tf.app.flags.DEFINE_integer("load", 0, "Try to load a previous checkpoint.") 55 | 56 | FLAGS = tf.app.flags.FLAGS 57 | 58 | train_dir = os.path.normpath(os.path.join( FLAGS.train_dir, FLAGS.action, 59 | 'out_{0}'.format(FLAGS.seq_length_out), 60 | 'iterations_{0}'.format(FLAGS.iterations), 61 | FLAGS.architecture, 62 | 'loop_type_{0}'.format(FLAGS.loop_type), 63 | FLAGS.loss_to_use, 64 | 'omit_one_hot' if FLAGS.omit_one_hot else 'one_hot', 65 | 'depth_{0}'.format(FLAGS.num_layers), 66 | 'body_cell_{0}'.format(FLAGS.body_cell), 67 | 'body_size_{0}'.format(FLAGS.body_size), 68 | 'lr_{0}'.format(FLAGS.learning_rate), 69 | 'residual_vel' if FLAGS.residual_velocities else 'not_residual_vel')) 70 | 71 | summaries_dir = os.path.normpath(os.path.join( train_dir, "log" )) # Directory for TB summaries 72 | 73 | def create_model(session, actions, sampling=False): 74 | """Create translation model and initialize or load parameters in session.""" 75 | 76 | model = motion_rnn_simple_lm.MotionRNNModelSimpleLM( 77 | FLAGS.architecture, 78 | FLAGS.loop_type, 79 | FLAGS.seq_length_in if not sampling else 50, 80 | FLAGS.seq_length_out if not sampling else 100, 81 | FLAGS.body_size, 82 | FLAGS.body_cell, 83 | FLAGS.num_layers, 84 | FLAGS.max_gradient_norm, 85 | FLAGS.batch_size, 86 | FLAGS.learning_rate, 87 | FLAGS.learning_rate_decay_factor, 88 | summaries_dir, 89 | FLAGS.loss_to_use if not sampling else "sampling_based", 90 | len( actions ), 91 | not FLAGS.omit_one_hot, 92 | FLAGS.residual_velocities, 93 | dtype=tf.float32) 94 | 95 | if FLAGS.load <= 0: 96 | print("Creating model with fresh parameters.") 97 | session.run(tf.global_variables_initializer()) 98 | return model 99 | 100 | ckpt = tf.train.get_checkpoint_state( train_dir, latest_filename="checkpoint") 101 | print( "train_dir", train_dir ) 102 | 103 | if ckpt and ckpt.model_checkpoint_path: 104 | # Check if the specific checkpoint exists 105 | if FLAGS.load > 0: 106 | if os.path.isfile(os.path.join(train_dir,"checkpoint-{0}.index".format(FLAGS.load))): 107 | ckpt_name = os.path.normpath(os.path.join( os.path.join(train_dir,"checkpoint-{0}".format(FLAGS.load)) )) 108 | else: 109 | raise ValueError("Asked to load checkpoint {0}, but it does not seem to exist".format(FLAGS.load)) 110 | else: 111 | ckpt_name = os.path.basename( ckpt.model_checkpoint_path ) 112 | 113 | print("Loading model {0}".format( ckpt_name )) 114 | model.saver.restore( session, ckpt.model_checkpoint_path ) 115 | return model 116 | else: 117 | print("Could not find checkpoint. 
Aborting.") 118 | raise( ValueError, "Checkpoint {0} does not seem to exist".format( ckpt.model_checkpoint_path ) ) 119 | 120 | return model 121 | 122 | 123 | def train(): 124 | """Train a seq2seq model on human motion""" 125 | 126 | actions = define_actions( FLAGS.action ) 127 | 128 | number_of_actions = len( actions ) 129 | 130 | train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = read_all_data( actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot ) 131 | 132 | # Limit TF to take a fraction of the GPU memory 133 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95) 134 | device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1} 135 | 136 | # setting graph-level seed 137 | tf.set_random_seed(42) 138 | 139 | with tf.Session(config=tf.ConfigProto( gpu_options=gpu_options, device_count = device_count )) as sess: 140 | 141 | # === Create the model === 142 | print("Creating %d layers of %d units for body RNN." % (FLAGS.num_layers, FLAGS.body_size)) 143 | 144 | model = create_model( sess, actions ) 145 | model.train_writer.add_graph( sess.graph ) 146 | print( "Model created" ) 147 | 148 | # === Read and denormalize the gt with srnn's seeds, as we'll need them 149 | # many times for evaluation in Euler Angles === 150 | srnn_gts_euler = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot ) # modified 151 | 152 | #=== This is the training loop === 153 | step_time, loss, val_loss = 0.0, 0.0, 0.0 154 | current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1 155 | previous_losses = [] 156 | 157 | step_time, loss = 0, 0 158 | sampling_weight = 0.0 159 | 160 | sampling_schedule = [500, 1000, 2000, 4000, 7000] 161 | sampling_weights = [0.2, 0.4, 0.6, 0.8, 1.0] 162 | sampling_weight = 0.0 163 | samp_cnt = -1 164 | 165 | for _ in xrange( FLAGS.iterations ): 166 | 167 | start_time = time.time() 168 | 169 | # === Training step === 170 | encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch( train_set, FLAGS.omit_one_hot ) # modified 171 | 172 | is_training = True 173 | dropout_prob = 0.3 174 | use_sample = False 175 | 176 | if samp_cnt < len(sampling_schedule)-1: 177 | if current_step == sampling_schedule[samp_cnt+1]: 178 | sampling_weight = sampling_weights[samp_cnt+1] 179 | samp_cnt = samp_cnt + 1 180 | 181 | _, step_loss, step_sampling_loss, loss_summary, lr_summary = model.step( sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, False ) 182 | 183 | model.train_writer.add_summary( loss_summary, current_step ) 184 | model.train_writer.add_summary( lr_summary, current_step ) 185 | 186 | if current_step % 10 == 0: 187 | print("step {0:04d}; step_loss: {1:.4f}; sampling_loss: {2:.4f}".format(current_step, step_loss, sampling_weight*step_sampling_loss )) 188 | 189 | step_time += (time.time() - start_time) / FLAGS.test_every 190 | loss += step_loss / FLAGS.test_every 191 | current_step += 1 192 | 193 | # === step decay === 194 | if current_step % FLAGS.learning_rate_step == 0: #and current_step <= 60000: 195 | sess.run(model.learning_rate_decay_op) 196 | 197 | # Once in a while, we save checkpoint, print statistics, and run evals. 
198 | if current_step % FLAGS.test_every == 0: 199 | 200 | # === Validation with randomly chosen seeds === 201 | forward_only = True 202 | is_training = False 203 | use_sample = True 204 | dropout_prob = 0.0 205 | 206 | encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch( test_set, FLAGS.omit_one_hot ) # modified 207 | step_loss, loss_summary = model.step(sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, forward_only) 208 | val_loss = step_loss # Loss book-keeping 209 | 210 | model.test_writer.add_summary(loss_summary, current_step) 211 | 212 | print() 213 | print("{0: <16} |".format("milliseconds"), end="") 214 | for ms in [80, 160, 320, 400, 560, 1000]: 215 | print(" {0:5d} |".format(ms), end="") 216 | print() 217 | 218 | # === Validation with srnn's seeds === 219 | for action in actions: 220 | 221 | # Evaluate the model on the test batches 222 | encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch_srnn( test_set, action ) 223 | 224 | is_training = False 225 | use_sample = True 226 | dropout_prob = 0.0 227 | 228 | srnn_loss, srnn_poses, _ = model.step(sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, True, True) 229 | 230 | # Denormalize the output 231 | srnn_pred_expmap = data_utils.revert_output_format( srnn_poses, data_mean, data_std, dim_to_ignore, actions, FLAGS.omit_one_hot ) # modified 232 | 233 | # Save the errors here 234 | mean_errors = np.zeros( (len(srnn_pred_expmap), srnn_pred_expmap[0].shape[0]) ) 235 | 236 | # Training is done in exponential map, but the error is reported in 237 | # Euler angles, as in previous work. 238 | # See https://github.com/asheshjain399/RNNexp/issues/6#issuecomment-247769197 239 | N_SEQUENCE_TEST = 8 240 | for i in np.arange(N_SEQUENCE_TEST): 241 | eulerchannels_pred = srnn_pred_expmap[i] 242 | 243 | # Convert from exponential map to Euler angles 244 | for j in np.arange( eulerchannels_pred.shape[0] ): 245 | for k in np.arange(3,97,3): 246 | eulerchannels_pred[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( eulerchannels_pred[j,k:k+3] )) 247 | 248 | # The global translation (first 3 entries) and global rotation 249 | # (next 3 entries) are also not considered in the error, so they 250 | # are set to zero. 251 | # See https://github.com/asheshjain399/RNNexp/issues/6#issuecomment-249404882 252 | gt_i=np.copy(srnn_gts_euler[action][i]) 253 | gt_i[:,0:6] = 0 254 | 255 | # Now compute the l2 error.
The following is numpy port of the error 256 | # function provided by Ashesh Jain (in matlab), available at 257 | # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/dataParser/Utils/motionGenerationError.m#L40-L54 258 | idx_to_use = np.where( np.std( gt_i, 0 ) > 1e-4 )[0] 259 | 260 | euc_error = np.power( gt_i[:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2) 261 | euc_error = np.sum(euc_error, 1) 262 | euc_error = np.sqrt( euc_error ) 263 | mean_errors[i,:] = euc_error 264 | 265 | # This is simply the mean error over the N_SEQUENCE_TEST examples 266 | mean_mean_errors = np.mean( mean_errors, 0 ) 267 | 268 | # Pretty print of the results for 80, 160, 320, 400, 560 and 1000 ms 269 | print("{0: <16} |".format(action), end="") 270 | for ms in [1,3,7,9,13,24]: 271 | if FLAGS.seq_length_out >= ms+1: 272 | print(" {0:.3f} |".format( mean_mean_errors[ms] ), end="") 273 | else: 274 | print(" n/a |", end="") 275 | print() 276 | 277 | # Ugly massive if-then to log the error to tensorboard :shrug: 278 | if action == "walking": 279 | summaries = sess.run( 280 | [model.walking_err80_summary, 281 | model.walking_err160_summary, 282 | model.walking_err320_summary, 283 | model.walking_err400_summary, 284 | model.walking_err560_summary, 285 | model.walking_err1000_summary], 286 | {model.walking_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 287 | model.walking_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 288 | model.walking_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 289 | model.walking_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 290 | model.walking_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 291 | model.walking_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 292 | elif action == "eating": 293 | summaries = sess.run( 294 | [model.eating_err80_summary, 295 | model.eating_err160_summary, 296 | model.eating_err320_summary, 297 | model.eating_err400_summary, 298 | model.eating_err560_summary, 299 | model.eating_err1000_summary], 300 | {model.eating_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 301 | model.eating_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 302 | model.eating_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 303 | model.eating_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 304 | model.eating_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 305 | model.eating_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 306 | elif action == "smoking": 307 | summaries = sess.run( 308 | [model.smoking_err80_summary, 309 | model.smoking_err160_summary, 310 | model.smoking_err320_summary, 311 | model.smoking_err400_summary, 312 | model.smoking_err560_summary, 313 | model.smoking_err1000_summary], 314 | {model.smoking_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 315 | model.smoking_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 316 | model.smoking_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 317 | model.smoking_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 318 | model.smoking_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 319 | model.smoking_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 320 | elif action == "discussion": 321 | summaries = sess.run( 322 | 
[model.discussion_err80_summary, 323 | model.discussion_err160_summary, 324 | model.discussion_err320_summary, 325 | model.discussion_err400_summary, 326 | model.discussion_err560_summary, 327 | model.discussion_err1000_summary], 328 | {model.discussion_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 329 | model.discussion_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 330 | model.discussion_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 331 | model.discussion_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 332 | model.discussion_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 333 | model.discussion_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 334 | elif action == "directions": 335 | summaries = sess.run( 336 | [model.directions_err80_summary, 337 | model.directions_err160_summary, 338 | model.directions_err320_summary, 339 | model.directions_err400_summary, 340 | model.directions_err560_summary, 341 | model.directions_err1000_summary], 342 | {model.directions_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 343 | model.directions_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 344 | model.directions_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 345 | model.directions_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 346 | model.directions_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 347 | model.directions_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 348 | elif action == "greeting": 349 | summaries = sess.run( 350 | [model.greeting_err80_summary, 351 | model.greeting_err160_summary, 352 | model.greeting_err320_summary, 353 | model.greeting_err400_summary, 354 | model.greeting_err560_summary, 355 | model.greeting_err1000_summary], 356 | {model.greeting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 357 | model.greeting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 358 | model.greeting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 359 | model.greeting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 360 | model.greeting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 361 | model.greeting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 362 | elif action == "phoning": 363 | summaries = sess.run( 364 | [model.phoning_err80_summary, 365 | model.phoning_err160_summary, 366 | model.phoning_err320_summary, 367 | model.phoning_err400_summary, 368 | model.phoning_err560_summary, 369 | model.phoning_err1000_summary], 370 | {model.phoning_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 371 | model.phoning_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 372 | model.phoning_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 373 | model.phoning_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 374 | model.phoning_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 375 | model.phoning_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 376 | elif action == "posing": 377 | summaries = sess.run( 378 | [model.posing_err80_summary, 379 | model.posing_err160_summary, 380 | model.posing_err320_summary, 381 | model.posing_err400_summary, 382 | model.posing_err560_summary, 383 | model.posing_err1000_summary], 384 | 
{model.posing_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 385 | model.posing_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 386 | model.posing_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 387 | model.posing_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 388 | model.posing_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 389 | model.posing_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 390 | elif action == "purchases": 391 | summaries = sess.run( 392 | [model.purchases_err80_summary, 393 | model.purchases_err160_summary, 394 | model.purchases_err320_summary, 395 | model.purchases_err400_summary, 396 | model.purchases_err560_summary, 397 | model.purchases_err1000_summary], 398 | {model.purchases_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 399 | model.purchases_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 400 | model.purchases_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 401 | model.purchases_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 402 | model.purchases_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 403 | model.purchases_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 404 | elif action == "sitting": 405 | summaries = sess.run( 406 | [model.sitting_err80_summary, 407 | model.sitting_err160_summary, 408 | model.sitting_err320_summary, 409 | model.sitting_err400_summary, 410 | model.sitting_err560_summary, 411 | model.sitting_err1000_summary], 412 | {model.sitting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 413 | model.sitting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 414 | model.sitting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 415 | model.sitting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 416 | model.sitting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 417 | model.sitting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 418 | elif action == "sittingdown": 419 | summaries = sess.run( 420 | [model.sittingdown_err80_summary, 421 | model.sittingdown_err160_summary, 422 | model.sittingdown_err320_summary, 423 | model.sittingdown_err400_summary, 424 | model.sittingdown_err560_summary, 425 | model.sittingdown_err1000_summary], 426 | {model.sittingdown_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 427 | model.sittingdown_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 428 | model.sittingdown_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 429 | model.sittingdown_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 430 | model.sittingdown_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 431 | model.sittingdown_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 432 | elif action == "takingphoto": 433 | summaries = sess.run( 434 | [model.takingphoto_err80_summary, 435 | model.takingphoto_err160_summary, 436 | model.takingphoto_err320_summary, 437 | model.takingphoto_err400_summary, 438 | model.takingphoto_err560_summary, 439 | model.takingphoto_err1000_summary], 440 | {model.takingphoto_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 441 | model.takingphoto_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 442 | 
model.takingphoto_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 443 | model.takingphoto_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 444 | model.takingphoto_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 445 | model.takingphoto_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 446 | elif action == "waiting": 447 | summaries = sess.run( 448 | [model.waiting_err80_summary, 449 | model.waiting_err160_summary, 450 | model.waiting_err320_summary, 451 | model.waiting_err400_summary, 452 | model.waiting_err560_summary, 453 | model.waiting_err1000_summary], 454 | {model.waiting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 455 | model.waiting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 456 | model.waiting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 457 | model.waiting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 458 | model.waiting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 459 | model.waiting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 460 | elif action == "walkingdog": 461 | summaries = sess.run( 462 | [model.walkingdog_err80_summary, 463 | model.walkingdog_err160_summary, 464 | model.walkingdog_err320_summary, 465 | model.walkingdog_err400_summary, 466 | model.walkingdog_err560_summary, 467 | model.walkingdog_err1000_summary], 468 | {model.walkingdog_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 469 | model.walkingdog_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 470 | model.walkingdog_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 471 | model.walkingdog_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 472 | model.walkingdog_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 473 | model.walkingdog_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 474 | elif action == "walkingtogether": 475 | summaries = sess.run( 476 | [model.walkingtogether_err80_summary, 477 | model.walkingtogether_err160_summary, 478 | model.walkingtogether_err320_summary, 479 | model.walkingtogether_err400_summary, 480 | model.walkingtogether_err560_summary, 481 | model.walkingtogether_err1000_summary], 482 | {model.walkingtogether_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 483 | model.walkingtogether_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 484 | model.walkingtogether_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 485 | model.walkingtogether_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 486 | model.walkingtogether_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 487 | model.walkingtogether_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 488 | 489 | for i in np.arange(len( summaries )): 490 | model.test_writer.add_summary(summaries[i], current_step) 491 | 492 | 493 | print() 494 | print("============================\n" 495 | "Global step: %d\n" 496 | "Learning rate: %.6f\n" 497 | "Step-time (ms): %.4f\n" 498 | "Train loss avg: %.4f\n" 499 | "--------------------------\n" 500 | "Val loss: %.4f\n" 501 | "srnn loss: %.4f\n" 502 | "============================" % (model.global_step.eval(), 503 | model.learning_rate.eval(), step_time*1000, loss, 504 | val_loss, srnn_loss)) 505 | print() 506 | 507 | previous_losses.append(loss) 508 | 
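# --- Editor's sketch (a factored-out helper that does not exist in this repo):
# the per-action evaluation above converts the predicted exponential-map
# channels to Euler angles and takes a per-frame L2 norm over the dimensions
# whose ground truth actually varies. Assuming the 99-channel H3.6M layout used
# throughout this file (three angle channels per joint starting at channel 3),
# the metric amounts to:
#
#   def euler_l2_error(gt_euler, pred_expmap):
#       pred = np.copy(pred_expmap)
#       for j in np.arange(pred.shape[0]):
#           for k in np.arange(3, 97, 3):
#               pred[j, k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat(pred[j, k:k+3]))
#       idx = np.where(np.std(gt_euler, 0) > 1e-4)[0]
#       return np.sqrt(np.sum((gt_euler[:, idx] - pred[:, idx]) ** 2, 1))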
509 | # Save the model 510 | if current_step % FLAGS.save_every == 0: 511 | print( "Saving the model..." ); start_time = time.time() 512 | model.saver.save(sess, os.path.normpath(os.path.join(train_dir, 'checkpoint')), global_step=current_step ) 513 | print( "done in {0:.2f} ms".format( (time.time() - start_time)*1000) ) 514 | 515 | # Reset global time and loss 516 | step_time, loss = 0, 0 517 | 518 | sys.stdout.flush() 519 | 520 | 521 | def get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, one_hot, to_euler=True ): 522 | """ 523 | Get the ground truths for srnn's sequences, and convert to Euler angles. 524 | (the error is always computed in Euler angles). 525 | 526 | Args 527 | actions: a list of actions to get ground truths for. 528 | model: training model we are using (we only use the "get_batch" method). 529 | test_set: dictionary with normalized training data. 530 | data_mean: d-long vector with the mean of the training data. 531 | data_std: d-long vector with the standard deviation of the training data. 532 | dim_to_ignore: dimensions that we are not using to train/predict. 533 | one_hot: whether the data comes with one-hot encoding indicating action. 534 | to_euler: whether to convert the angles to Euler format or keep them in exponential map 535 | 536 | Returns 537 | srnn_gts_euler: a dictionary where the keys are actions, and the values 538 | are the ground-truth, denormalized expected outputs of srnn's seeds. 539 | """ 540 | srnn_gts_euler = {} 541 | 542 | for action in actions: 543 | 544 | srnn_gt_euler = [] 545 | _, _, _, srnn_expmap = model.get_batch_srnn( test_set, action ) # modified 546 | 547 | # expmap -> rotmat -> euler 548 | for i in np.arange( srnn_expmap.shape[0] ): 549 | denormed = data_utils.unNormalizeData(srnn_expmap[i,:,:], data_mean, data_std, dim_to_ignore, actions, one_hot ) # modified 550 | 551 | if to_euler: 552 | for j in np.arange( denormed.shape[0] ): 553 | for k in np.arange(3,97,3): 554 | denormed[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( denormed[j,k:k+3] )) 555 | 556 | srnn_gt_euler.append( denormed ) 557 | 558 | # Put back in the dictionary 559 | srnn_gts_euler[action] = srnn_gt_euler 560 | 561 | return srnn_gts_euler 562 | 563 | 564 | def sample(): 565 | """Sample predictions for srnn's seeds""" 566 | 567 | if FLAGS.load <= 0: 568 | raise ValueError("Must give an iteration to read parameters from") 569 | 570 | actions = define_actions( FLAGS.action ) 571 | 572 | # Use the CPU if asked to 573 | device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1} 574 | with tf.Session(config=tf.ConfigProto( device_count = device_count )) as sess: 575 | 576 | # === Create the model === 577 | print("Creating %d layers of %d units."
% (FLAGS.num_layers, FLAGS.body_size)) 578 | sampling = True 579 | model = create_model(sess, actions, sampling) 580 | print("Model created") 581 | 582 | # Load all the data 583 | train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = read_all_data( actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot ) 584 | 585 | # === Read and denormalize the gt with srnn's seeds, as we'll need them 586 | # many times for evaluation in Euler Angles === 587 | srnn_gts_expmap = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot, to_euler=False ) 588 | srnn_gts_euler = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot ) 589 | 590 | # Clean and create a new h5 file of samples 591 | SAMPLES_FNAME = 'samples.h5' 592 | try: 593 | os.remove( SAMPLES_FNAME ) 594 | except OSError: 595 | pass 596 | 597 | # Predict and save for each action 598 | for action in actions: 599 | 600 | # Make prediction with srnn' seeds 601 | encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch_srnn( test_set, action ) 602 | forward_only = True 603 | srnn_seeds = True 604 | 605 | is_training = False 606 | use_sample = True 607 | dropout_prob = 0.0 608 | sampling_weight = 0.0 609 | 610 | srnn_loss, srnn_poses, _ = model.step(sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, forward_only, srnn_seeds) 611 | 612 | # denormalizes too 613 | srnn_pred_expmap = data_utils.revert_output_format( srnn_poses, data_mean, data_std, dim_to_ignore, actions, FLAGS.omit_one_hot ) # modified 614 | 615 | # Save the conditioning seeds 616 | 617 | # Save the samples 618 | with h5py.File( SAMPLES_FNAME, 'a' ) as hf: 619 | for i in np.arange(8): 620 | # Save conditioning ground truth 621 | node_name = 'expmap/gt/{1}_{0}'.format(i, action) 622 | hf.create_dataset( node_name, data=srnn_gts_expmap[action][i] ) 623 | # Save prediction 624 | node_name = 'expmap/preds/{1}_{0}'.format(i, action) 625 | hf.create_dataset( node_name, data=srnn_pred_expmap[i] ) 626 | 627 | # Compute and save the errors here 628 | mean_errors = np.zeros( (len(srnn_pred_expmap), srnn_pred_expmap[0].shape[0]) ) 629 | 630 | for i in np.arange(8): 631 | 632 | eulerchannels_pred = srnn_pred_expmap[i] 633 | 634 | for j in np.arange( eulerchannels_pred.shape[0] ): 635 | for k in np.arange(3,97,3): 636 | eulerchannels_pred[j,k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat( eulerchannels_pred[j,k:k+3] )) 637 | 638 | eulerchannels_pred[:,0:6] = 0 639 | 640 | # Pick only the dimensions with sufficient standard deviation. Others are ignored. 641 | idx_to_use = np.where( np.std( eulerchannels_pred, 0 ) > 1e-4 )[0] 642 | 643 | euc_error = np.power( srnn_gts_euler[action][i][:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2) 644 | euc_error = np.sum(euc_error, 1) 645 | euc_error = np.sqrt( euc_error ) 646 | mean_errors[i,:] = euc_error 647 | 648 | mean_mean_errors = np.mean( mean_errors, 0 ) 649 | print( action ) 650 | print( ','.join(map(str, mean_mean_errors.tolist() )) ) 651 | 652 | with h5py.File( SAMPLES_FNAME, 'a' ) as hf: 653 | node_name = 'mean_{0}_error'.format( action ) 654 | hf.create_dataset( node_name, data=mean_mean_errors ) 655 | 656 | return 657 | 658 | 659 | def define_actions( action ): 660 | """ 661 | Define the list of actions we are using. 662 | 663 | Args 664 | action: String with the passed action. 
Could be "all" 665 | Returns 666 | actions: List of strings of actions 667 | Raises 668 | ValueError if the action is not included in H3.6M 669 | """ 670 | 671 | actions = ["walking", "eating", "smoking", "discussion", "directions", 672 | "greeting", "phoning", "posing", "purchases", "sitting", 673 | "sittingdown", "takingphoto", "waiting", "walkingdog", 674 | "walkingtogether"] 675 | 676 | if action in actions: 677 | return [action] 678 | 679 | if action == "all": 680 | return actions 681 | 682 | if action == "all_srnn": 683 | return ["walking", "eating", "smoking", "discussion"] 684 | 685 | raise( ValueError, "Unrecognized action: %d" % action ) 686 | 687 | 688 | def read_all_data( actions, seq_length_in, seq_length_out, data_dir, one_hot ): 689 | """ 690 | Loads data for training/testing and normalizes it. 691 | 692 | Args 693 | actions: list of strings (actions) to load 694 | seq_length_in: number of frames to use in the burn-in sequence 695 | seq_length_out: number of frames to use in the output sequence 696 | data_dir: directory to load the data from 697 | one_hot: whether to use one-hot encoding per action 698 | Returns 699 | train_set: dictionary with normalized training data 700 | test_set: dictionary with test data 701 | data_mean: d-long vector with the mean of the training data 702 | data_std: d-long vector with the standard dev of the training data 703 | dim_to_ignore: dimensions that are not used becaused stdev is too small 704 | dim_to_use: dimensions that we are actually using in the model 705 | """ 706 | 707 | # === Read training data === 708 | print ("Reading training data (seq_len_in: {0}, seq_len_out {1}).".format( 709 | seq_length_in, seq_length_out)) 710 | 711 | train_subject_ids = [1,6,7,8,9,11] 712 | test_subject_ids = [5] 713 | 714 | train_set, complete_train = data_utils.load_data( data_dir, train_subject_ids, actions, one_hot ) 715 | test_set, complete_test = data_utils.load_data( data_dir, test_subject_ids, actions, one_hot ) 716 | 717 | # Compute normalization stats 718 | data_mean, data_std, dim_to_ignore, dim_to_use = data_utils.normalization_stats(complete_train) 719 | 720 | # Normalize -- subtract mean, divide by stdev 721 | train_set = data_utils.normalize_data( train_set, data_mean, data_std, dim_to_use, actions, one_hot ) 722 | test_set = data_utils.normalize_data( test_set, data_mean, data_std, dim_to_use, actions, one_hot ) 723 | print("done reading data.") 724 | 725 | return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use 726 | 727 | 728 | def main(_): 729 | if FLAGS.sample: 730 | sample() 731 | else: 732 | train() 733 | 734 | if __name__ == "__main__": 735 | tf.app.run() 736 | -------------------------------------------------------------------------------- /translate_simple_lm_flow.py: -------------------------------------------------------------------------------- 1 | 2 | """Simple code for training an RNN for motion prediction.""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import math 9 | import os 10 | import random 11 | import sys 12 | import time 13 | import h5py 14 | 15 | import numpy as np 16 | from six.moves import xrange # pylint: disable=redefined-builtin 17 | import tensorflow as tf 18 | 19 | import data_utils 20 | import seq2seq_model 21 | import motion_rnn_simple_lm_flow 22 | 23 | print("Setting seed.") 24 | np.random.seed(42) 25 | 26 | # Learning 27 | tf.app.flags.DEFINE_float("learning_rate", .0001, "Learning rate.") 28 | 
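# The two flags below drive a simple step decay: the training loop runs
# model.learning_rate_decay_op once every learning_rate_step iterations, which
# multiplies the rate by learning_rate_decay_factor. A minimal sketch of the
# resulting schedule (illustrative only; `step` is a hypothetical counter):
#
#   def effective_learning_rate(base_lr, decay_factor, decay_step, step):
#     # e.g. base_lr=1e-4, decay_factor=0.6, decay_step=2000:
#     # steps 0-1999 -> 1.0e-4, steps 2000-3999 -> 0.6e-4, and so on.
#     return base_lr * decay_factor ** (step // decay_step)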
28 | tf.app.flags.DEFINE_float("learning_rate_decay_factor", 0.6, "Learning rate is multiplied by this much. 1 means no decay.")
29 | tf.app.flags.DEFINE_integer("learning_rate_step", 2000, "Every this many steps, do decay.")
30 | tf.app.flags.DEFINE_float("max_gradient_norm", 1, "Clip gradients to this norm.")
31 | tf.app.flags.DEFINE_integer("batch_size", 32, "Batch size to use during training.")
32 | tf.app.flags.DEFINE_integer("iterations", int(1e4), "Iterations to train for.")
33 | # Architecture
34 | tf.app.flags.DEFINE_string("architecture", "tied", "Seq2seq architecture to use: [basic, tied].")
35 | tf.app.flags.DEFINE_string("loop_type", "open", "Loop type to use: [open, closed].")
36 | tf.app.flags.DEFINE_integer("body_size", 512, "Size of each body rnn model layer.")
37 | tf.app.flags.DEFINE_string("body_cell", "gru", "RNN cell type of the body rnn: [elman, lstm, gru]")
38 | tf.app.flags.DEFINE_integer("num_layers", 2, "Number of layers in the model.")
39 | tf.app.flags.DEFINE_integer("seq_length_in", 50, "Number of frames to feed into the encoder. 25 fps")
40 | tf.app.flags.DEFINE_integer("seq_length_out", 25, "Number of frames that the decoder has to predict. 25 fps")
41 | tf.app.flags.DEFINE_boolean("omit_one_hot", False, "Whether to remove one-hot encoding from the data")
42 | tf.app.flags.DEFINE_boolean("residual_velocities", False, "Add a residual connection that effectively models velocities")
43 | # Directories
44 | tf.app.flags.DEFINE_string("data_dir", os.path.normpath("./data/h3.6m/dataset"), "Data directory")
45 | tf.app.flags.DEFINE_string("train_dir", os.path.normpath("./simple_lm_flow_experiments/"), "Training directory.")
46 | 
47 | tf.app.flags.DEFINE_string("action", "all", "The action to train on. all means all the actions, all_periodic means walking, eating and smoking")
48 | tf.app.flags.DEFINE_string("loss_to_use", "sampling_based", "The type of loss to use, supervised or sampling_based")
49 | 
50 | tf.app.flags.DEFINE_integer("test_every", 1000, "How often to compute error on the test set.")
51 | tf.app.flags.DEFINE_integer("save_every", 5000, "How often to save the model.")
52 | tf.app.flags.DEFINE_boolean("sample", False, "Set to True for sampling.")
53 | tf.app.flags.DEFINE_boolean("use_cpu", False, "Whether to use the CPU")
54 | tf.app.flags.DEFINE_integer("load", 0, "Try to load a previous checkpoint.")
55 | 
56 | FLAGS = tf.app.flags.FLAGS
57 | 
58 | train_dir = os.path.normpath(os.path.join( FLAGS.train_dir, FLAGS.action,
59 |   'out_{0}'.format(FLAGS.seq_length_out),
60 |   'iterations_{0}'.format(FLAGS.iterations),
61 |   FLAGS.architecture,
62 |   'loop_type_{0}'.format(FLAGS.loop_type),
63 |   FLAGS.loss_to_use,
64 |   'omit_one_hot' if FLAGS.omit_one_hot else 'one_hot',
65 |   'depth_{0}'.format(FLAGS.num_layers),
66 |   'body_cell_{0}'.format(FLAGS.body_cell),
67 |   'body_size_{0}'.format(FLAGS.body_size),
68 |   'lr_{0}'.format(FLAGS.learning_rate),
69 |   'residual_vel' if FLAGS.residual_velocities else 'not_residual_vel'))
70 | 
71 | summaries_dir = os.path.normpath(os.path.join( train_dir, "log" )) # Directory for TB summaries
72 | 
73 | def create_model(session, actions, sampling=False):
74 |   """Create translation model and initialize or load parameters in session."""
75 | 
76 |   model = motion_rnn_simple_lm_flow.MotionRNNModelSimpleLM(
77 |     FLAGS.architecture,
78 |     FLAGS.loop_type,
79 |     FLAGS.seq_length_in if not sampling else 50,
80 |     FLAGS.seq_length_out if not sampling else 100,
81 |     FLAGS.body_size,
82 |     FLAGS.body_cell,
83 |     FLAGS.num_layers,
84 |     FLAGS.max_gradient_norm,
85 |     FLAGS.batch_size,
86 |     FLAGS.learning_rate,
87 |     FLAGS.learning_rate_decay_factor,
88 |     summaries_dir,
89 |     FLAGS.loss_to_use if not sampling else "sampling_based",
90 |     len( actions ),
91 |     not FLAGS.omit_one_hot,
92 |     FLAGS.residual_velocities,
93 |     dtype=tf.float32)
94 | 
95 |   if FLAGS.load <= 0:
96 |     print("Creating model with fresh parameters.")
97 |     session.run(tf.global_variables_initializer())
98 |     return model
99 | 
100 |   ckpt = tf.train.get_checkpoint_state( train_dir, latest_filename="checkpoint")
101 |   print( "train_dir", train_dir )
102 | 
103 |   if ckpt and ckpt.model_checkpoint_path:
104 |     # Check if the specific checkpoint exists
105 |     if FLAGS.load > 0:
106 |       if os.path.isfile(os.path.join(train_dir,"checkpoint-{0}.index".format(FLAGS.load))):
107 |         ckpt_name = os.path.normpath(os.path.join( os.path.join(train_dir,"checkpoint-{0}".format(FLAGS.load)) ))
108 |       else:
109 |         raise ValueError("Asked to load checkpoint {0}, but it does not seem to exist".format(FLAGS.load))
110 |     else:
111 |       ckpt_name = os.path.basename( ckpt.model_checkpoint_path )
112 | 
113 |     print("Loading model {0}".format( ckpt_name ))
114 |     model.saver.restore( session, ckpt.model_checkpoint_path )
115 |     return model
116 |   else:
117 |     print("Could not find checkpoint. Aborting.")
118 |     raise ValueError("Checkpoint {0} does not seem to exist".format( ckpt.model_checkpoint_path ))
119 | 
120 |   return model
121 | 
122 | 
123 | def train():
124 |   """Train a seq2seq model on human motion"""
125 | 
126 |   actions = define_actions( FLAGS.action )
127 | 
128 |   number_of_actions = len( actions )
129 | 
130 |   train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = read_all_data( actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot )
131 | 
132 |   # Limit TF to take a fraction of the GPU memory
133 |   gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
134 |   device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
135 | 
136 |   # setting graph-level seed
137 |   tf.set_random_seed(42)
138 | 
139 |   with tf.Session(config=tf.ConfigProto( gpu_options=gpu_options, device_count = device_count )) as sess:
140 | 
141 |     # === Create the model ===
142 |     print("Creating %d layers of %d units for body RNN." % (FLAGS.num_layers, FLAGS.body_size))
143 | 
144 |     model = create_model( sess, actions )
145 |     model.train_writer.add_graph( sess.graph )
146 |     print( "Model created" )
147 | 
148 |     # === Read and denormalize the gt with srnn's seeds, as we'll need them
149 |     # many times for evaluation in Euler Angles ===
150 |     srnn_gts_euler = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot ) # modified
151 | 
152 |     #=== This is the training loop ===
153 |     step_time, loss, val_loss = 0.0, 0.0, 0.0
154 |     current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1
155 |     previous_losses = []
156 | 
157 |     step_time, loss = 0, 0
158 |     n_trials = 30
159 |     sampling_schedule = [500, 1000, 2000, 4000, 7000]
160 |     sampling_weights = [0.2, 0.4, 0.6, 0.8, 1.0]
161 |     sampling_weight = 0.0
162 |     samp_cnt = -1
163 | 
164 |     for _ in xrange( FLAGS.iterations ):
165 | 
166 |       start_time = time.time()
167 | 
168 |       # === Training step ===
169 |       encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch( train_set, FLAGS.omit_one_hot ) # modified
170 | 
171 |       is_training = True
172 |       dropout_prob = 0.3
173 |       use_sample = False
174 | 
175 |       if samp_cnt < len(sampling_schedule)-1:
176 |         if current_step == sampling_schedule[samp_cnt+1]:
177 |           sampling_weight = sampling_weights[samp_cnt+1]
178 |           samp_cnt += 1
179 | 
180 |       _, step_loss, step_sample_loss, loss_summary, lr_summary = model.step( sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, False)
181 | 
182 | 
183 |       model.train_writer.add_summary( loss_summary, current_step )
184 |       model.train_writer.add_summary( lr_summary, current_step )
185 | 
186 |       if current_step % 10 == 0:
187 |         print("step {0:04d}; step_loss: {1:.4f}; step_sample_loss: {2:.4f}".format(current_step, step_loss, sampling_weight*step_sample_loss ))
188 | 
189 |       step_time += (time.time() - start_time) / FLAGS.test_every
190 |       loss += step_loss / FLAGS.test_every
191 |       current_step += 1
192 | 
193 |       # === step decay ===
194 |       if current_step % FLAGS.learning_rate_step == 0:
195 |         sess.run(model.learning_rate_decay_op)
196 | 
197 |       # Once in a while, we save checkpoint, print statistics, and run evals.
198 |       if current_step % FLAGS.test_every == 0:
199 | 
200 |         # === Validation with randomly chosen seeds ===
201 |         forward_only = True
202 |         is_training = False
203 |         use_sample = True
204 |         dropout_prob = 0.0
205 | 
206 |         encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch( test_set, FLAGS.omit_one_hot ) # modified
207 |         step_loss, loss_summary = model.step(sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, forward_only)
208 |         val_loss = step_loss # Loss book-keeping
209 | 
210 |         model.test_writer.add_summary(loss_summary, current_step)
211 | 
212 |         print()
213 |         print("{0: <16} |".format("milliseconds"), end="")
214 |         for ms in [80, 160, 320, 400, 560, 1000]:
215 |           print(" {0:5d} |".format(ms), end="")
216 |         print()
217 | 
218 |         # === Validation with srnn's seeds ===
219 |         for action in actions:
220 | 
221 |           # Evaluate the model on the test batches
222 |           encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch_srnn( test_set, action )
223 | 
224 |           is_training = False
225 |           use_sample = True
226 |           dropout_prob = 0.0
227 | 
228 |           srnn_loss, srnn_poses, _ = model.step(sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, True, True)
229 | 
230 |           # Denormalize the output
231 |           srnn_pred_expmap = data_utils.revert_output_format( srnn_poses, data_mean, data_std, dim_to_ignore, actions, FLAGS.omit_one_hot ) # modified
232 | 
233 |           # Save the errors here
234 |           mean_errors = np.zeros( (len(srnn_pred_expmap), srnn_pred_expmap[0].shape[0]) )
235 | 
236 |           # Training is done in exponential map, but the error is reported in
237 |           # Euler angles, as in previous work.
238 |           # See https://github.com/asheshjain399/RNNexp/issues/6#issuecomment-247769197
239 |           N_SEQUENCE_TEST = 8
240 |           for i in np.arange(N_SEQUENCE_TEST):
241 |             eulerchannels_pred = srnn_pred_expmap[i]
242 | 
243 |             # Convert from exponential map to Euler angles
244 |             for j in np.arange( eulerchannels_pred.shape[0] ):
245 |               for k in np.arange(3,97,3):
246 |                 eulerchannels_pred[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( eulerchannels_pred[j,k:k+3] ))
247 | 
248 |             # The global translation (first 3 entries) and global rotation
249 |             # (next 3 entries) are also not considered in the error, so they
250 |             # are set to zero.
251 |             # See https://github.com/asheshjain399/RNNexp/issues/6#issuecomment-249404882
252 |             gt_i = np.copy(srnn_gts_euler[action][i])
253 |             gt_i[:,0:6] = 0
254 | 
255 |             # Now compute the l2 error. The following is a numpy port of the error
256 |             # function provided by Ashesh Jain (in MATLAB), available at
257 |             # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/dataParser/Utils/motionGenerationError.m#L40-L54
258 |             idx_to_use = np.where( np.std( gt_i, 0 ) > 1e-4 )[0]
259 | 
260 |             euc_error = np.power( gt_i[:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2)
261 |             euc_error = np.sum(euc_error, 1)
262 |             euc_error = np.sqrt( euc_error )
263 |             mean_errors[i,:] = euc_error
264 | 
265 |           # This is simply the mean error over the N_SEQUENCE_TEST examples
266 |           mean_mean_errors = np.mean( mean_errors, 0 )
267 | 
268 |           # Pretty print of the results for 80, 160, 320, 400, 560 and 1000 ms
269 |           print("{0: <16} |".format(action), end="")
270 |           for idx in [1,3,7,9,13,24]:
271 |             if FLAGS.seq_length_out >= idx+1:
272 |               print(" {0:.3f} |".format( mean_mean_errors[idx] ), end="")
273 |             else:
274 |               print(" n/a |", end="")
275 |           print()
276 | 
277 |           # Ugly massive if-then to log the error to tensorboard :shrug:
278 |           if action == "walking":
279 |             summaries = sess.run(
280 |               [model.walking_err80_summary,
281 |                model.walking_err160_summary,
282 |                model.walking_err320_summary,
283 |                model.walking_err400_summary,
284 |                model.walking_err560_summary,
285 |                model.walking_err1000_summary],
286 |               {model.walking_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None,
287 |                model.walking_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None,
288 |                model.walking_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None,
289 |                model.walking_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None,
290 |                model.walking_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None,
291 |                model.walking_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None})
292 |           elif action == "eating":
293 |             summaries = sess.run(
294 |               [model.eating_err80_summary,
295 |                model.eating_err160_summary,
296 |                model.eating_err320_summary,
297 |                model.eating_err400_summary,
298 |                model.eating_err560_summary,
299 |                model.eating_err1000_summary],
300 |               {model.eating_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None,
301 |                model.eating_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None,
302 |                model.eating_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None,
303 |                model.eating_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None,
304 |                model.eating_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None,
305 |                model.eating_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None})
306 |           elif action == "smoking":
307 |             summaries = sess.run(
308 |               [model.smoking_err80_summary,
309 |                model.smoking_err160_summary,
310 |                model.smoking_err320_summary,
311 |                model.smoking_err400_summary,
312 |                model.smoking_err560_summary,
313 |                model.smoking_err1000_summary],
314 |               {model.smoking_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None,
315 |                model.smoking_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None,
316 |                model.smoking_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None,
317 |                model.smoking_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None,
318 |                model.smoking_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None,
319 |                model.smoking_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None})
320 |           elif action == "discussion":
321 |             summaries = sess.run(
322 |
[model.discussion_err80_summary, 323 | model.discussion_err160_summary, 324 | model.discussion_err320_summary, 325 | model.discussion_err400_summary, 326 | model.discussion_err560_summary, 327 | model.discussion_err1000_summary], 328 | {model.discussion_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 329 | model.discussion_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 330 | model.discussion_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 331 | model.discussion_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 332 | model.discussion_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 333 | model.discussion_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 334 | elif action == "directions": 335 | summaries = sess.run( 336 | [model.directions_err80_summary, 337 | model.directions_err160_summary, 338 | model.directions_err320_summary, 339 | model.directions_err400_summary, 340 | model.directions_err560_summary, 341 | model.directions_err1000_summary], 342 | {model.directions_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 343 | model.directions_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 344 | model.directions_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 345 | model.directions_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 346 | model.directions_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 347 | model.directions_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 348 | elif action == "greeting": 349 | summaries = sess.run( 350 | [model.greeting_err80_summary, 351 | model.greeting_err160_summary, 352 | model.greeting_err320_summary, 353 | model.greeting_err400_summary, 354 | model.greeting_err560_summary, 355 | model.greeting_err1000_summary], 356 | {model.greeting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 357 | model.greeting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 358 | model.greeting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 359 | model.greeting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 360 | model.greeting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 361 | model.greeting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 362 | elif action == "phoning": 363 | summaries = sess.run( 364 | [model.phoning_err80_summary, 365 | model.phoning_err160_summary, 366 | model.phoning_err320_summary, 367 | model.phoning_err400_summary, 368 | model.phoning_err560_summary, 369 | model.phoning_err1000_summary], 370 | {model.phoning_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 371 | model.phoning_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 372 | model.phoning_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 373 | model.phoning_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 374 | model.phoning_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 375 | model.phoning_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 376 | elif action == "posing": 377 | summaries = sess.run( 378 | [model.posing_err80_summary, 379 | model.posing_err160_summary, 380 | model.posing_err320_summary, 381 | model.posing_err400_summary, 382 | model.posing_err560_summary, 383 | model.posing_err1000_summary], 384 | 
{model.posing_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 385 | model.posing_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 386 | model.posing_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 387 | model.posing_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 388 | model.posing_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 389 | model.posing_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 390 | elif action == "purchases": 391 | summaries = sess.run( 392 | [model.purchases_err80_summary, 393 | model.purchases_err160_summary, 394 | model.purchases_err320_summary, 395 | model.purchases_err400_summary, 396 | model.purchases_err560_summary, 397 | model.purchases_err1000_summary], 398 | {model.purchases_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 399 | model.purchases_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 400 | model.purchases_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 401 | model.purchases_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 402 | model.purchases_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 403 | model.purchases_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 404 | elif action == "sitting": 405 | summaries = sess.run( 406 | [model.sitting_err80_summary, 407 | model.sitting_err160_summary, 408 | model.sitting_err320_summary, 409 | model.sitting_err400_summary, 410 | model.sitting_err560_summary, 411 | model.sitting_err1000_summary], 412 | {model.sitting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 413 | model.sitting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 414 | model.sitting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 415 | model.sitting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 416 | model.sitting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 417 | model.sitting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 418 | elif action == "sittingdown": 419 | summaries = sess.run( 420 | [model.sittingdown_err80_summary, 421 | model.sittingdown_err160_summary, 422 | model.sittingdown_err320_summary, 423 | model.sittingdown_err400_summary, 424 | model.sittingdown_err560_summary, 425 | model.sittingdown_err1000_summary], 426 | {model.sittingdown_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 427 | model.sittingdown_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 428 | model.sittingdown_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 429 | model.sittingdown_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 430 | model.sittingdown_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 431 | model.sittingdown_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 432 | elif action == "takingphoto": 433 | summaries = sess.run( 434 | [model.takingphoto_err80_summary, 435 | model.takingphoto_err160_summary, 436 | model.takingphoto_err320_summary, 437 | model.takingphoto_err400_summary, 438 | model.takingphoto_err560_summary, 439 | model.takingphoto_err1000_summary], 440 | {model.takingphoto_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 441 | model.takingphoto_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 442 | 
model.takingphoto_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 443 | model.takingphoto_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 444 | model.takingphoto_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 445 | model.takingphoto_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 446 | elif action == "waiting": 447 | summaries = sess.run( 448 | [model.waiting_err80_summary, 449 | model.waiting_err160_summary, 450 | model.waiting_err320_summary, 451 | model.waiting_err400_summary, 452 | model.waiting_err560_summary, 453 | model.waiting_err1000_summary], 454 | {model.waiting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 455 | model.waiting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 456 | model.waiting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 457 | model.waiting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 458 | model.waiting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 459 | model.waiting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 460 | elif action == "walkingdog": 461 | summaries = sess.run( 462 | [model.walkingdog_err80_summary, 463 | model.walkingdog_err160_summary, 464 | model.walkingdog_err320_summary, 465 | model.walkingdog_err400_summary, 466 | model.walkingdog_err560_summary, 467 | model.walkingdog_err1000_summary], 468 | {model.walkingdog_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 469 | model.walkingdog_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 470 | model.walkingdog_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 471 | model.walkingdog_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 472 | model.walkingdog_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 473 | model.walkingdog_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 474 | elif action == "walkingtogether": 475 | summaries = sess.run( 476 | [model.walkingtogether_err80_summary, 477 | model.walkingtogether_err160_summary, 478 | model.walkingtogether_err320_summary, 479 | model.walkingtogether_err400_summary, 480 | model.walkingtogether_err560_summary, 481 | model.walkingtogether_err1000_summary], 482 | {model.walkingtogether_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 483 | model.walkingtogether_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 484 | model.walkingtogether_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 485 | model.walkingtogether_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 486 | model.walkingtogether_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 487 | model.walkingtogether_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 488 | 489 | for i in np.arange(len( summaries )): 490 | model.test_writer.add_summary(summaries[i], current_step) 491 | 492 | 493 | print() 494 | print("============================\n" 495 | "Global step: %d\n" 496 | "Learning rate: %.6f\n" 497 | "Step-time (ms): %.4f\n" 498 | "Train loss avg: %.4f\n" 499 | "--------------------------\n" 500 | "Val loss: %.4f\n" 501 | "srnn loss: %.4f\n" 502 | "============================" % (model.global_step.eval(), 503 | model.learning_rate.eval(), step_time*1000, loss, 504 | val_loss, srnn_loss)) 505 | print() 506 | 507 | previous_losses.append(loss) 508 | 
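# Note on the feeds above: each horizon falls back to None when
# FLAGS.seq_length_out is shorter than that horizon, and tf.Session.run
# rejects None feed values, so this logging path effectively assumes
# seq_length_out >= 25 (the default here). A hypothetical guard, if shorter
# output lengths were ever needed (`summary_ops` and `feeds` are illustrative
# names, not variables defined in this script):
#
#   feeds = {ph: val for ph, val in feeds.items() if val is not None}
#   summaries = sess.run(summary_ops, feeds)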
509 |         # Save the model
510 |         if current_step % FLAGS.save_every == 0:
511 |           print( "Saving the model..." ); start_time = time.time()
512 |           model.saver.save(sess, os.path.normpath(os.path.join(train_dir, 'checkpoint')), global_step=current_step )
513 |           print( "done in {0:.2f} ms".format( (time.time() - start_time)*1000) )
514 | 
515 |         # Reset global time and loss
516 |         step_time, loss = 0, 0
517 | 
518 |         sys.stdout.flush()
519 | 
520 | 
521 | def get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, one_hot, to_euler=True ):
522 |   """
523 |   Get the ground truths for srnn's sequences, and convert to Euler angles.
524 |   (the error is always computed in Euler angles).
525 | 
526 |   Args
527 |     actions: a list of actions to get ground truths for.
528 |     model: training model we are using (we only use the "get_batch" method).
529 |     test_set: dictionary with normalized training data.
530 |     data_mean: d-long vector with the mean of the training data.
531 |     data_std: d-long vector with the standard deviation of the training data.
532 |     dim_to_ignore: dimensions that we are not using to train/predict.
533 |     one_hot: whether the data comes with one-hot encoding indicating action.
534 |     to_euler: whether to convert the angles to Euler format or keep them in exponential map
535 | 
536 |   Returns
537 |     srnn_gts_euler: a dictionary where the keys are actions, and the values
538 |       are the ground-truth, denormalized expected outputs of srnn's seeds.
539 |   """
540 |   srnn_gts_euler = {}
541 | 
542 |   for action in actions:
543 | 
544 |     srnn_gt_euler = []
545 |     _, _, _, srnn_expmap = model.get_batch_srnn( test_set, action ) # modified
546 | 
547 |     # expmap -> rotmat -> euler
548 |     for i in np.arange( srnn_expmap.shape[0] ):
549 |       denormed = data_utils.unNormalizeData(srnn_expmap[i,:,:], data_mean, data_std, dim_to_ignore, actions, one_hot )
550 | 
551 |       if to_euler:
552 |         for j in np.arange( denormed.shape[0] ):
553 |           for k in np.arange(3,97,3):
554 |             denormed[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( denormed[j,k:k+3] ))
555 | 
556 |       srnn_gt_euler.append( denormed )
557 | 
558 |     # Put back in the dictionary
559 |     srnn_gts_euler[action] = srnn_gt_euler
560 | 
561 |   return srnn_gts_euler
562 | 
563 | 
564 | def sample():
565 |   """Sample predictions for srnn's seeds"""
566 | 
567 |   if FLAGS.load <= 0:
568 |     raise ValueError("Must give an iteration to read parameters from")
569 | 
570 |   actions = define_actions( FLAGS.action )
571 | 
572 |   # Use the CPU if asked to
573 |   device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
574 |   with tf.Session(config=tf.ConfigProto( device_count = device_count )) as sess:
575 | 
576 |     # === Create the model ===
577 |     print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.body_size))
578 |     sampling = True
579 |     model = create_model(sess, actions, sampling)
580 |     print("Model created")
581 | 
582 |     # Load all the data
583 |     train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = read_all_data( actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot )
584 | 
585 |     # === Read and denormalize the gt with srnn's seeds, as we'll need them
586 |     # many times for evaluation in Euler Angles ===
587 |     srnn_gts_expmap = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot, to_euler=False )
588 |     srnn_gts_euler = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot )
589 | 
590 |     # Clean and create a new h5 file of samples
591 |     SAMPLES_FNAME = 'samples.h5'
592 |     try:
593 |       os.remove( SAMPLES_FNAME )
594 |     except OSError:
595 |       pass
596 | 
597 |     # Predict and save for each action
598 |     for action in actions:
599 | 
600 |       # Make prediction with srnn's seeds
601 |       encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch_srnn( test_set, action )
602 |       forward_only = True
603 |       srnn_seeds = True
604 | 
605 |       is_training = False
606 |       use_sample = True
607 |       dropout_prob = 0.0
608 |       sampling_weight = 0.0
609 | 
610 |       srnn_loss, srnn_poses, _ = model.step(sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, forward_only, srnn_seeds)
611 | 
612 |       # denormalizes too
613 |       srnn_pred_expmap = data_utils.revert_output_format( srnn_poses, data_mean, data_std, dim_to_ignore, actions, FLAGS.omit_one_hot )
614 | 
615 |       # Save the conditioning seeds
616 | 
617 |       # Save the samples
618 |       with h5py.File( SAMPLES_FNAME, 'a' ) as hf:
619 |         for i in np.arange(8):
620 |           # Save conditioning ground truth
621 |           node_name = 'expmap/gt/{1}_{0}'.format(i, action)
622 |           hf.create_dataset( node_name, data=srnn_gts_expmap[action][i] )
623 |           # Save prediction
624 |           node_name = 'expmap/preds/{1}_{0}'.format(i, action)
625 |           hf.create_dataset( node_name, data=srnn_pred_expmap[i] )
626 | 
627 |       # Compute and save the errors here
628 |       mean_errors = np.zeros( (len(srnn_pred_expmap), srnn_pred_expmap[0].shape[0]) )
629 | 
630 |       for i in np.arange(8):
631 | 
632 |         eulerchannels_pred = srnn_pred_expmap[i]
633 | 
634 |         for j in np.arange( eulerchannels_pred.shape[0] ):
635 |           for k in np.arange(3,97,3):
636 |             eulerchannels_pred[j,k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat( eulerchannels_pred[j,k:k+3] ))
637 | 
638 |         eulerchannels_pred[:,0:6] = 0
639 | 
640 |         # Pick only the dimensions with sufficient standard deviation. Others are ignored.
641 |         idx_to_use = np.where( np.std( eulerchannels_pred, 0 ) > 1e-4 )[0]
642 | 
643 |         euc_error = np.power( srnn_gts_euler[action][i][:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2)
644 |         euc_error = np.sum(euc_error, 1)
645 |         euc_error = np.sqrt( euc_error )
646 |         mean_errors[i,:] = euc_error
647 | 
648 |       mean_mean_errors = np.mean( mean_errors, 0 )
649 |       print( action )
650 |       print( ','.join(map(str, mean_mean_errors.tolist() )) )
651 | 
652 |       with h5py.File( SAMPLES_FNAME, 'a' ) as hf:
653 |         node_name = 'mean_{0}_error'.format( action )
654 |         hf.create_dataset( node_name, data=mean_mean_errors )
655 | 
656 |     return
657 | 
658 | 
659 | def define_actions( action ):
660 |   """
661 |   Define the list of actions we are using.
662 | 
663 |   Args
664 |     action: String with the passed action. Could be "all"
665 |   Returns
666 |     actions: List of strings of actions
667 |   Raises
668 |     ValueError if the action is not included in H3.6M
669 |   """
670 | 
671 |   actions = ["walking", "eating", "smoking", "discussion", "directions",
672 |              "greeting", "phoning", "posing", "purchases", "sitting",
673 |              "sittingdown", "takingphoto", "waiting", "walkingdog",
674 |              "walkingtogether"]
675 | 
676 |   if action in actions:
677 |     return [action]
678 | 
679 |   if action == "all":
680 |     return actions
681 | 
682 |   if action == "all_srnn":
683 |     return ["walking", "eating", "smoking", "discussion"]
684 | 
685 |   raise ValueError("Unrecognized action: %s" % action)
686 | 
687 | 
688 | def read_all_data( actions, seq_length_in, seq_length_out, data_dir, one_hot ):
689 |   """
690 |   Loads data for training/testing and normalizes it.
691 | 
692 |   Args
693 |     actions: list of strings (actions) to load
694 |     seq_length_in: number of frames to use in the burn-in sequence
695 |     seq_length_out: number of frames to use in the output sequence
696 |     data_dir: directory to load the data from
697 |     one_hot: whether to use one-hot encoding per action
698 |   Returns
699 |     train_set: dictionary with normalized training data
700 |     test_set: dictionary with test data
701 |     data_mean: d-long vector with the mean of the training data
702 |     data_std: d-long vector with the standard dev of the training data
703 |     dim_to_ignore: dimensions that are not used because their stdev is too small
704 |     dim_to_use: dimensions that we are actually using in the model
705 |   """
706 | 
707 |   # === Read training data ===
708 |   print("Reading training data (seq_len_in: {0}, seq_len_out: {1}).".format(
709 |     seq_length_in, seq_length_out))
710 | 
711 |   train_subject_ids = [1,6,7,8,9,11]
712 |   test_subject_ids = [5]
713 | 
714 |   train_set, complete_train = data_utils.load_data( data_dir, train_subject_ids, actions, one_hot )
715 |   test_set, complete_test = data_utils.load_data( data_dir, test_subject_ids, actions, one_hot )
716 | 
717 |   # Compute normalization stats
718 |   data_mean, data_std, dim_to_ignore, dim_to_use = data_utils.normalization_stats(complete_train)
719 | 
720 |   # MODIFIED
721 |   #print(*dim_to_use)
722 | 
723 |   # Normalize -- subtract mean, divide by stdev
724 |   train_set = data_utils.normalize_data( train_set, data_mean, data_std, dim_to_use, actions, one_hot )
725 |   test_set = data_utils.normalize_data( test_set, data_mean, data_std, dim_to_use, actions, one_hot )
726 |   print("done reading data.")
727 | 
728 |   return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use
729 | 
730 | 
731 | def main(_):
732 |   if FLAGS.sample:
733 |     sample()
734 |   else:
735 |     train()
736 | 
737 | if __name__ == "__main__":
738 |   tf.app.run()
739 | 
--------------------------------------------------------------------------------
/viz.py:
--------------------------------------------------------------------------------
1 | """Functions to visualize human poses"""
2 | 
3 | import matplotlib.pyplot as plt
4 | import data_utils
5 | import numpy as np
6 | import h5py
7 | import os
8 | from mpl_toolkits.mplot3d import Axes3D
9 | 
10 | class Ax3DPose(object):
11 |   def __init__(self, ax, lcolor="#3498db", rcolor="#e74c3c"):
12 |     """
13 |     Create a 3d pose visualizer that can be updated with new poses.
14 | 
15 |     Args
16 |       ax: 3d axis to plot the 3d pose on
17 |       lcolor: String. Colour for the left part of the body
18 |       rcolor: String. Colour for the right part of the body
19 |     """
20 | 
21 |     # Start and endpoints of our representation
22 |     self.I = np.array([1,2,3,1,7,8,1, 13,14,15,14,18,19,14,26,27])-1
23 |     self.J = np.array([2,3,4,7,8,9,13,14,15,16,18,19,20,26,27,28])-1
24 |     # Left / right indicator
25 |     self.LR = np.array([1,1,1,0,0,0,0, 0, 0, 0, 0, 0, 0, 1, 1, 1], dtype=bool)
26 |     self.ax = ax
27 | 
28 |     vals = np.zeros((32, 3))
29 | 
30 |     # Make connection matrix
31 |     self.plots = []
32 |     for i in np.arange( len(self.I) ):
33 |       x = np.array( [vals[self.I[i], 0], vals[self.J[i], 0]] )
34 |       y = np.array( [vals[self.I[i], 1], vals[self.J[i], 1]] )
35 |       z = np.array( [vals[self.I[i], 2], vals[self.J[i], 2]] )
36 |       self.plots.append(self.ax.plot(x, y, z, lw=2, c=lcolor if self.LR[i] else rcolor))
37 | 
38 |     self.ax.set_xlabel("x")
39 |     self.ax.set_ylabel("y")
40 |     self.ax.set_zlabel("z")
41 | 
42 |   def update(self, channels, lcolor="#3498db", rcolor="#e74c3c"):
43 |     """
44 |     Update the plotted 3d pose.
45 | 
46 |     Args
47 |       channels: 96-dim long np array. The pose to plot.
48 |       lcolor: String. Colour for the left part of the body.
49 |       rcolor: String. Colour for the right part of the body.
50 |     Returns
51 |       Nothing. Simply updates the axis with the new pose.
52 |     """
53 | 
54 |     assert channels.size == 96, "channels should have 96 entries, it has %d instead" % channels.size
55 |     vals = np.reshape( channels, (32, -1) )
56 | 
57 |     for i in np.arange( len(self.I) ):
58 |       x = np.array( [vals[self.I[i], 0], vals[self.J[i], 0]] )
59 |       y = np.array( [vals[self.I[i], 1], vals[self.J[i], 1]] )
60 |       z = np.array( [vals[self.I[i], 2], vals[self.J[i], 2]] )
61 |       self.plots[i][0].set_xdata(x)
62 |       self.plots[i][0].set_ydata(y)
63 |       self.plots[i][0].set_3d_properties(z)
64 |       self.plots[i][0].set_color(lcolor if self.LR[i] else rcolor)
65 | 
66 |     r = 750
67 |     xroot, yroot, zroot = vals[0,0], vals[0,1], vals[0,2]
68 |     self.ax.set_xlim3d([-r+xroot, r+xroot])
69 |     self.ax.set_zlim3d([-r+zroot, r+zroot])
70 |     self.ax.set_ylim3d([-r+yroot, r+yroot])
71 | 
72 |     self.ax.set_aspect('equal')
73 | 
--------------------------------------------------------------------------------
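For reference, both training scripts above score predictions with the same Euler-angle metric. The sketch below is a minimal self-contained version of it (illustrative only: the function name and array shapes are assumptions, and in the scripts the expmap-to-Euler conversion is done by data_utils before this point):

import numpy as np

def euler_error(gt_euler, pred_euler):
  """Per-frame Euclidean error between two (n_frames, d) Euler-angle sequences."""
  gt, pred = np.copy(gt_euler), np.copy(pred_euler)
  # Global translation (dims 0:3) and global rotation (dims 3:6) are excluded.
  gt[:, 0:6] = 0
  pred[:, 0:6] = 0
  # Score only the dimensions whose ground truth actually varies.
  idx_to_use = np.where(np.std(gt, 0) > 1e-4)[0]
  # Euclidean distance in Euler-angle space, one value per frame.
  return np.sqrt(np.sum((gt[:, idx_to_use] - pred[:, idx_to_use]) ** 2, 1))

Averaging this over the eight test sequences per action gives the mean_mean_errors vectors printed and logged above.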