├── LICENSE
├── README.md
├── baselines.py
├── body_rnn_cell_extensions.py
├── body_rnn_cell_extensions_v1.py
├── core_rnn.py
├── data_utils.py
├── deltaRNN.py
├── forward_kinematics.py
├── forward_kinematics_v2.py
├── metrics.py
├── motion_rnn_lm.py
├── motion_rnn_lm_v2.py
├── motion_rnn_lm_v2_flow.py
├── motion_rnn_simple_lm.py
├── motion_rnn_simple_lm_flow.py
├── rnn.py
├── rnn_cell_extensions.py
├── rnn_cell_impl.py
├── rnn_cell_implement.py
├── rnn_cell_implement_flow.py
├── rnn_mod.py
├── translate_lm.py
├── translate_lm_v2.py
├── translate_lm_v2_flow.py
├── translate_simple_lm.py
├── translate_simple_lm_flow.py
└── viz.py

/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 Anand & Ankur
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Code and results/visualizations for the paper "A Neural Temporal Model for Human Motion Prediction", CVPR 2019
2 | 
--------------------------------------------------------------------------------
/baselines.py:
--------------------------------------------------------------------------------
1 | 
2 | """Super-simple baselines for short-term human motion prediction."""
3 | 
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 | 
8 | import numpy as np
9 | from six.moves import xrange # pylint: disable=redefined-builtin
10 | import tensorflow as tf
11 | 
12 | import translate
13 | import data_utils
14 | import seq2seq_model
15 | 
16 | 
17 | # Dummy object to create parameters for an also-dummy model
18 | class Object(object):
19 |   pass
20 | 
21 | def running_average( actions_dict, actions, k ):
22 |   """
23 |   Compute the error if we simply take the average of the last k frames.
24 | 
25 |   Args
26 |     actions_dict: Dictionary where keys are the actions, and each entry has a
27 |       tuple of (enc_in, dec_in, dec_out) poses.
28 |     actions: List of strings. The keys of actions_dict.
29 |     k: Integer. Number of frames to use for the running average.
30 | 
31 |   Returns
32 |     errs: a dictionary where, for each action, we have a 100-long list with the
33 |       error at each point in time.
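      (With the H3.6M sequences subsampled by two to 25 fps, output frame index i
      corresponds to (i + 1) * 40 ms; this is the index-to-millisecond mapping used
      by the result tables in main(), e.g. index 1 -> 80 ms, index 49 -> 2000 ms.)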
34 | """ 35 | 36 | # Get how many batches we have 37 | enc_in, dec_in, dec_out = actions_dict[ actions[0] ] 38 | 39 | n_sequences = len( enc_in ) 40 | seq_length_out = dec_out[0].shape[0] 41 | 42 | errs = dict() 43 | 44 | for action in actions: 45 | 46 | # Make space for the error 47 | errs[ action ] = np.zeros( (n_sequences, seq_length_out) ) 48 | 49 | # Get the lists for this action 50 | enc_in, dec_in, dec_out = actions_dict[action] 51 | 52 | for i in np.arange( n_sequences ): 53 | 54 | n, d = dec_out[i].shape 55 | 56 | # The last frame 57 | last_frame = dec_in[i][0, :] 58 | last_frame[0:6] = 0 59 | 60 | if k > 1: 61 | # Get the last k-1 frames 62 | last_k = enc_in[i][(-k+1):, :] 63 | assert( last_k.shape[0] == (k-1) ) 64 | 65 | # Merge and average them 66 | avg = np.mean( np.vstack( (last_k, last_frame) ), 0 ) 67 | else: 68 | avg = last_frame 69 | 70 | dec_out[i][:, 0:6] = 0 71 | idx_to_use = np.where( np.std( dec_out[i], 0 ) > 1e-4 )[0] 72 | 73 | ee = np.power( dec_out[i][:,idx_to_use] - avg[idx_to_use], 2 ) 74 | ee = np.sum( ee, 1 ) 75 | ee = np.sqrt( ee ) 76 | errs[ action ][i, :] = ee 77 | 78 | errs[action] = np.mean( errs[action], 0 ) 79 | 80 | return errs 81 | 82 | 83 | def last_buffer_frame( actions_dict, actions ): 84 | """ 85 | Compute the error if we simply take the last buffer frame as a fixed prediction 86 | 87 | Args 88 | actions_dict: Dictionary where keys are the actions, and each entry has a 89 | tuple of (enc_in, dec_in, dec_out) poses. 90 | actions: List of strings. The keys of actions_dict. 91 | 92 | 93 | Returns 94 | errs: a dictionary where, for each action, we have a 100-long list with the 95 | error at each point in time. 96 | """ 97 | 98 | # Get how many batches we have 99 | enc_in, dec_in, dec_out = actions_dict[ actions[0] ] 100 | 101 | n_sequences = len( enc_in ) 102 | seq_length_out = dec_out[0].shape[0] 103 | 104 | errs = dict() 105 | 106 | for action in actions: 107 | 108 | # Make space for the error 109 | errs[ action ] = np.zeros( (n_sequences, seq_length_out) ) 110 | 111 | # Get the lists for this action 112 | enc_in, dec_in, dec_out = actions_dict[action] 113 | 114 | for i in np.arange( n_sequences ): 115 | 116 | n, d = dec_out[i].shape 117 | 118 | # The last buffer frame 119 | last_buffer_frame = enc_in[i][-1, :] 120 | last_buffer_frame[0:6] = 0 121 | 122 | dec_out[i][:, 0:6] = 0 123 | idx_to_use = np.where( np.std( dec_out[i], 0 ) > 1e-4 )[0] 124 | 125 | ee = np.power( dec_out[i][:,idx_to_use] - last_buffer_frame[idx_to_use], 2 ) 126 | ee = np.sum( ee, 1 ) 127 | ee = np.sqrt( ee ) 128 | errs[ action ][i, :] = ee 129 | 130 | errs[action] = np.mean( errs[action], 0 ) 131 | 132 | return errs 133 | 134 | 135 | 136 | def denormalize_and_convert_to_euler( data, data_mean, data_std, dim_to_ignore, actions, one_hot ): 137 | """ 138 | Denormalizes data and converts to Euler angles 139 | (all losses are computed on Euler angles). 140 | 141 | Args 142 | data: dictionary with human poses. 143 | data_mean: d-long vector with the mean of the training data. 144 | data_std: d-long vector with the standard deviation of the training data. 145 | dim_to_ignore: dimensions to ignore because the std is too small or for other reasons. 146 | actions: list of strings with the actions in the data dictionary. 147 | one_hot: whether the data comes with one-hot encoding. 148 | 149 | Returns 150 | all_denormed: a list with nbatch entries. 
150 |     all_denormed: a list with nbatch entries. Each entry is an n-by-d matrix
151 |       that corresponds to a denormalized sequence in Euler angles
152 |   """
153 | 
154 |   all_denormed = []
155 | 
156 |   # expmap -> rotmat -> euler
157 |   for i in np.arange( data.shape[0] ):
158 |     denormed = data_utils.unNormalizeData(data[i,:,:], data_mean, data_std, dim_to_ignore, actions, one_hot )
159 | 
160 |     for j in np.arange( denormed.shape[0] ):
161 |       for k in np.arange(3,97,3):
162 |         denormed[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( denormed[j,k:k+3] ))
163 | 
164 |     all_denormed.append( denormed )
165 | 
166 |   return all_denormed
167 | 
168 | 
169 | def main():
170 | 
171 | 
172 |   actions = ["walking", "eating", "smoking", "discussion"]
173 | 
174 |   # TODO make this a runtime option
175 |   # Uncomment the lines below to evaluate on all 15 actions
176 | 
177 |   # actions.extend(["directions", "greeting", "phoning", "posing", "purchases",
178 |   #   "sitting", "sittingdown", "takingphoto", "waiting", "walkingdog", "walkingtogether"])
179 | 
180 |   # Parameters for dummy model. We only build the model to load the data.
181 |   one_hot = False
182 |   FLAGS = Object()
183 |   FLAGS.data_dir = "./data/h3.6m/dataset"
184 |   FLAGS.architecture = "tied"
185 |   FLAGS.seq_length_in = 50
186 |   FLAGS.seq_length_out = 100
187 |   FLAGS.num_layers = 1
188 |   FLAGS.size = 128
189 |   FLAGS.max_gradient_norm = 5
190 |   FLAGS.batch_size = 8
191 |   FLAGS.learning_rate = 0.005
192 |   FLAGS.learning_rate_decay_factor = 1
193 |   summaries_dir = "./log/"
194 |   FLAGS.loss_to_use = "sampling_based"
195 |   FLAGS.omit_one_hot = True  # a trailing comma here would silently turn this into a tuple
196 |   FLAGS.residual_velocities = False  # likewise, this must be a bool, not a tuple
197 |   dtype = tf.float32
198 | 
199 |   # Baselines are very simple. No need to use the GPU.
200 |   with tf.Session(config=tf.ConfigProto( device_count = {"GPU": 0})) as sess:
201 | 
202 |     model = seq2seq_model.Seq2SeqModel(
203 |         FLAGS.architecture,
204 |         FLAGS.seq_length_in,
205 |         FLAGS.seq_length_out,
206 |         FLAGS.size, # hidden layer size
207 |         FLAGS.num_layers,
208 |         FLAGS.max_gradient_norm,
209 |         FLAGS.batch_size,
210 |         FLAGS.learning_rate,
211 |         FLAGS.learning_rate_decay_factor,
212 |         summaries_dir,
213 |         FLAGS.loss_to_use,
214 |         len( actions ),
215 |         not FLAGS.omit_one_hot,
216 |         FLAGS.residual_velocities,
217 |         dtype=dtype)
218 | 
219 |     # Load the data
220 |     _, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = translate.read_all_data(actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot )
221 | 
222 |     # Get all the data, denormalize and convert it to euler angles
223 |     poses_data = {}
224 |     for action in actions:
225 |       enc_in, dec_in, dec_out = model.get_batch_srnn( test_set, action )
226 | 
227 |       enc_in = denormalize_and_convert_to_euler(enc_in, data_mean, data_std, dim_to_ignore, actions, not FLAGS.omit_one_hot )
228 |       dec_in = denormalize_and_convert_to_euler(dec_in, data_mean, data_std, dim_to_ignore, actions, not FLAGS.omit_one_hot )
229 |       dec_out = denormalize_and_convert_to_euler(dec_out, data_mean, data_std, dim_to_ignore, actions, not FLAGS.omit_one_hot )
230 | 
231 |       poses_data[action] = (enc_in, dec_in, dec_out)
232 | 
233 |     # Compute baseline errors
234 |     errs_constant_frame = running_average( poses_data, actions, 1 )
235 |     running_average_2 = running_average( poses_data, actions, 2 )
236 |     running_average_4 = running_average( poses_data, actions, 4 )
237 |     last_buffer_frame_const = last_buffer_frame( poses_data, actions)
238 | 
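    # Frame index -> milliseconds in the tables below: with poses subsampled by two
    # (25 fps, i.e. 40 ms per frame), index i corresponds to (i + 1) * 40 ms, so
    # indices 1, 3, 7, 9, 13, 24, 49 map to 80, 160, 320, 400, 560, 1000, 2000 ms.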
1) ===") 241 | print("{0: <16} | {1:4d} | {2:4d} | {3:4d} | {4:4d} | {5:4d} | {6:4d} | {7:4d}".format("milliseconds", 80, 160, 320, 400, 560, 1000, 2000)) 242 | for action in actions: 243 | print("{0: <16} | {1:.2f} | {2:.2f} | {3:.2f} | {4:.2f} | {5:.2f} | {6:.2f} | {7:.2f}".format( action, errs_constant_frame[action][1], errs_constant_frame[action][3], 244 | errs_constant_frame[action][7], errs_constant_frame[action][9], errs_constant_frame[action][13], errs_constant_frame[action][24], errs_constant_frame[action][49] )) 245 | 246 | print() 247 | print("=== Runnning avg. 2 ===") 248 | print("{0: <16} | {1:4d} | {2:4d} | {3:4d} | {4:4d} | {5:4d} | {6:4d} | {7:4d}".format("milliseconds", 80, 160, 320, 400, 560, 1000, 2000)) 249 | for action in actions: 250 | print("{0: <16} | {1:.2f} | {2:.2f} | {3:.2f} | {4:.2f} | {5:.2f} | {6:.2f} | {7:.2f}".format( action, running_average_2[action][1], running_average_2[action][3], 251 | running_average_2[action][7], running_average_2[action][9], running_average_2[action][13], running_average_2[action][24], running_average_2[action][49] )) 252 | 253 | print() 254 | print("=== Runnning avg. 4 ===") 255 | print("{0: <16} | {1:4d} | {2:4d} | {3:4d} | {4:4d} | {5:4d} | {6:4d} | {7:4d}".format("milliseconds", 80, 160, 320, 400, 560, 1000, 2000)) 256 | for action in actions: 257 | print("{0: <16} | {1:.2f} | {2:.2f} | {3:.2f} | {4:.2f} | {5:.2f} | {6:.2f} | {7:.2f}".format( action, running_average_4[action][1], running_average_4[action][3], 258 | running_average_4[action][7], running_average_4[action][9], running_average_4[action][13], running_average_4[action][24], running_average_4[action][49] )) 259 | 260 | print() 261 | print("=== Last buffer frame ===") 262 | print("{0: <16} | {1:4d} | {2:4d} | {3:4d} | {4:4d} | {5:4d} | {6:4d} | {7:4d}".format("milliseconds", 80, 160, 320, 400, 560, 1000, 2000)) 263 | for action in actions: 264 | print("{0: <16} | {1:.2f} | {2:.2f} | {3:.2f} | {4:.2f} | {5:.2f} | {6:.2f} | {7:.2f}".format( action, last_buffer_frame_const[action][1], last_buffer_frame_const[action][3], 265 | last_buffer_frame_const[action][7], last_buffer_frame_const[action][9], last_buffer_frame_const[action][13], last_buffer_frame_const[action][24], last_buffer_frame_const[action][49] )) 266 | 267 | 268 | if __name__ == "__main__": 269 | main() 270 | -------------------------------------------------------------------------------- /body_rnn_cell_extensions.py: -------------------------------------------------------------------------------- 1 | 2 | """ Extensions to TF RNN class by una_dinosaria""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import tensorflow as tf 9 | 10 | #from tensorflow.contrib.rnn.python.ops.core_rnn_cell import RNNCell 11 | from rnn_cell_implement import RNNCell # modified body cell definitions 12 | #from deltaRNN import RNNCell # only for delta-RNN 13 | #from rnn_cell_implement import MultiRNNCell 14 | import hard_att 15 | import queue 16 | 17 | # The import for LSTMStateTuple changes in TF >= 1.2.0 18 | from pkg_resources import parse_version as pv 19 | if pv(tf.__version__) >= pv('1.2.0'): 20 | from tensorflow.contrib.rnn import LSTMStateTuple 21 | else: 22 | from tensorflow.contrib.rnn.python.ops.core_rnn_cell import LSTMStateTuple 23 | del pv 24 | 25 | from tensorflow.python.ops import variable_scope as vs 26 | 27 | import collections 28 | import math 29 | 30 | class ResidualWrapper(RNNCell): 31 | """Operator adding residual connections to 
31 |   """Operator adding residual connections to a given cell."""
32 | 
33 |   def __init__(self, cell):
34 |     """Create a cell with added residual connection.
35 | 
36 |     Args:
37 |       cell: an RNNCell. The input is added to the output.
38 | 
39 |     Raises:
40 |       TypeError: if cell is not an RNNCell.
41 |     """
42 |     if not isinstance(cell, RNNCell):
43 |       raise TypeError("The parameter cell is not a RNNCell.")
44 | 
45 |     self._cell = cell
46 | 
47 |   @property
48 |   def state_size(self):
49 |     return self._cell.state_size
50 | 
51 |   @property
52 |   def output_size(self):
53 |     return self._cell.output_size
54 | 
55 |   def __call__(self, inputs, state, context, scope=None): # modified
56 |     """Run the cell and add a residual connection."""
57 | 
58 |     # Run the rnn as usual
59 |     output, new_state = self._cell(inputs, state, context, scope) # modified
60 | 
61 |     # Add the residual connection
62 |     output = tf.add(output, inputs)
63 | 
64 |     return output, new_state
65 | 
66 | class ResidualWrapperv1(RNNCell):
67 |   """Operator adding residual connections to a given cell."""
68 | 
69 |   def __init__(self, cell, output_size):
70 |     """Create a cell with added residual connection.
71 | 
72 |     Args:
73 |       cell: an RNNCell. The output is a learned interpolation of the cell output and the input.
74 | 
75 |     Raises:
76 |       TypeError: if cell is not an RNNCell.
77 |     """
78 |     if not isinstance(cell, RNNCell):
79 |       raise TypeError("The parameter cell is not a RNNCell.")
80 | 
81 |     self._cell = cell
82 |     self._output_size = output_size
83 | 
84 |     self.r = tf.get_variable("r_interp", [self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
85 | 
86 |   @property
87 |   def state_size(self):
88 |     return self._cell.state_size
89 | 
90 |   @property
91 |   def output_size(self):
92 |     return self._cell.output_size
93 | 
94 |   def __call__(self, inputs, state, context, scope=None): # modified
95 |     """Run the cell and interpolate its output with the input."""
96 | 
97 |     # Run the rnn as usual
98 |     output, new_state = self._cell(inputs, state, context, scope) # modified
99 | 
100 |     # perform residual_v1 interpolation op
101 |     output = (1.0 - self.r) * output + self.r * inputs
102 | 
103 |     return output, new_state
104 | 
105 | 
106 | class ResidualWrapperv2(RNNCell):
107 |   """Operator adding residual connections to a given cell."""
108 | 
109 |   def __init__(self, cell, output_size):
110 |     """Create a cell with added residual connection.
111 | 
112 |     Args:
113 |       cell: an RNNCell. The output is a learned interpolation of the cell output and a linear transform of the input.
114 | 
115 |     Raises:
116 |       TypeError: if cell is not an RNNCell.
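    Note: unlike the plain ResidualWrapper above, this variant learns a
    per-dimension gate r and emits (1 - r) * cell_output + r * (W_res * inputs + b_res),
    i.e. a learned interpolation between the recurrent output and a linear map of
    the input (see __call__ below).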
117 | """ 118 | if not isinstance(cell, RNNCell): 119 | raise TypeError("The parameter cell is not a RNNCell.") 120 | 121 | self._cell = cell 122 | self._output_size = output_size 123 | 124 | self.r = tf.get_variable("r_interp", [self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 125 | self.W_res = tf.get_variable("W_res", [self._output_size, self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 126 | self.b_res = tf.get_variable("b_res", [self._output_size], dtype=tf.float32, initializer=tf.constant_initializer(0.1)) 127 | 128 | 129 | @property 130 | def state_size(self): 131 | return self._cell.state_size 132 | 133 | @property 134 | def output_size(self): 135 | return self._cell.output_size 136 | 137 | def __call__(self, inputs, state, context, scope=None): # modified 138 | """Run the cell and add a residual connection.""" 139 | 140 | # Run the rnn as usual 141 | output, new_state = self._cell(inputs, state, context, scope) # modified 142 | 143 | # perform residual_v2 interpolation op 144 | output = (1.0 - self.r) * output + self.r * (tf.matmul(inputs, self.W_res) + self.b_res) 145 | 146 | return output, new_state 147 | 148 | 149 | class LinearSpaceDecoderWrapper(RNNCell): # modified 150 | """Operator adding a linear encoder to an RNN cell""" 151 | 152 | def __init__(self, cell, output_size, is_attention, num_attn_units, num_actions, memory_length): 153 | """Create a cell with with a linear encoder in space. 154 | 155 | Args: 156 | cell: an RNNCell. The input is passed through a linear layer. 157 | 158 | Raises: 159 | TypeError: if cell is not an RNNCell. 160 | """ 161 | if not isinstance(cell, RNNCell): # modified 162 | raise TypeError("The parameter cell is not a RNNCell.") 163 | 164 | self._cell = cell 165 | self.is_attention = is_attention 166 | self.num_attn_units = num_attn_units 167 | self.num_actions = num_actions 168 | self.memory_length = memory_length 169 | 170 | print( 'output_size = {0}'.format(output_size) ) 171 | print( ' state_size = {0}'.format(self._cell.state_size) ) 172 | 173 | # Tuple if multi-rnn 174 | if isinstance(self._cell.state_size,tuple): 175 | 176 | # Fine if GRU... 
177 |       insize = self._cell.state_size[-1]
178 | 
179 |       # LSTMStateTuple if LSTM
180 |       if isinstance( insize, LSTMStateTuple ):
181 |         insize = insize.h
182 | 
183 |     else:
184 |       # Fine if not multi-rnn
185 |       insize = self._cell.state_size
186 | 
187 |     # output projection params
188 |     self.w_out = tf.get_variable("proj_w_out", [insize, output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
189 |     self.b_out = tf.get_variable("proj_b_out", [output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
190 | 
191 |     if self.is_attention: # flag to indicate whether we're using an attention-based LM
192 |       # init attention params
193 |       self.num_attn_units = num_attn_units
194 |       self.W_1_attn = tf.get_variable("W_1_attn", [insize+self.num_actions, self.num_attn_units], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
195 |       self.W_2_attn = tf.get_variable("W_2_attn", [insize+self.num_actions, self.num_attn_units], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
196 |       self.v_a = tf.get_variable("v_a_attn", [1, self.num_attn_units], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
197 |       self.memory_length = memory_length
198 |       self.w_out_c_t = tf.get_variable("w_out_c_t", [insize+self.num_actions, output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
199 |       self.attn_memory = queue.Queue(self.memory_length)
200 |       #self.call_counter = 0
201 | 
202 |     self.linear_output_size = output_size
203 | 
204 | 
205 |   @property
206 |   def state_size(self):
207 |     return self._cell.state_size
208 | 
209 |   @property
210 |   def output_size(self):
211 |     return self.linear_output_size
212 | 
213 |   def __call__(self, inputs, state, context, scope=None):
214 |     """Run the cell, then project its output through the linear decoder."""
215 | 
216 |     #self.call_counter = self.call_counter + 1 # temp fix
217 | 
218 |     # Run the rnn as usual
219 |     output, new_state = self._cell(inputs, state, context, scope)
220 | 
221 |     if self.is_attention and self.attn_memory.full():
222 |       # keep a rolling memory of the previous memory_length states (h_enc)
223 |       self.attn_memory.get()
224 |       self.attn_memory.put(tf.concat([new_state, context], axis=1))
225 | 
226 |     elif self.is_attention and (not self.attn_memory.full()):
227 |       self.attn_memory.put(tf.concat([new_state, context], axis=1))
228 | 
229 |     if self.is_attention: #and self.call_counter>50: # some flag to indicate when to use attention
230 |       # convert attn_memory -> list
231 |       list_attn_memory = list(self.attn_memory.queue)
232 | 
233 |       # apply attention and include c_t in the decoding to get y_hat
234 |       alpha, c_t = hard_att.bahdanau_attention(tf.concat([state, context], axis=1), list_attn_memory, self.v_a, self.W_1_attn, self.W_2_attn, self.memory_length)
235 |       output = tf.matmul(output, self.w_out) + tf.matmul(c_t, self.w_out_c_t) + self.b_out
236 | 
237 |     if not self.is_attention: #) or (self.is_attention and self.call_counter <= 50):
238 |       # Apply the multiplication to everything (when no attention is used to decode)
239 |       output = tf.matmul(output, self.w_out) + self.b_out
240 | 
241 |     # setting the counter back after 150 timesteps when attention is being used
242 |     #if self.is_attention and self.call_counter == 150:
243 |     #  self.call_counter = 0
244 | 
245 |     return output, new_state
246 | 
--------------------------------------------------------------------------------
/body_rnn_cell_extensions_v1.py:
--------------------------------------------------------------------------------
1 | 
2 | """ Extensions to TF RNN class by una_dinosaria"""
3 | 
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 | 
8 | import tensorflow as tf
9 | 
10 | #from tensorflow.contrib.rnn.python.ops.core_rnn_cell import RNNCell
11 | from rnn_cell_implement import RNNCell # modified body cell definitions
12 | #from deltaRNN import RNNCell # only for delta-RNN
13 | #from rnn_cell_implement import MultiRNNCell
14 | import hard_att
15 | import queue
16 | 
17 | # The import for LSTMStateTuple changes in TF >= 1.2.0
18 | from pkg_resources import parse_version as pv
19 | if pv(tf.__version__) >= pv('1.2.0'):
20 |   from tensorflow.contrib.rnn import LSTMStateTuple
21 | else:
22 |   from tensorflow.contrib.rnn.python.ops.core_rnn_cell import LSTMStateTuple
23 | del pv
24 | 
25 | from tensorflow.python.ops import variable_scope as vs
26 | 
27 | import collections
28 | import math
29 | 
30 | class ResidualWrapper(RNNCell):
31 |   """Operator adding residual connections to a given cell."""
32 | 
33 |   def __init__(self, cell):
34 |     """Create a cell with added residual connection.
35 | 
36 |     Args:
37 |       cell: an RNNCell. The input is added to the output.
38 | 
39 |     Raises:
40 |       TypeError: if cell is not an RNNCell.
41 |     """
42 |     if not isinstance(cell, RNNCell):
43 |       raise TypeError("The parameter cell is not a RNNCell.")
44 | 
45 |     self._cell = cell
46 | 
47 |   @property
48 |   def state_size(self):
49 |     return self._cell.state_size
50 | 
51 |   @property
52 |   def output_size(self):
53 |     return self._cell.output_size
54 | 
55 |   def __call__(self, inputs, state, context, scope=None): # modified
56 |     """Run the cell and add a residual connection."""
57 | 
58 |     # Run the rnn as usual
59 |     output, new_state = self._cell(inputs, state, context, scope) # modified
60 | 
61 |     # Add the residual connection
62 |     output = tf.add(output, inputs)
63 | 
64 |     return output, new_state
65 | 
66 | class ResidualWrapperv1(RNNCell):
67 |   """Operator adding residual connections to a given cell."""
68 | 
69 |   def __init__(self, cell, output_size):
70 |     """Create a cell with added residual connection.
71 | 
72 |     Args:
73 |       cell: an RNNCell. The output is a learned interpolation of the cell output and the input.
74 | 
75 |     Raises:
76 |       TypeError: if cell is not an RNNCell.
77 |     """
78 |     if not isinstance(cell, RNNCell):
79 |       raise TypeError("The parameter cell is not a RNNCell.")
80 | 
81 |     self._cell = cell
82 |     self._output_size = output_size
83 | 
84 |     self.r = tf.get_variable("r_interp", [self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
85 | 
86 |   @property
87 |   def state_size(self):
88 |     return self._cell.state_size
89 | 
90 |   @property
91 |   def output_size(self):
92 |     return self._cell.output_size
93 | 
94 |   def __call__(self, inputs, state, context, scope=None): # modified
95 |     """Run the cell and interpolate its output with the input."""
96 | 
97 |     # Run the rnn as usual
98 |     output, new_state = self._cell(inputs, state, context, scope) # modified
99 | 
100 |     # perform residual_v1 interpolation op
101 |     output = (1.0 - self.r) * output + self.r * inputs
102 | 
103 |     return output, new_state
104 | 
105 | 
106 | class ResidualWrapperv2(RNNCell):
107 |   """Operator adding residual connections to a given cell."""
108 | 
109 |   def __init__(self, cell, output_size):
110 |     """Create a cell with added residual connection.
111 | 
112 |     Args:
113 |       cell: an RNNCell. The output is a learned interpolation of the cell output and a linear transform of the input.
114 | 
115 |     Raises:
116 |       TypeError: if cell is not an RNNCell.
117 | """ 118 | if not isinstance(cell, RNNCell): 119 | raise TypeError("The parameter cell is not a RNNCell.") 120 | 121 | self._cell = cell 122 | self._output_size = output_size 123 | 124 | self.r = tf.get_variable("r_interp", [self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 125 | self.W_res = tf.get_variable("W_res", [self._output_size, self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 126 | self.b_res = tf.get_variable("b_res", [self._output_size], dtype=tf.float32, initializer=tf.constant_initializer(0.1)) 127 | 128 | 129 | @property 130 | def state_size(self): 131 | return self._cell.state_size 132 | 133 | @property 134 | def output_size(self): 135 | return self._cell.output_size 136 | 137 | def __call__(self, inputs, state, context, scope=None): # modified 138 | """Run the cell and add a residual connection.""" 139 | 140 | # Run the rnn as usual 141 | output, new_state = self._cell(inputs, state, context, scope) # modified 142 | 143 | # perform residual_v2 interpolation op 144 | output = (1.0 - self.r) * output + self.r * (tf.matmul(inputs, self.W_res) + self.b_res) 145 | 146 | return output, new_state 147 | 148 | 149 | class LinearSpaceDecoderWrapper(RNNCell): # modified 150 | """Operator adding a linear encoder to an RNN cell""" 151 | 152 | def __init__(self, cell, output_size): 153 | """Create a cell with with a linear encoder in space. 154 | 155 | Args: 156 | cell: an RNNCell. The input is passed through a linear layer. 157 | 158 | Raises: 159 | TypeError: if cell is not an RNNCell. 160 | """ 161 | if not isinstance(cell, RNNCell): # modified 162 | raise TypeError("The parameter cell is not a RNNCell.") 163 | 164 | self._cell = cell 165 | 166 | print( 'output_size = {0}'.format(output_size) ) 167 | print( ' state_size = {0}'.format(self._cell.state_size) ) 168 | 169 | # Tuple if multi-rnn 170 | if isinstance(self._cell.state_size,tuple): 171 | 172 | # Fine if GRU... 173 | insize = self._cell.state_size[-1] 174 | 175 | # LSTMStateTuple if LSTM 176 | if isinstance( insize, LSTMStateTuple ): 177 | insize = insize.h 178 | 179 | else: 180 | # Fine if not multi-rnn 181 | insize = self._cell.state_size 182 | 183 | # output projection params 184 | self.w_out = tf.get_variable("proj_w_out", [insize, output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 185 | self.b_out = tf.get_variable("proj_b_out", [output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 186 | self.linear_output_size = output_size 187 | 188 | 189 | @property 190 | def state_size(self): 191 | return self._cell.state_size 192 | 193 | @property 194 | def output_size(self): 195 | return self.linear_output_size 196 | 197 | def __call__(self, inputs, state, context, scope=None): 198 | """Use a linear layer and pass the output to the cell.""" 199 | 200 | # Run the rnn as usual 201 | output, new_state = self._cell(inputs, state, context, scope) 202 | 203 | output = tf.matmul(output, self.w_out) + self.b_out 204 | 205 | return output, new_state 206 | -------------------------------------------------------------------------------- /core_rnn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
--------------------------------------------------------------------------------
/core_rnn.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """RNN helpers for TensorFlow models."""
17 | 
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | 
22 | from tensorflow.contrib.rnn.python.ops import core_rnn_cell
23 | 
24 | import rnn_cell_implement # has modified RNNcell definitions
25 | 
26 | from tensorflow.python.framework import ops
27 | from tensorflow.python.framework import tensor_shape
28 | from tensorflow.python.ops import array_ops
29 | from tensorflow.python.ops import math_ops
30 | from tensorflow.python.ops import rnn
31 | from tensorflow.python.ops import rnn_cell_impl
32 | from tensorflow.python.ops import variable_scope as vs
33 | from tensorflow.python.util import nest
34 | 
35 | 
36 | # pylint: disable=protected-access
37 | #_state_size_with_prefix = rnn_cell_impl._state_size_with_prefix
38 | _state_size_with_prefix = rnn_cell_implement._state_size # modified
39 | _infer_state_dtype = rnn._infer_state_dtype
40 | _reverse_seq = rnn._reverse_seq
41 | _rnn_step = rnn._rnn_step
42 | # pylint: enable=protected-access
43 | 
44 | 
45 | def static_rnn(cell, inputs, context, initial_state=None, dtype=None,
46 |                sequence_length=None, scope=None):
47 |   """Creates a recurrent neural network specified by RNNCell `cell`.
48 |   The simplest form of RNN network generated is:
49 |   ```python
50 |     state = cell.zero_state(...)
51 |     outputs = []
52 |     for input_, context_ in zip(inputs, context):
53 |       output, state = cell(input_, state, context_)
54 |       outputs.append(output)
55 |     return (outputs, state)
56 |   ```
57 |   However, a few other options are available:
58 |   An initial state can be provided.
59 |   If the sequence_length vector is provided, dynamic calculation is performed.
60 |   This method of calculation does not compute the RNN steps past the maximum
61 |   sequence length of the minibatch (thus saving computational time),
62 |   and properly propagates the state at an example's sequence length
63 |   to the final state output.
64 |   The dynamic calculation performed is, at time `t` for batch row `b`,
65 |   ```python
66 |     (output, state)(b, t) =
67 |       (t >= sequence_length(b))
68 |         ? (zeros(cell.output_size), states(b, sequence_length(b) - 1))
69 |         : cell(input(b, t), state(b, t - 1))
70 |   ```
71 |   Args:
72 |     cell: An instance of RNNCell.
73 |     inputs: A length T list of inputs, each a `Tensor` of shape
74 |       `[batch_size, input_size]`, or a nested tuple of such elements.
    context: A length T list of per-timestep context tensors, consumed in
      lockstep with `inputs` (required by the modified cell definitions).
75 |     initial_state: (optional) An initial state for the RNN.
76 |       If `cell.state_size` is an integer, this must be
77 |       a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
78 |       If `cell.state_size` is a tuple, this should be a tuple of
79 |       tensors having shapes `[batch_size, s] for s in cell.state_size`.
80 |     dtype: (optional) The data type for the initial state and expected output.
81 |       Required if initial_state is not provided or RNN state has a heterogeneous
82 |       dtype.
83 |     sequence_length: Specifies the length of each sequence in inputs.
84 |       An int32 or int64 vector (tensor) size `[batch_size]`, values in `[0, T)`.
85 |     scope: VariableScope for the created subgraph; defaults to "rnn".
86 |   Returns:
87 |     A pair (outputs, state) where:
88 |     - outputs is a length T list of outputs (one for each input), or a nested
89 |       tuple of such elements.
90 |     - state is the final state
91 |   Raises:
92 |     TypeError: If `cell` is not an instance of RNNCell.
93 |     ValueError: If `inputs` is `None` or an empty list, or if the input depth
94 |       (column size) cannot be inferred from inputs via shape inference.
95 |   """
96 | 
97 |   if not isinstance(cell, rnn_cell_implement.RNNCell): # checking instance in modified cell def file
98 |     raise TypeError("cell must be an instance of RNNCell")
99 |   if not nest.is_sequence(inputs):
100 |     raise TypeError("inputs must be a sequence")
101 |   if not inputs:
102 |     raise ValueError("inputs must not be empty")
103 | 
104 |   outputs = []
105 |   # Create a new scope in which the caching device is either
106 |   # determined by the parent scope, or is set to place the cached
107 |   # Variable using the same placement as for the rest of the RNN.
108 |   with vs.variable_scope(scope or "rnn") as varscope:
109 |     if varscope.caching_device is None:
110 |       varscope.set_caching_device(lambda op: op.device)
111 | 
112 |     # Obtain the first sequence of the input
113 |     first_input = inputs
114 |     while nest.is_sequence(first_input):
115 |       first_input = first_input[0]
116 | 
117 |     # Temporarily avoid EmbeddingWrapper and seq2seq badness
118 |     # TODO(lukaszkaiser): remove EmbeddingWrapper
119 |     if first_input.get_shape().ndims != 1:
120 | 
121 |       input_shape = first_input.get_shape().with_rank_at_least(2)
122 |       fixed_batch_size = input_shape[0]
123 | 
124 |       flat_inputs = nest.flatten(inputs)
125 |       for flat_input in flat_inputs:
126 |         input_shape = flat_input.get_shape().with_rank_at_least(2)
127 |         batch_size, input_size = input_shape[0], input_shape[1:]
128 |         fixed_batch_size.merge_with(batch_size)
129 |         for i, size in enumerate(input_size):
130 |           if size.value is None:
131 |             raise ValueError(
132 |                 "Input size (dimension %d of inputs) must be accessible via "
133 |                 "shape inference, but saw value None." % i)
134 |     else:
135 |       fixed_batch_size = first_input.get_shape().with_rank_at_least(1)[0]
136 | 
137 |     if fixed_batch_size.value:
138 |       batch_size = fixed_batch_size.value
139 |     else:
140 |       batch_size = array_ops.shape(first_input)[0]
141 |     if initial_state is not None:
142 |       state = initial_state
143 |     else:
144 |       if not dtype:
145 |         raise ValueError("If no initial_state is provided, "
146 |                          "dtype must be specified")
147 |       state = cell.zero_state(batch_size, dtype)
148 | 
149 |     if sequence_length is not None: # Prepare variables
150 |       sequence_length = ops.convert_to_tensor(
151 |           sequence_length, name="sequence_length")
152 |       if sequence_length.get_shape().ndims not in (None, 1):
153 |         raise ValueError(
154 |             "sequence_length must be a vector of length batch_size")
155 |       def _create_zero_output(output_size):
156 |         # convert int to TensorShape if necessary
157 |         size = _state_size_with_prefix(output_size, prefix=[batch_size])
158 |         output = array_ops.zeros(
159 |             array_ops.stack(size), _infer_state_dtype(dtype, state))
160 |         shape = _state_size_with_prefix(
161 |             output_size, prefix=[fixed_batch_size.value])
162 |         output.set_shape(tensor_shape.TensorShape(shape))
163 |         return output
164 | 
165 |       output_size = cell.output_size
166 |       flat_output_size = nest.flatten(output_size)
167 |       flat_zero_output = tuple(
168 |           _create_zero_output(size) for size in flat_output_size)
169 |       zero_output = nest.pack_sequence_as(structure=output_size,
170 |                                           flat_sequence=flat_zero_output)
171 | 
172 |       sequence_length = math_ops.to_int32(sequence_length)
173 |       min_sequence_length = math_ops.reduce_min(sequence_length)
174 |       max_sequence_length = math_ops.reduce_max(sequence_length)
175 | 
176 |     for time, (input_, ctxt_) in enumerate(zip(inputs, context)): # modified to include context
177 |       if time > 0: varscope.reuse_variables()
178 |       # pylint: disable=cell-var-from-loop
179 |       call_cell = lambda: cell(input_, state, ctxt_) # call to modified RNNcell
180 |       # pylint: enable=cell-var-from-loop
181 |       if sequence_length is not None:
182 |         (output, state) = _rnn_step(
183 |             time=time,
184 |             sequence_length=sequence_length,
185 |             min_sequence_length=min_sequence_length,
186 |             max_sequence_length=max_sequence_length,
187 |             zero_output=zero_output,
188 |             state=state,
189 |             call_cell=call_cell,
190 |             state_size=cell.state_size)
191 |       else:
192 |         (output, state) = call_cell()
193 | 
194 |       outputs.append(output)
195 | 
196 |   return (outputs, state)
197 | 
198 | 
199 | def static_state_saving_rnn(cell, inputs, context, state_saver, state_name,
200 |                             sequence_length=None, scope=None): # modified to include context
201 |   """RNN that accepts a state saver for time-truncated RNN calculation.
202 |   Args:
203 |     cell: An instance of `RNNCell`.
204 |     inputs: A length T list of inputs, each a `Tensor` of shape
205 |       `[batch_size, input_size]`.
    context: A length T list of per-timestep context tensors (see `static_rnn`).
206 |     state_saver: A state saver object with methods `state` and `save_state`.
207 |     state_name: Python string or tuple of strings. The name to use with the
208 |       state_saver. If the cell returns tuples of states (i.e.,
209 |       `cell.state_size` is a tuple) then `state_name` should be a tuple of
210 |       strings having the same length as `cell.state_size`. Otherwise it should
211 |       be a single string.
212 |     sequence_length: (optional) An int32/int64 vector size [batch_size].
213 |       See the documentation for rnn() for more details about sequence_length.
214 |     scope: VariableScope for the created subgraph; defaults to "rnn".
215 |   Returns:
216 |     A pair (outputs, state) where:
217 |       outputs is a length T list of outputs (one for each input)
218 |       state is the final state
219 |   Raises:
220 |     TypeError: If `cell` is not an instance of RNNCell.
221 |     ValueError: If `inputs` is `None` or an empty list, or if the arity and
222 |       type of `state_name` does not match that of `cell.state_size`.
223 |   """
224 |   state_size = cell.state_size
225 |   state_is_tuple = nest.is_sequence(state_size)
226 |   state_name_tuple = nest.is_sequence(state_name)
227 | 
228 |   if state_is_tuple != state_name_tuple:
229 |     raise ValueError(
230 |         "state_name should be the same type as cell.state_size. "
231 |         "state_name: %s, cell.state_size: %s"
232 |         % (str(state_name), str(state_size)))
233 | 
234 |   if state_is_tuple:
235 |     state_name_flat = nest.flatten(state_name)
236 |     state_size_flat = nest.flatten(state_size)
237 | 
238 |     if len(state_name_flat) != len(state_size_flat):
239 |       raise ValueError("#elems(state_name) != #elems(state_size): %d vs. %d"
240 |                        % (len(state_name_flat), len(state_size_flat)))
241 | 
242 |     initial_state = nest.pack_sequence_as(
243 |         structure=state_size,
244 |         flat_sequence=[state_saver.state(s) for s in state_name_flat])
245 |   else:
246 |     initial_state = state_saver.state(state_name)
247 | 
248 |   (outputs, state) = static_rnn(cell, inputs, context, initial_state=initial_state,
249 |                                 sequence_length=sequence_length, scope=scope) # modified to pass context
250 | 
251 |   if state_is_tuple:
252 |     flat_state = nest.flatten(state)
253 |     state_name = nest.flatten(state_name)
254 |     save_state = [state_saver.save_state(name, substate)
255 |                   for name, substate in zip(state_name, flat_state)]
256 |   else:
257 |     save_state = [state_saver.save_state(state_name, state)]
258 | 
259 |   with ops.control_dependencies(save_state):
260 |     last_output = outputs[-1]
261 |     flat_last_output = nest.flatten(last_output)
262 |     flat_last_output = [
263 |         array_ops.identity(output) for output in flat_last_output]
264 |     outputs[-1] = nest.pack_sequence_as(structure=last_output,
265 |                                         flat_sequence=flat_last_output)
266 | 
267 |   return (outputs, state)
268 | 
269 | 
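# Example usage of the modified static_rnn (a sketch; names, shapes and sizes are
# illustrative, not taken from this repo). One context tensor is consumed per
# timestep, in lockstep with `inputs`:
#   inputs = [tf.placeholder(tf.float32, [batch_size, input_dim]) for _ in range(T)]
#   context = [tf.placeholder(tf.float32, [batch_size, context_dim]) for _ in range(T)]
#   outputs, final_state = static_rnn(cell, inputs, context, dtype=tf.float32)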
270 | def static_bidirectional_rnn(cell_fw, cell_bw, inputs, context, # modified to include context
271 |                              initial_state_fw=None, initial_state_bw=None,
272 |                              dtype=None, sequence_length=None, scope=None):
273 |   """Creates a bidirectional recurrent neural network.
274 |   Similar to the unidirectional case above (rnn) but takes input and builds
275 |   independent forward and backward RNNs with the final forward and backward
276 |   outputs depth-concatenated, such that the output will have the format
277 |   [time][batch][cell_fw.output_size + cell_bw.output_size]. The input_size of
278 |   forward and backward cell must match. The initial state for both directions
279 |   is zero by default (but can be set optionally) and no intermediate states are
280 |   ever returned -- the network is fully unrolled for the given (passed in)
281 |   length(s) of the sequence(s) or completely unrolled if length(s) is not given.
282 |   Args:
283 |     cell_fw: An instance of RNNCell, to be used for forward direction.
284 |     cell_bw: An instance of RNNCell, to be used for backward direction.
285 |     inputs: A length T list of inputs, each a tensor of shape
286 |       [batch_size, input_size], or a nested tuple of such elements.
    context: A length T list of per-timestep context tensors (see `static_rnn`);
      it is reversed alongside the inputs for the backward pass.
287 |     initial_state_fw: (optional) An initial state for the forward RNN.
288 |       This must be a tensor of appropriate type and shape
289 |       `[batch_size, cell_fw.state_size]`.
290 |       If `cell_fw.state_size` is a tuple, this should be a tuple of
291 |       tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
292 |     initial_state_bw: (optional) Same as for `initial_state_fw`, but using
293 |       the corresponding properties of `cell_bw`.
294 |     dtype: (optional) The data type for the initial state. Required if
295 |       either of the initial states are not provided.
296 |     sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
297 |       containing the actual lengths for each of the sequences.
298 |     scope: VariableScope for the created subgraph; defaults to
299 |       "bidirectional_rnn"
300 |   Returns:
301 |     A tuple (outputs, output_state_fw, output_state_bw) where:
302 |       outputs is a length `T` list of outputs (one for each input), which
303 |         are depth-concatenated forward and backward outputs.
304 |       output_state_fw is the final state of the forward rnn.
305 |       output_state_bw is the final state of the backward rnn.
306 |   Raises:
307 |     TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
308 |     ValueError: If inputs is None or an empty list.
309 |   """
310 | 
311 |   if not isinstance(cell_fw, rnn_cell_implement.RNNCell): # modified, matching static_rnn's check
312 |     raise TypeError("cell_fw must be an instance of RNNCell")
313 |   if not isinstance(cell_bw, rnn_cell_implement.RNNCell): # modified, matching static_rnn's check
314 |     raise TypeError("cell_bw must be an instance of RNNCell")
315 |   if not nest.is_sequence(inputs):
316 |     raise TypeError("inputs must be a sequence")
317 |   if not inputs:
318 |     raise ValueError("inputs must not be empty")
319 | 
320 |   with vs.variable_scope(scope or "bidirectional_rnn"):
321 |     # Forward direction
322 |     with vs.variable_scope("fw") as fw_scope:
323 |       output_fw, output_state_fw = static_rnn(
324 |           cell_fw, inputs, context, initial_state_fw, dtype,
325 |           sequence_length, scope=fw_scope) # modified to pass context
326 | 
327 |     # Backward direction
328 |     with vs.variable_scope("bw") as bw_scope:
329 |       reversed_inputs = _reverse_seq(inputs, sequence_length)
      reversed_context = _reverse_seq(context, sequence_length) # keep context aligned with the reversed inputs
330 |       tmp, output_state_bw = static_rnn(
331 |           cell_bw, reversed_inputs, reversed_context, initial_state_bw,
332 |           dtype, sequence_length, scope=bw_scope) # modified to pass context
333 | 
334 |     output_bw = _reverse_seq(tmp, sequence_length)
335 |     # Concat each of the forward/backward outputs
336 |     flat_output_fw = nest.flatten(output_fw)
337 |     flat_output_bw = nest.flatten(output_bw)
338 | 
339 |     flat_outputs = tuple(
340 |         array_ops.concat([fw, bw], 1)
341 |         for fw, bw in zip(flat_output_fw, flat_output_bw))
342 | 
343 |     outputs = nest.pack_sequence_as(structure=output_fw,
344 |                                     flat_sequence=flat_outputs)
345 | 
346 |   return (outputs, output_state_fw, output_state_bw)
347 | 
348 | 
--------------------------------------------------------------------------------
/data_utils.py:
--------------------------------------------------------------------------------
1 | 
2 | """Functions that help with data processing for human3.6m"""
3 | 
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 | 
8 | import numpy as np
9 | from six.moves import xrange # pylint: disable=redefined-builtin
10 | import copy
11 | 
12 | import itertools
13 | 
14 | def rotmat2euler( R ):
15 |   """
16 |   Converts a rotation matrix to Euler angles
17 |   Matlab port to python for evaluation purposes
18 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/RotMat2Euler.m#L1
19 | 
20 |   Args
21 |     R: a 3x3 rotation matrix
22 |   Returns
23 |     eul: a 3x1 Euler angle representation of R
24 |   """
25 |   if R[0,2] == 1 or R[0,2] == -1:
26 |     # special case
27 |     E3 = 0 # set arbitrarily
28 |     dlta = np.arctan2( R[0,1], R[0,2] );
29 | 
30 |     if R[0,2] == -1:
31 |       E2 = np.pi/2;
32 |       E1 = E3 + dlta;
33 |     else:
34 |       E2 = -np.pi/2;
35 |       E1 = -E3 + dlta;
36 | 
37 |   else:
38 |     E2 = -np.arcsin( R[0,2] )
39 |     E1 = np.arctan2( R[1,2]/np.cos(E2), R[2,2]/np.cos(E2) )
40 |     E3 = np.arctan2( R[0,1]/np.cos(E2), R[0,0]/np.cos(E2) )
41 | 
42 |   eul = np.array([E1, E2, E3]);
43 |   return eul
44 | 
45 | 
46 | def quat2expmap(q):
47 |   """
48 |   Converts a quaternion to an exponential map
49 |   Matlab port to python for evaluation purposes
50 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/quat2expmap.m#L1
51 | 
52 |   Args
53 |     q: 1x4 quaternion
54 |   Returns
55 |     r: 1x3 exponential map
56 |   Raises
57 |     ValueError if the l2 norm of the quaternion is not close to 1
58 |   """
59 |   if (np.abs(np.linalg.norm(q)-1)>1e-3):
60 |     raise ValueError("quat2expmap: input quaternion is not norm 1")
61 | 
62 |   sinhalftheta = np.linalg.norm(q[1:])
63 |   coshalftheta = q[0]
64 | 
65 |   r0 = np.divide( q[1:], (np.linalg.norm(q[1:]) + np.finfo(np.float32).eps));
66 |   theta = 2 * np.arctan2( sinhalftheta, coshalftheta )
67 |   theta = np.mod( theta + 2*np.pi, 2*np.pi )
68 | 
69 |   if theta > np.pi:
70 |     theta = 2 * np.pi - theta
71 |     r0 = -r0
72 | 
73 |   r = r0 * theta
74 |   return r
75 | 
76 | def rotmat2quat(R):
77 |   """
78 |   Converts a rotation matrix to a quaternion
79 |   Matlab port to python for evaluation purposes
80 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/rotmat2quat.m#L4
81 | 
82 |   Args
83 |     R: 3x3 rotation matrix
84 |   Returns
85 |     q: 1x4 quaternion
86 |   """
87 |   rotdiff = R - R.T;
88 | 
89 |   r = np.zeros(3)
90 |   r[0] = -rotdiff[1,2]
91 |   r[1] = rotdiff[0,2]
92 |   r[2] = -rotdiff[0,1]
93 |   sintheta = np.linalg.norm(r) / 2;
94 |   r0 = np.divide(r, np.linalg.norm(r) + np.finfo(np.float32).eps );
95 | 
96 |   costheta = (np.trace(R)-1) / 2;
97 | 
98 |   theta = np.arctan2( sintheta, costheta );
99 | 
100 |   q = np.zeros(4)
101 |   q[0] = np.cos(theta/2)
102 |   q[1:] = r0*np.sin(theta/2)
103 |   return q
104 | 
105 | def rotmat2expmap(R):
106 |   return quat2expmap( rotmat2quat(R) );
107 | 
108 | def expmap2rotmat(r):
109 |   """
110 |   Converts an exponential map angle to a rotation matrix
111 |   Matlab port to python for evaluation purposes
112 |   I believe this is also called Rodrigues' formula
113 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/expmap2rotmat.m
114 | 
115 |   Args
116 |     r: 1x3 exponential map
117 |   Returns
118 |     R: 3x3 rotation matrix
119 |   """
120 |   theta = np.linalg.norm( r )
121 |   r0 = np.divide( r, theta + np.finfo(np.float32).eps )
122 |   r0x = np.array([0, -r0[2], r0[1], 0, 0, -r0[0], 0, 0, 0]).reshape(3,3)
123 |   r0x = r0x - r0x.T
124 |   R = np.eye(3,3) + np.sin(theta)*r0x + (1-np.cos(theta))*(r0x).dot(r0x);
125 |   return R
126 | 
127 | 
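# Example (a sketch of how these converters chain): evaluation converts each
# 3-vector of exponential-map angles to Euler angles via
#   eul = rotmat2euler( expmap2rotmat( r ) )
# exactly as done in baselines.py's denormalize_and_convert_to_euler.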
128 | def unNormalizeData(normalizedData, data_mean, data_std, dimensions_to_ignore, actions, one_hot ):
129 |   """Borrowed from SRNN code. Un-normalizes a matrix back into its original representation.
130 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/generateMotionData.py#L12
131 | 
132 |   Args
133 |     normalizedData: nxd matrix with normalized data
134 |     data_mean: vector of mean used to normalize the data
135 |     data_std: vector of standard deviation used to normalize the data
136 |     dimensions_to_ignore: vector with dimensions not used by the model
137 |     actions: list of strings with the encoded actions
138 |     one_hot: whether the data comes with one-hot encoding
139 |   Returns
140 |     origData: the data mapped back to its original, un-normalized representation
141 |   """
142 |   T = normalizedData.shape[0]
143 |   D = data_mean.shape[0]
144 | 
145 |   origData = np.zeros((T, D), dtype=np.float32)
146 |   dimensions_to_use = []
147 |   for i in range(D):
148 |     if i in dimensions_to_ignore:
149 |       continue
150 |     dimensions_to_use.append(i)
151 |   dimensions_to_use = np.array(dimensions_to_use)
152 | 
153 |   if one_hot:
154 |     origData[:, dimensions_to_use] = normalizedData[:, :-len(actions)]
155 |   else:
156 |     origData[:, dimensions_to_use] = normalizedData
157 | 
158 |   # potentially inefficient, but only done once per experiment
159 |   stdMat = data_std.reshape((1, D))
160 |   stdMat = np.repeat(stdMat, T, axis=0)
161 |   meanMat = data_mean.reshape((1, D))
162 |   meanMat = np.repeat(meanMat, T, axis=0)
163 |   origData = np.multiply(origData, stdMat) + meanMat
164 |   return origData
165 | 
166 | 
167 | def revert_output_format(poses, data_mean, data_std, dim_to_ignore, actions, one_hot):
168 |   """
169 |   Converts the output of the neural network to a format that is easier to
170 |   manipulate, e.g. for conversion to another format or for visualization
171 | 
172 |   Args
173 |     poses: The output from the TF model. A list with (seq_length) entries,
174 |       each with a (batch_size, dim) output
175 |   Returns
176 |     poses_out: A tensor of size (batch_size, seq_length, dim) output. Each
177 |       batch is an n-by-d sequence of poses.
178 |   """
179 |   seq_len = len(poses)
180 |   if seq_len == 0:
181 |     return []
182 | 
183 |   batch_size, dim = poses[0].shape
184 | 
185 |   poses_out = np.concatenate(poses)
186 |   poses_out = np.reshape(poses_out, (seq_len, batch_size, dim))
187 |   poses_out = np.transpose(poses_out, [1, 0, 2])
188 | 
189 |   poses_out_list = []
190 |   for i in xrange(poses_out.shape[0]):
191 |     poses_out_list.append(unNormalizeData(poses_out[i, :, :], data_mean, data_std, dim_to_ignore, actions, one_hot))
192 | 
193 |   return poses_out_list
194 | 
195 | 
196 | def readCSVasFloat(filename):
197 |   """
198 |   Borrowed from SRNN code. Reads a csv and returns a float matrix.
199 |   https://github.com/asheshjain399/NeuralModels/blob/master/neuralmodels/utils.py#L34
200 | 
201 |   Args
202 |     filename: string. Path to the csv file
203 |   Returns
204 |     returnArray: the read data in a float32 matrix
205 |   """
206 |   returnArray = []
207 |   lines = open(filename).readlines()
208 |   for line in lines:
209 |     line = line.strip().split(',')
210 |     if len(line) > 0:
211 |       returnArray.append(np.array([np.float32(x) for x in line]))
212 | 
213 |   returnArray = np.array(returnArray)
214 |   return returnArray
215 | 
216 | 
217 | def load_data(path_to_dataset, subjects, actions, one_hot):
218 |   """
219 |   Borrowed from SRNN code. This is how the SRNN code reads the provided .txt files
220 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/processdata.py#L270
221 | 
222 |   Args
223 |     path_to_dataset: string. directory where the data resides
224 |     subjects: list of numbers. The subjects to load
225 |     actions: list of strings. The actions to load
226 |     one_hot: Whether to add a one-hot encoding to the data
227 |   Returns
228 |     trainData: dictionary with k:v
229 |       k=(subject, action, subaction, 'even'), v=(nxd) un-normalized data
230 |     completeData: nxd matrix with all the data. Used for normalization stats
231 |   """
232 |   nactions = len( actions )
233 | 
234 |   trainData = {}
235 |   completeData = []
236 |   total_frames = 0
237 |   for subj in subjects:
238 |     for action_idx in np.arange(len(actions)):
239 | 
240 |       action = actions[ action_idx ]
241 | 
242 |       for subact in [1, 2]: # subactions
243 | 
244 |         print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact))
245 | 
246 |         filename = '{0}/S{1}/{2}_{3}.txt'.format( path_to_dataset, subj, action, subact)
247 |         action_sequence = readCSVasFloat(filename)
248 | 
249 |         n, d = action_sequence.shape
250 |         even_list = range(0, n, 2)
251 | 
252 |         if one_hot:
253 |           # Add a one-hot encoding at the end of the representation
254 |           the_sequence = np.zeros( (len(even_list), d + nactions), dtype=float )
255 |           the_sequence[ :, 0:d ] = action_sequence[even_list, :]
256 |           the_sequence[ :, d+action_idx ] = 1
257 |           trainData[(subj, action, subact, 'even')] = the_sequence
258 | 
259 |         else:
260 |           trainData[(subj, action, subact, 'even')] = action_sequence[even_list, :]
261 | 
262 | 
263 |         if len(completeData) == 0:
264 |           completeData = copy.deepcopy(action_sequence)
265 |         else:
266 |           completeData = np.append(completeData, action_sequence, axis=0)
267 | 
268 |   return trainData, completeData
269 | 
270 | 
271 | def normalize_data( data, data_mean, data_std, dim_to_use, actions, one_hot ):
272 |   """
273 |   Normalize input data by removing unused dimensions, subtracting the mean and
274 |   dividing by the standard deviation
275 | 
276 |   Args
277 |     data: nx99 matrix with data to normalize
278 |     data_mean: vector of mean used to normalize the data
279 |     data_std: vector of standard deviation used to normalize the data
280 |     dim_to_use: vector with dimensions used by the model
281 |     actions: list of strings with the encoded actions
282 |     one_hot: whether the data comes with one-hot encoding
283 |   Returns
284 |     data_out: the passed data matrix, but normalized
285 |   """
286 |   data_out = {}
287 |   nactions = len(actions)
288 | 
289 |   if not one_hot:
290 |     # No one-hot encoding... no need to do anything special
291 |     for key in data.keys():
292 |       data_out[ key ] = np.divide( (data[key] - data_mean), data_std )
293 |       data_out[ key ] = data_out[ key ][ :, dim_to_use ] # comment this line out if you want to model all_dims
294 | 
295 |   else:
296 |     # TODO hard-coding 99 dimensions for un-normalized human poses
297 |     for key in data.keys():
298 |       data_out[ key ] = np.divide( (data[key][:, 0:99] - data_mean), data_std )
299 |       data_out[ key ] = data_out[ key ][ :, dim_to_use ] # comment this line out if you want to model all_dims
300 |       data_out[ key ] = np.hstack( (data_out[key], data[key][:,-nactions:]) )
301 | 
302 |   return data_out
303 | 
304 | 
305 | def normalization_stats(completeData):
306 |   """
307 |   Also borrowed from SRNN code. Computes mean, stdev and dimensions to ignore.
308 |   https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/processdata.py#L33
309 | 
310 |   Args
311 |     completeData: nx99 matrix with data to normalize
312 |   Returns
313 |     data_mean: vector of mean used to normalize the data
314 |     data_std: vector of standard deviation used to normalize the data
315 |     dimensions_to_ignore: vector with dimensions not used by the model
316 |     dimensions_to_use: vector with dimensions used by the model
317 |   """
318 |   data_mean = np.mean(completeData, axis=0)
319 |   data_std = np.std(completeData, axis=0)
320 | 
321 |   dimensions_to_ignore = []
322 |   dimensions_to_use = []
323 | 
324 |   dimensions_to_ignore.extend( list(np.where(data_std < 1e-4)[0]) )
325 |   dimensions_to_use.extend( list(np.where(data_std >= 1e-4)[0]) )
326 | 
327 |   data_std[dimensions_to_ignore] = 1.0 # comment this line out to avoid modifying the std of ignored dims
328 | 
329 |   return data_mean, data_std, dimensions_to_ignore, dimensions_to_use
330 | 
331 | 
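# Example usage (a sketch; the exact call sites live in the training scripts, which
# are assumed here): stats are computed once on the concatenated training data and
# then applied per sequence:
#   data_mean, data_std, dim_to_ignore, dim_to_use = normalization_stats(completeData)
#   train_norm = normalize_data(trainData, data_mean, data_std, dim_to_use, actions, one_hot)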
332 | def body_part_features():
333 |   """
334 |   function to return feature_idx ranges of the dims_to_use vector for different body parts,
335 |   e.g. torso, left_arm, right_arm, left_leg, right_leg.
336 |   Outputs: dict which contains start_idx:end_idx for each of the above mentioned body parts
337 |     key: 'torso', 'right_arm', 'left_arm', 'right_leg', 'left_leg'
338 |     value: list of idxs of the dims_to_use vector relevant to that body part
339 |   """
340 |   dims_to_use = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85, 86]
341 |   np_dims_to_use = np.asarray(dims_to_use)
342 | 
343 |   # defining the ranges of features for different body parts
344 |   node_feature_ranges = {}
345 |   node_feature_ranges['torso'] = ((0,5),(36,50))
346 |   node_feature_ranges['right_arm'] = ((75,98),)
347 |   node_feature_ranges['left_arm'] = ((51,74),)
348 |   node_feature_ranges['right_leg'] = ((6,20),)
349 |   node_feature_ranges['left_leg'] = ((21,35),)
350 | 
351 |   body_part_dims = {}
352 | 
353 |   for key in node_feature_ranges.keys():
354 |     # resetting the list which stores idxs of body parts
355 |     part_dims = []
356 |     for value in node_feature_ranges[key]:
357 |       # find indices in dims_to_use which fall in the specified body ranges
358 |       idxs = np.where( (np_dims_to_use >= value[0]) & (np_dims_to_use <= value[1]) )
359 | 
360 |       # convert back to tuple
361 |       #idxs = np.ndarray.tolist(idxs[0])
362 | 
363 |       # collect and store them in a list
364 |       part_dims.append(idxs[0])
365 | 
366 |     merged_part_dims = list(itertools.chain(*part_dims))
367 |     merged_part_dims = np.asarray(merged_part_dims)
368 |     # assign dims found
369 |     body_part_dims[key] = merged_part_dims
370 | 
371 |   return body_part_dims
372 | 
373 | def pearson_corr_coef(X, Y):
374 |   """
375 |   function to return Pearson's Correlation Coefficient between two samples X, Y
376 |   Inputs:
377 |     X - n x D (n = samples, D = feature_dims)
378 |     Y - same shape as X
379 |   Outputs:
380 |     r - Pearson's Corr Coeff
381 |   """
382 |   r = np.sum( np.mean(X - np.mean(X,0),1) * np.mean(Y - np.mean(Y,0),1) ) / ( np.mean(np.std(X,0)) * np.mean(np.std(Y,0)) )
383 | 
384 |   return r
385 | 
386 | def KL_multi_var(X_mean, X_cov, Y_mean, Y_cov):
387 | 
388 |   #function to return the multi-variate KL divergence between two Gaussian RVs
389 |   #Inputs:
390 |   #X_mean = mean vector of 1st RV
391 |   #X_cov = covariance matrix of 1st RV
392 |   #Y_mean = mean vector of 2nd RV
393 |   #Y_cov = covariance matrix of 2nd RV
394 | 
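  # Closed form computed below:
  #   KL( N(X_mean, X_cov) || N(Y_mean, Y_cov) )
  #     = 0.5 * log( det(Y_cov) / det(X_cov) )
  #       + 0.5 * trace( inv(Y_cov) * [ (X_mean - Y_mean)(X_mean - Y_mean)^T + X_cov - Y_cov ] )
  # which folds the usual -d, trace and quadratic terms of the Gaussian KL into a single trace.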
np.linalg.inv(X_cov) 396 | inv_Y_cov = np.linalg.inv(Y_cov) 397 | last_term = np.matmul((X_mean - Y_mean), (X_mean - Y_mean).T) + X_cov - Y_cov 398 | 399 | kl_xy = 0.5*np.log(np.linalg.det(np.matmul(Y_cov, inv_X_cov))) + 0.5*np.trace(np.matmul(inv_Y_cov, last_term)) 400 | return kl_xy 401 | 402 | -------------------------------------------------------------------------------- /deltaRNN.py: -------------------------------------------------------------------------------- 1 | #""" 2 | #Author :- Ankur Mali 3 | #""" 4 | 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | import numpy as np 9 | #from tensorflow.python.ops.rnn_cell import RNNCell 10 | #from rnn_cell_impl import RNNCell 11 | from rnn_cell_implement import RNNCell 12 | 13 | class DeltaRNNCell(RNNCell): 14 | #""" 15 | #Delta RNN - Differential Framework. 16 | #Alexander G. Ororbia II, Tomas Mikolov and David Reitter, 17 | #"Learning Simpler Language Models with the 18 | # Delta Recurrent Neural Network Framework" 19 | #""" 20 | 21 | def __init__(self, num_units, apply_layer_norm=False): 22 | self._num_units = num_units 23 | self._apply_layer_norm = apply_layer_norm 24 | if self._apply_layer_norm: 25 | self._layer_norm = tf.contrib.layers.layer_norm 26 | 27 | @property 28 | def input_size(self): 29 | return self._num_units 30 | 31 | @property 32 | def output_size(self): 33 | return self._num_units 34 | 35 | @property 36 | def state_size(self): 37 | return self._num_units 38 | 39 | def _outer_function(self, inner_function_output, 40 | past_hidden_state, activation=tf.nn.relu, 41 | wx_parameterization_gate=True, scope=None): 42 | #"""Check Equation 3 in Delta RNN paper 43 | # for basic understanding and to relate our code with papers maths. 44 | #""" 45 | 46 | assert inner_function_output.get_shape().as_list() == \ 47 | past_hidden_state.get_shape().as_list() 48 | 49 | with tf.variable_scope(scope or type(self).__name__): 50 | with tf.variable_scope("OuterFunction"): 51 | r_bias = tf.get_variable( 52 | "outer_function_gate", 53 | [self._num_units], 54 | dtype=tf.float32, initializer=tf.zeros_initializer) 55 | 56 | # Equation 5 in Alex(DRNN paper) 57 | if wx_parameterization_gate: 58 | r = self._W_x_inputs + r_bias 59 | else: 60 | r = r_bias 61 | 62 | gate = tf.nn.sigmoid(r) 63 | output = activation((1.0 - gate) * inner_function_output + gate * past_hidden_state) 64 | 65 | return output 66 | # End of outer function 67 | 68 | # Inner function 69 | def _inner_function(self, inputs, past_hidden_state, 70 | activation=tf.nn.tanh, scope=None): 71 | #second order function as described equation 11 in delta rnn paper 72 | #This is used in inner function 73 | 74 | with tf.variable_scope(scope or type(self).__name__): 75 | with tf.variable_scope("InnerFunction"): 76 | with tf.variable_scope("Vh"): 77 | V_h = _linear(past_hidden_state, self._num_units, True) 78 | 79 | with tf.variable_scope("Wx"): 80 | self._W_x_inputs = _linear(inputs, self._num_units, True) 81 | 82 | alpha = tf.get_variable( 83 | "alpha", [self._num_units], dtype=tf.float32, 84 | initializer=tf.constant_initializer(2.0)) 85 | # alpha value 2.0 works better than 1.0 86 | beta_one = tf.get_variable( 87 | "beta_one", [self._num_units], dtype=tf.float32, 88 | initializer=tf.constant_initializer(1.0)) 89 | 90 | beta_two = tf.get_variable( 91 | "beta_two", [self._num_units], dtype=tf.float32, 92 | initializer=tf.constant_initializer(1.0)) 93 | 94 | z_t_bias = tf.get_variable( 95 | "z_t_bias", [self._num_units], dtype=tf.float32, 96 | initializer=tf.constant_initializer(0.0)) 
97 | 98 | # 2nd order calculation 99 | #You can change activation function but before get familiar with gating operations and mathematical notations 100 | d_1_t = alpha * V_h * self._W_x_inputs 101 | d_2_t = beta_one * V_h + beta_two * self._W_x_inputs 102 | 103 | if self._apply_layer_norm: 104 | d_1_t = self._layer_norm(d_1_t) 105 | d_2_t = self._layer_norm(d_2_t) 106 | 107 | z_t = activation(d_1_t + d_2_t + z_t_bias) 108 | 109 | return z_t 110 | 111 | def __call__(self, inputs, state, scope=None): 112 | inner_function_output = self._inner_function(inputs, state) 113 | output = self._outer_function(inner_function_output, state) 114 | 115 | 116 | return output, output 117 | 118 | 119 | 120 | class DeltaRNNCellBody(RNNCell): 121 | # 122 | #Delta RNN - Differential Framework. 123 | #Alexander G. Ororbia II, Tomas Mikolov and David Reitter, 124 | #"Learning Simpler Language Models with the 125 | # Delta Recurrent Neural Network Framework" 126 | #""" 127 | 128 | def __init__(self, num_units, apply_layer_norm=False): 129 | self._num_units = num_units 130 | self._apply_layer_norm = apply_layer_norm 131 | if self._apply_layer_norm: 132 | self._layer_norm = tf.contrib.layers.layer_norm 133 | 134 | @property 135 | def input_size(self): 136 | return self._num_units 137 | 138 | @property 139 | def output_size(self): 140 | return self._num_units 141 | 142 | @property 143 | def state_size(self): 144 | return self._num_units 145 | 146 | def _outer_function(self, inner_function_output, 147 | past_hidden_state, activation=tf.nn.relu, 148 | wx_parameterization_gate=True, scope=None): 149 | #"""Check Equation 3 in Delta RNN paper 150 | # for basic understanding and to relate our code with papers maths. 151 | #""" 152 | 153 | assert inner_function_output.get_shape().as_list() == \ 154 | past_hidden_state.get_shape().as_list() 155 | 156 | with tf.variable_scope(scope or type(self).__name__): 157 | with tf.variable_scope("OuterFunction"): 158 | r_bias = tf.get_variable( 159 | "outer_function_gate", 160 | [self._num_units], 161 | dtype=tf.float32, initializer=tf.zeros_initializer) 162 | 163 | # Equation 5 in Alex(DRNN paper) 164 | if wx_parameterization_gate: 165 | r = self._W_x_inputs + r_bias 166 | else: 167 | r = r_bias 168 | 169 | gate = tf.nn.sigmoid(r) 170 | output = activation((1.0 - gate) * inner_function_output + gate * past_hidden_state) 171 | 172 | return output 173 | # """ End of outer function """ 174 | 175 | # """ Inner function """ 176 | def _inner_function(self, inputs, past_hidden_state, context, activation=tf.nn.tanh, scope=None): # modified 177 | #"""second order function as described equation 11 in delta rnn paper 178 | #This is used in inner function 179 | #""" 180 | with tf.variable_scope(scope or type(self).__name__): 181 | with tf.variable_scope("InnerFunction"): 182 | with tf.variable_scope("Vh"): 183 | V_h = _linear(past_hidden_state, self._num_units, True) 184 | 185 | with tf.variable_scope("Qm"): # modified 186 | Q_m = _linear(context, self._num_units, True) 187 | 188 | with tf.variable_scope("Wx"): 189 | self._W_x_inputs = _linear(inputs, self._num_units, True) 190 | 191 | alpha = tf.get_variable( 192 | "alpha", [self._num_units], dtype=tf.float32, 193 | initializer=tf.constant_initializer(2.0)) 194 | #""" alpha value 2.0 works better than 1.0""" 195 | beta_one = tf.get_variable( 196 | "beta_one", [self._num_units], dtype=tf.float32, 197 | initializer=tf.constant_initializer(1.0)) 198 | 199 | beta_two = tf.get_variable( 200 | "beta_two", [self._num_units], dtype=tf.float32, 201 | 
initializer=tf.constant_initializer(1.0)) 202 | 203 | z_t_bias = tf.get_variable( 204 | "z_t_bias", [self._num_units], dtype=tf.float32, 205 | initializer=tf.constant_initializer(0.0)) 206 | 207 | # 2nd order calculation 208 | #You can change activation function but before get familiar with gating operations and mathematical notations 209 | d_1_t = alpha * V_h * ( self._W_x_inputs + Q_m ) # modified 210 | d_2_t = beta_one * V_h + beta_two * ( self._W_x_inputs + Q_m ) # modified 211 | 212 | if self._apply_layer_norm: 213 | d_1_t = self._layer_norm(d_1_t) 214 | d_2_t = self._layer_norm(d_2_t) 215 | 216 | z_t = activation(d_1_t + d_2_t + z_t_bias) 217 | 218 | return z_t 219 | 220 | def __call__(self, inputs, state, context, scope=None): 221 | inner_function_output = self._inner_function(inputs, state, context) 222 | output = self._outer_function(inner_function_output, state) 223 | 224 | 225 | return output, output 226 | 227 | 228 | class DeltaRNNCellBodyFlow(RNNCell): 229 | # 230 | #Delta RNN - Differential Framework. 231 | #Alexander G. Ororbia II, Tomas Mikolov and David Reitter, 232 | #"Learning Simpler Language Models with the 233 | # Delta Recurrent Neural Network Framework" 234 | #""" 235 | 236 | def __init__(self, num_units, apply_layer_norm=False): 237 | self._num_units = num_units 238 | self._apply_layer_norm = apply_layer_norm 239 | if self._apply_layer_norm: 240 | self._layer_norm = tf.contrib.layers.layer_norm 241 | 242 | @property 243 | def input_size(self): 244 | return self._num_units 245 | 246 | @property 247 | def output_size(self): 248 | return self._num_units 249 | 250 | @property 251 | def state_size(self): 252 | return self._num_units 253 | 254 | def _outer_function(self, inputs, inner_function_output, 255 | past_hidden_state, activation=tf.nn.relu, 256 | wx_parameterization_gate=True, scope=None): 257 | #"""Check Equation 3 in Delta RNN paper 258 | # for basic understanding and to relate our code with papers maths. 
259 | #""" 260 | 261 | assert inner_function_output.get_shape().as_list() == \ 262 | past_hidden_state.get_shape().as_list() 263 | 264 | with tf.variable_scope(scope or type(self).__name__): 265 | with tf.variable_scope("OuterFunction"): 266 | r_bias = tf.get_variable("outer_function_vel_bias", [self._num_units], dtype=tf.float32, initializer=tf.zeros_initializer) 267 | W_vel = tf.get_variable("outer_function_W_vel", [54, self._num_units ], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 268 | 269 | # Equation 5 in Alex(DRNN paper) 270 | if wx_parameterization_gate: 271 | #r = self._W_x_inputs + r_bias 272 | r = tf.matmul(inputs[:,54:108], W_vel) + r_bias # modified 273 | else: 274 | r = r_bias 275 | 276 | gate = tf.nn.sigmoid(r) 277 | output = activation((1.0 - gate) * inner_function_output + gate * past_hidden_state) 278 | 279 | return output 280 | # """ End of outer function """ 281 | 282 | # """ Inner function """ 283 | def _inner_function(self, inputs, past_hidden_state, context, activation=tf.nn.tanh, scope=None): # modified 284 | #"""second order function as described equation 11 in delta rnn paper 285 | #This is used in inner function 286 | #""" 287 | with tf.variable_scope(scope or type(self).__name__): 288 | with tf.variable_scope("InnerFunction"): 289 | with tf.variable_scope("Vh"): 290 | V_h = _linear(past_hidden_state, self._num_units, True) 291 | 292 | with tf.variable_scope("Qm"): # modified 293 | Q_m = _linear(context, self._num_units, True) 294 | 295 | with tf.variable_scope("Wx"): 296 | self._W_x_inputs = _linear(inputs[:,0:54], self._num_units, True) 297 | 298 | alpha = tf.get_variable( 299 | "alpha", [self._num_units], dtype=tf.float32, 300 | initializer=tf.constant_initializer(2.0)) 301 | #""" alpha value 2.0 works better than 1.0""" 302 | beta_one = tf.get_variable( 303 | "beta_one", [self._num_units], dtype=tf.float32, 304 | initializer=tf.constant_initializer(1.0)) 305 | 306 | beta_two = tf.get_variable( 307 | "beta_two", [self._num_units], dtype=tf.float32, 308 | initializer=tf.constant_initializer(1.0)) 309 | 310 | z_t_bias = tf.get_variable( 311 | "z_t_bias", [self._num_units], dtype=tf.float32, 312 | initializer=tf.constant_initializer(0.0)) 313 | 314 | # 2nd order calculation 315 | #You can change activation function but before get familiar with gating operations and mathematical notations 316 | d_1_t = alpha * V_h * ( self._W_x_inputs + Q_m ) # modified 317 | d_2_t = beta_one * V_h + beta_two * ( self._W_x_inputs + Q_m ) # modified 318 | 319 | if self._apply_layer_norm: 320 | d_1_t = self._layer_norm(d_1_t) 321 | d_2_t = self._layer_norm(d_2_t) 322 | 323 | z_t = activation(d_1_t + d_2_t + z_t_bias) 324 | 325 | return z_t 326 | 327 | def __call__(self, inputs, state, context, scope=None): 328 | inner_function_output = self._inner_function(inputs, state, context) 329 | output = self._outer_function(inputs, inner_function_output, state) 330 | 331 | 332 | return output, output 333 | 334 | 335 | def _linear(args, output_size, bias, bias_start=0.0, scope=None): 336 | #"""Linear mapping """ 337 | if args is None or (isinstance(args, (list, tuple)) and not args): 338 | raise ValueError("`args` must be specified, please check definition for input variables") 339 | if not isinstance(args, (list, tuple)): 340 | args = [args] 341 | 342 | # dimension 1 cell size calculation. 
343 | total_arg_size = 0 344 | shapes = [a.get_shape().as_list() for a in args] 345 | for shape in shapes: 346 | if len(shape) != 2: 347 | raise ValueError( 348 | "Linear is expecting 2Dimensional Arguments: %s" % str(shapes)) 349 | if not shape[1]: 350 | raise ValueError( 351 | "Linear expects shape[1] of arguments: %s" % str(shapes)) 352 | else: 353 | total_arg_size += shape[1] 354 | 355 | with tf.variable_scope(scope or "Linear"): 356 | matrix = tf.get_variable("Matrix", [total_arg_size, output_size]) 357 | if len(args) == 1: 358 | res = tf.matmul(args[0], matrix) 359 | else: 360 | res = tf.matmul(tf.concat(1, args), matrix) 361 | if not bias: 362 | return res 363 | bias_term = tf.get_variable( 364 | "Bias", [output_size], 365 | initializer=tf.constant_initializer(bias_start)) 366 | return res + bias_term 367 | -------------------------------------------------------------------------------- /forward_kinematics.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import h5py 5 | import matplotlib 6 | import matplotlib.pyplot as plt 7 | import matplotlib.animation as animation 8 | from mpl_toolkits.mplot3d import Axes3D 9 | import viz 10 | import time 11 | import copy 12 | import data_utils 13 | import cv2 14 | from PIL import Image 15 | 16 | def fkl( angles, parent, offset, rotInd, expmapInd ): 17 | """ 18 | Convert joint angles and bone lenghts into the 3d points of a person. 19 | Based on expmap2xyz.m, available at 20 | https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/exp2xyz.m 21 | 22 | Args 23 | angles: 99-long vector with 3d position and 3d joint angles in expmap format 24 | parent: 32-long vector with parent-child relationships in the kinematic tree 25 | offset: 96-long vector with bone lenghts 26 | rotInd: 32-long list with indices into angles 27 | expmapInd: 32-long list with indices into expmap angles 28 | Returns 29 | xyz: 32x3 3d points that represent a person in 3d space 30 | """ 31 | 32 | assert len(angles) == 99 33 | 34 | # Structure that indicates parents for each joint 35 | njoints = 32 36 | xyzStruct = [dict() for x in range(njoints)] 37 | 38 | for i in np.arange( njoints ): 39 | 40 | if not rotInd[i] : # If the list is empty 41 | xangle, yangle, zangle = 0, 0, 0 42 | else: 43 | xangle = angles[ rotInd[i][0]-1 ] 44 | yangle = angles[ rotInd[i][1]-1 ] 45 | zangle = angles[ rotInd[i][2]-1 ] 46 | 47 | r = angles[ expmapInd[i] ] 48 | 49 | thisRotation = data_utils.expmap2rotmat(r) 50 | thisPosition = np.array([xangle, yangle, zangle]) 51 | 52 | if parent[i] == -1: # Root node 53 | xyzStruct[i]['rotation'] = thisRotation 54 | xyzStruct[i]['xyz'] = np.reshape(offset[i,:], (1,3)) + thisPosition 55 | else: 56 | xyzStruct[i]['xyz'] = (offset[i,:] + thisPosition).dot( xyzStruct[ parent[i] ]['rotation'] ) + xyzStruct[ parent[i] ]['xyz'] 57 | xyzStruct[i]['rotation'] = thisRotation.dot( xyzStruct[ parent[i] ]['rotation'] ) 58 | 59 | xyz = [xyzStruct[i]['xyz'] for i in range(njoints)] 60 | xyz = np.array( xyz ).squeeze() 61 | xyz = xyz[:,[0,2,1]] 62 | # xyz = xyz[:,[2,0,1]] 63 | 64 | 65 | return np.reshape( xyz, [-1] ) 66 | 67 | def revert_coordinate_space(channels, R0, T0): 68 | """ 69 | Bring a series of poses to a canonical form so they are facing the camera when they start. 
70 | Adapted from 71 | https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/dataParser/Utils/revertCoordinateSpace.m 72 | 73 | Args 74 | channels: n-by-99 matrix of poses 75 | R0: 3x3 rotation for the first frame 76 | T0: 1x3 position for the first frame 77 | Returns 78 | channels_rec: The passed poses, but the first has T0 and R0, and the 79 | rest of the sequence is modified accordingly. 80 | """ 81 | n, d = channels.shape 82 | 83 | channels_rec = copy.copy(channels) 84 | R_prev = R0 85 | T_prev = T0 86 | rootRotInd = np.arange(3,6) 87 | 88 | # Loop through the passed posses 89 | for ii in range(n): 90 | R_diff = data_utils.expmap2rotmat( channels[ii, rootRotInd] ) 91 | R = R_diff.dot( R_prev ) 92 | 93 | channels_rec[ii, rootRotInd] = data_utils.rotmat2expmap(R) 94 | T = T_prev + ((R_prev.T).dot( np.reshape(channels[ii,:3],[3,1]))).reshape(-1) 95 | channels_rec[ii,:3] = T 96 | T_prev = T 97 | R_prev = R 98 | 99 | return channels_rec 100 | 101 | 102 | def _some_variables(): 103 | """ 104 | We define some variables that are useful to run the kinematic tree 105 | 106 | Args 107 | None 108 | Returns 109 | parent: 32-long vector with parent-child relationships in the kinematic tree 110 | offset: 96-long vector with bone lenghts 111 | rotInd: 32-long list with indices into angles 112 | expmapInd: 32-long list with indices into expmap angles 113 | """ 114 | 115 | parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9,10, 1,12,13,14,15,13, 116 | 17,18,19,20,21,20,23,13,25,26,27,28,29,28,31])-1 117 | 118 | offset = np.array([0.000000,0.000000,0.000000,-132.948591,0.000000,0.000000,0.000000,-442.894612,0.000000,0.000000,-454.206447,0.000000,0.000000,0.000000,162.767078,0.000000,0.000000,74.999437,132.948826,0.000000,0.000000,0.000000,-442.894413,0.000000,0.000000,-454.206590,0.000000,0.000000,0.000000,162.767426,0.000000,0.000000,74.999948,0.000000,0.100000,0.000000,0.000000,233.383263,0.000000,0.000000,257.077681,0.000000,0.000000,121.134938,0.000000,0.000000,115.002227,0.000000,0.000000,257.077681,0.000000,0.000000,151.034226,0.000000,0.000000,278.882773,0.000000,0.000000,251.733451,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,99.999627,0.000000,100.000188,0.000000,0.000000,0.000000,0.000000,0.000000,257.077681,0.000000,0.000000,151.031437,0.000000,0.000000,278.892924,0.000000,0.000000,251.728680,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,99.999888,0.000000,137.499922,0.000000,0.000000,0.000000,0.000000]) 119 | offset = offset.reshape(-1,3) 120 | 121 | rotInd = [[5, 6, 4], 122 | [8, 9, 7], 123 | [11, 12, 10], 124 | [14, 15, 13], 125 | [17, 18, 16], 126 | [], 127 | [20, 21, 19], 128 | [23, 24, 22], 129 | [26, 27, 25], 130 | [29, 30, 28], 131 | [], 132 | [32, 33, 31], 133 | [35, 36, 34], 134 | [38, 39, 37], 135 | [41, 42, 40], 136 | [], 137 | [44, 45, 43], 138 | [47, 48, 46], 139 | [50, 51, 49], 140 | [53, 54, 52], 141 | [56, 57, 55], 142 | [], 143 | [59, 60, 58], 144 | [], 145 | [62, 63, 61], 146 | [65, 66, 64], 147 | [68, 69, 67], 148 | [71, 72, 70], 149 | [74, 75, 73], 150 | [], 151 | [77, 78, 76], 152 | []] 153 | 154 | expmapInd = np.split(np.arange(4,100)-1,32) 155 | 156 | return parent, offset, rotInd, expmapInd 157 | 158 | def main(): 159 | 160 | # Load all the data 161 | parent, offset, rotInd, expmapInd = _some_variables() 162 | action = 'eating' 163 | test_set_sequence = '5' 164 | # numpy implementation 165 | with h5py.File( 'samples.h5', 'r' ) as h5f: 166 | expmap_pred = h5f['expmap/preds/' + action + '_' + 
test_set_sequence][:] 167 | expmap_gt = h5f['expmap/gt/' + action + '_' + test_set_sequence ][:] 168 | 169 | nframes_gt, nframes_pred = expmap_gt.shape[0], expmap_pred.shape[0] 170 | 171 | # Put them together and revert the coordinate space 172 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt, expmap_pred)), np.eye(3), np.zeros(3) ) 173 | expmap_gt = expmap_all[:nframes_gt,:] 174 | expmap_pred = expmap_all[nframes_gt:,:] 175 | 176 | # Compute 3d points for each frame 177 | xyz_gt, xyz_pred = np.zeros((nframes_gt, 96)), np.zeros((nframes_pred, 96)) 178 | for i in range( nframes_gt ): 179 | xyz_gt[i,:] = fkl( expmap_gt[i,:], parent, offset, rotInd, expmapInd ) 180 | for i in range( nframes_pred ): 181 | xyz_pred[i,:] = fkl( expmap_pred[i,:], parent, offset, rotInd, expmapInd ) 182 | 183 | # setting up stuff to save video 184 | FFMpegWriter = animation.writers['ffmpeg'] 185 | metadata = dict(title= action + '_' + test_set_sequence, artist='Matplotlib', comment='Movie support!') 186 | writer = FFMpegWriter(fps=25, codec="libx264", bitrate=-1, metadata=metadata) 187 | 188 | # === Plot and animate === 189 | fig = plt.figure() 190 | gt_ax = fig.add_subplot(1, 2, 1, projection='3d') 191 | pred_ax = fig.add_subplot(1, 2, 2, projection='3d') 192 | ob_gt = viz.Ax3DPose(gt_ax) 193 | ob_pred = viz.Ax3DPose(pred_ax) 194 | 195 | # setting viewing angle 196 | gt_ax.view_init(azim=135) 197 | pred_ax.view_init(azim=45) 198 | 199 | with writer.saving(fig, action + "_" + test_set_sequence +".mp4", 100): 200 | 201 | # Plot the conditioning ground truth 202 | for i in range(nframes_gt): 203 | ob_gt.update( xyz_gt[i,:] ) 204 | #plt.show(block=False) 205 | fig.canvas.draw() 206 | #plt.pause(0.001) 207 | #writer.grab_frame() 208 | 209 | # Plot the prediction 210 | #for i in range(nframes_pred): 211 | ob_pred.update( xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 212 | plt.show(block=False) 213 | fig.canvas.draw() 214 | plt.pause(0.001) 215 | writer.grab_frame() 216 | 217 | if __name__ == '__main__': 218 | main() 219 | -------------------------------------------------------------------------------- /forward_kinematics_v2.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import h5py 5 | import matplotlib 6 | import matplotlib.pyplot as plt 7 | import matplotlib.animation as animation 8 | from mpl_toolkits.mplot3d import Axes3D 9 | import viz 10 | import time 11 | import copy 12 | import data_utils 13 | import cv2 14 | from PIL import Image 15 | 16 | def fkl( angles, parent, offset, rotInd, expmapInd ): 17 | """ 18 | Convert joint angles and bone lenghts into the 3d points of a person. 
19 | Based on expmap2xyz.m, available at 20 | https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/exp2xyz.m 21 | 22 | Args 23 | angles: 99-long vector with 3d position and 3d joint angles in expmap format 24 | parent: 32-long vector with parent-child relationships in the kinematic tree 25 | offset: 96-long vector with bone lenghts 26 | rotInd: 32-long list with indices into angles 27 | expmapInd: 32-long list with indices into expmap angles 28 | Returns 29 | xyz: 32x3 3d points that represent a person in 3d space 30 | """ 31 | 32 | assert len(angles) == 99 33 | 34 | # Structure that indicates parents for each joint 35 | njoints = 32 36 | xyzStruct = [dict() for x in range(njoints)] 37 | 38 | for i in np.arange( njoints ): 39 | 40 | if not rotInd[i] : # If the list is empty 41 | xangle, yangle, zangle = 0, 0, 0 42 | else: 43 | xangle = angles[ rotInd[i][0]-1 ] 44 | yangle = angles[ rotInd[i][1]-1 ] 45 | zangle = angles[ rotInd[i][2]-1 ] 46 | 47 | r = angles[ expmapInd[i] ] 48 | 49 | thisRotation = data_utils.expmap2rotmat(r) 50 | thisPosition = np.array([xangle, yangle, zangle]) 51 | 52 | if parent[i] == -1: # Root node 53 | xyzStruct[i]['rotation'] = thisRotation 54 | xyzStruct[i]['xyz'] = np.reshape(offset[i,:], (1,3)) + thisPosition 55 | else: 56 | xyzStruct[i]['xyz'] = (offset[i,:] + thisPosition).dot( xyzStruct[ parent[i] ]['rotation'] ) + xyzStruct[ parent[i] ]['xyz'] 57 | xyzStruct[i]['rotation'] = thisRotation.dot( xyzStruct[ parent[i] ]['rotation'] ) 58 | 59 | xyz = [xyzStruct[i]['xyz'] for i in range(njoints)] 60 | xyz = np.array( xyz ).squeeze() 61 | xyz = xyz[:,[0,2,1]] 62 | # xyz = xyz[:,[2,0,1]] 63 | 64 | 65 | return np.reshape( xyz, [-1] ) 66 | 67 | def revert_coordinate_space(channels, R0, T0): 68 | """ 69 | Bring a series of poses to a canonical form so they are facing the camera when they start. 70 | Adapted from 71 | https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/dataParser/Utils/revertCoordinateSpace.m 72 | 73 | Args 74 | channels: n-by-99 matrix of poses 75 | R0: 3x3 rotation for the first frame 76 | T0: 1x3 position for the first frame 77 | Returns 78 | channels_rec: The passed poses, but the first has T0 and R0, and the 79 | rest of the sequence is modified accordingly. 
80 | """ 81 | n, d = channels.shape 82 | 83 | channels_rec = copy.copy(channels) 84 | R_prev = R0 85 | T_prev = T0 86 | rootRotInd = np.arange(3,6) 87 | 88 | # Loop through the passed posses 89 | for ii in range(n): 90 | R_diff = data_utils.expmap2rotmat( channels[ii, rootRotInd] ) 91 | R = R_diff.dot( R_prev ) 92 | 93 | channels_rec[ii, rootRotInd] = data_utils.rotmat2expmap(R) 94 | T = T_prev + ((R_prev.T).dot( np.reshape(channels[ii,:3],[3,1]))).reshape(-1) 95 | channels_rec[ii,:3] = T 96 | T_prev = T 97 | R_prev = R 98 | 99 | return channels_rec 100 | 101 | 102 | def _some_variables(): 103 | """ 104 | We define some variables that are useful to run the kinematic tree 105 | 106 | Args 107 | None 108 | Returns 109 | parent: 32-long vector with parent-child relationships in the kinematic tree 110 | offset: 96-long vector with bone lenghts 111 | rotInd: 32-long list with indices into angles 112 | expmapInd: 32-long list with indices into expmap angles 113 | """ 114 | 115 | parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9,10, 1,12,13,14,15,13, 116 | 17,18,19,20,21,20,23,13,25,26,27,28,29,28,31])-1 117 | 118 | offset = np.array([0.000000,0.000000,0.000000,-132.948591,0.000000,0.000000,0.000000,-442.894612,0.000000,0.000000,-454.206447,0.000000,0.000000,0.000000,162.767078,0.000000,0.000000,74.999437,132.948826,0.000000,0.000000,0.000000,-442.894413,0.000000,0.000000,-454.206590,0.000000,0.000000,0.000000,162.767426,0.000000,0.000000,74.999948,0.000000,0.100000,0.000000,0.000000,233.383263,0.000000,0.000000,257.077681,0.000000,0.000000,121.134938,0.000000,0.000000,115.002227,0.000000,0.000000,257.077681,0.000000,0.000000,151.034226,0.000000,0.000000,278.882773,0.000000,0.000000,251.733451,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,99.999627,0.000000,100.000188,0.000000,0.000000,0.000000,0.000000,0.000000,257.077681,0.000000,0.000000,151.031437,0.000000,0.000000,278.892924,0.000000,0.000000,251.728680,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,99.999888,0.000000,137.499922,0.000000,0.000000,0.000000,0.000000]) 119 | offset = offset.reshape(-1,3) 120 | 121 | rotInd = [[5, 6, 4], 122 | [8, 9, 7], 123 | [11, 12, 10], 124 | [14, 15, 13], 125 | [17, 18, 16], 126 | [], 127 | [20, 21, 19], 128 | [23, 24, 22], 129 | [26, 27, 25], 130 | [29, 30, 28], 131 | [], 132 | [32, 33, 31], 133 | [35, 36, 34], 134 | [38, 39, 37], 135 | [41, 42, 40], 136 | [], 137 | [44, 45, 43], 138 | [47, 48, 46], 139 | [50, 51, 49], 140 | [53, 54, 52], 141 | [56, 57, 55], 142 | [], 143 | [59, 60, 58], 144 | [], 145 | [62, 63, 61], 146 | [65, 66, 64], 147 | [68, 69, 67], 148 | [71, 72, 70], 149 | [74, 75, 73], 150 | [], 151 | [77, 78, 76], 152 | []] 153 | 154 | expmapInd = np.split(np.arange(4,100)-1,32) 155 | 156 | return parent, offset, rotInd, expmapInd 157 | 158 | def read_data(gt_sequences, pred_sequences): 159 | 160 | euler_gt_sequences = np.zeros((100, 99)) 161 | euler_pred_sequences = np.zeros((100, 99)) 162 | 163 | # converting back to euler angles 164 | for j in np.arange( gt_sequences.shape[1] ): 165 | for k in np.arange(3,97,3): 166 | euler_gt_sequences[j, k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat( gt_sequences[j, k:k+3] )) 167 | euler_pred_sequences[j, k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat( pred_sequences[j, k:k+3] )) 168 | 169 | euler_gt_sequences[:,0:6] = 0 170 | euler_pred_sequences[:,0:6] = 0 171 | 172 | return euler_gt_sequences, euler_pred_sequences 173 | 174 | def compute_metrics(euler_gt_sequences, euler_pred_sequences): 175 | 176 | # computing 1) 
fourier coeffs 2)power of fft 3) normalizing power of fft dim-wise 4) cumsum over freq. 5) EMD 177 | gt_fourier_coeffs = np.zeros(euler_gt_sequences.shape) 178 | pred_fourier_coeffs = np.zeros(euler_pred_sequences.shape) 179 | 180 | # power vars 181 | gt_power = np.zeros((gt_fourier_coeffs.shape)) 182 | pred_power = np.zeros((gt_fourier_coeffs.shape)) 183 | 184 | # normalizing power vars 185 | gt_norm_power = np.zeros(gt_fourier_coeffs.shape) 186 | pred_norm_power = np.zeros(gt_fourier_coeffs.shape) 187 | 188 | cdf_gt_power = np.zeros(gt_norm_power.shape) 189 | cdf_pred_power = np.zeros(pred_norm_power.shape) 190 | 191 | emd = np.zeros(cdf_pred_power.shape[1]) 192 | 193 | # used to store powers of feature_dims and sequences used for avg later 194 | seq_feature_power = np.zeros(euler_gt_sequences.shape[1]) 195 | power_weighted_emd = 0 196 | 197 | for d in range(euler_gt_sequences.shape[1]): 198 | gt_fourier_coeffs[:,d] = np.fft.fft(euler_gt_sequences[:,d]) # slice is 1D array 199 | pred_fourier_coeffs[:,d] = np.fft.fft(euler_pred_sequences[:,d]) 200 | 201 | # computing power of fft per sequence per dim 202 | gt_power[:,d] = np.square(np.absolute(gt_fourier_coeffs[:,d])) 203 | pred_power[:,d] = np.square(np.absolute(pred_fourier_coeffs[:,d])) 204 | 205 | # matching power of gt and pred sequences 206 | gt_total_power = np.sum(gt_power[:,d]) 207 | pred_total_power = np.sum(pred_power[:,d]) 208 | 209 | # computing seq_power and feature_dims power 210 | seq_feature_power[d] = gt_total_power 211 | 212 | # normalizing power per sequence per dim 213 | if gt_total_power != 0: 214 | gt_norm_power[:,d] = gt_power[:,d] / gt_total_power 215 | 216 | if pred_total_power !=0: 217 | pred_norm_power[:,d] = pred_power[:,d] / pred_total_power 218 | 219 | # computing cumsum over freq 220 | cdf_gt_power[:,d] = np.cumsum(gt_norm_power[:,d]) # slice is 1D 221 | cdf_pred_power[:,d] = np.cumsum(pred_norm_power[:,d]) 222 | 223 | # computing EMD 224 | emd[d] = np.linalg.norm((cdf_pred_power[:,d] - cdf_gt_power[:,d]), ord=1) 225 | 226 | # computing weighted emd (by sequence and feature powers) 227 | power_weighted_emd = np.average(emd, weights=seq_feature_power) 228 | 229 | return power_weighted_emd 230 | 231 | 232 | def main(): 233 | 234 | # Load all the data 235 | parent, offset, rotInd, expmapInd = _some_variables() 236 | 237 | # short-term models 238 | with h5py.File( '../final_exp_samples/short-term/jul_unsup_sa/walking_samples.h5', 'r' ) as h5f5: 239 | jul_unsup_sa_expmap_pred = h5f5['expmap/preds/walking_6'][:] 240 | expmap_gt_5 = h5f5['expmap/gt/walking_6'][:] 241 | 242 | with h5py.File( '../final_exp_samples/short-term/pgru_skip_1/walking_samples_v2.h5', 'r' ) as h5f6: 243 | pgru_skip_1_expmap_pred = h5f6['expmap/preds/walking_6'][:] 244 | expmap_gt_6 = h5f6['expmap/gt/walking_6'][:] 245 | 246 | # load mocap gt and PGRU-d model predictions 247 | with h5py.File( '../final_exp_samples/long-term/pgru-d/walking_samples_v2.h5', 'r' ) as h5f1: 248 | pgru_d_expmap_pred = h5f1['expmap/preds/walking_6'][:] 249 | expmap_gt_1 = h5f1['expmap/gt/walking_6'][:] 250 | 251 | with h5py.File( '../final_exp_samples/long-term/gru-d/walking_samples.h5', 'r' ) as h5f2: 252 | gru_d_expmap_pred = h5f2['expmap/preds/walking_6'][:] 253 | expmap_gt_2 = h5f2['expmap/gt/walking_6'][:] 254 | 255 | with h5py.File( '../final_exp_samples/long-term/pgru-a/walking_samples.h5', 'r' ) as h5f3: 256 | pgru_a_expmap_pred = h5f3['expmap/preds/walking_6'][:] 257 | expmap_gt_3 = h5f3['expmap/gt/walking_6'][:] 258 | 259 | with h5py.File( 
'../final_exp_samples/long-term/julietta/walking_samples.h5', 'r' ) as h5f4: 260 | jul_long_expmap_pred = h5f4['expmap/preds/walking_6'][:] 261 | expmap_gt_4 = h5f4['expmap/gt/walking_6'][:] 262 | 263 | nframes_gt, nframes_pred = expmap_gt_1.shape[0], pgru_d_expmap_pred.shape[0] 264 | 265 | # computing NPSS metric for all models 266 | #euler_gt_5_seq, euler_jul_unsup_sa_seq = read_data(jul_unsup_sa_expmap_pred, expmap_gt_5) 267 | #euler_gt_6_seq, euler_pgru_skip_1_seq = read_data(pgru_skip_1_expmap_pred, expmap_gt_6) 268 | 269 | #euler_gt_1_seq, euler_pgru_d_seq = read_data(pgru_d_expmap_pred, expmap_gt_1) 270 | #euler_gt_2_seq, euler_gru_d_seq = read_data(gru_d_expmap_pred, expmap_gt_2) 271 | #euler_gt_3_seq, euler_pgru_a_seq = read_data(pgru_a_expmap_pred, expmap_gt_3) 272 | #euler_gt_4_seq, euler_jul_long_seq = read_data(jul_long_expmap_pred, expmap_gt_4) 273 | 274 | #jul_unsup_sa_emd = compute_metrics(euler_gt_5_seq, euler_jul_unsup_sa_seq) 275 | #pgru_skip_1_emd = compute_metrics(euler_gt_6_seq, euler_pgru_skip_1_seq) 276 | 277 | #pgru_d_emd = compute_metrics(euler_gt_1_seq, euler_pgru_d_seq) 278 | #gru_d_emd = compute_metrics(euler_gt_2_seq, euler_gru_d_seq) 279 | #pgru_a_emd = compute_metrics(euler_gt_3_seq, euler_pgru_a_seq) 280 | #jul_long_emd = compute_metrics(euler_gt_4_seq, euler_jul_long_seq) 281 | 282 | # Put them together and revert the coordinate space 283 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt_1, pgru_d_expmap_pred)), np.eye(3), np.zeros(3) ) 284 | expmap_gt = expmap_all[:nframes_gt,:] 285 | pgru_d_expmap_pred = expmap_all[nframes_gt:,:] 286 | 287 | # gru-d revert co-ord space 288 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt_2, gru_d_expmap_pred)), np.eye(3), np.zeros(3) ) 289 | gru_d_expmap_pred = expmap_all[nframes_gt:,:] 290 | 291 | # pgru-ac revert co-ord space 292 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt_3, pgru_a_expmap_pred)), np.eye(3), np.zeros(3) ) 293 | pgru_a_expmap_pred = expmap_all[nframes_gt:,:] 294 | 295 | # julietta-long revert co-ord space 296 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt_4, jul_long_expmap_pred)), np.eye(3), np.zeros(3) ) 297 | jul_long_expmap_pred = expmap_all[nframes_gt:,:] 298 | 299 | # jul_unsup_sa revert co-ord space 300 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt_5, jul_unsup_sa_expmap_pred)), np.eye(3), np.zeros(3) ) 301 | jul_unsup_sa_expmap_pred = expmap_all[nframes_gt:,:] 302 | 303 | # pgru_skip_1 revert co-ord space 304 | expmap_all = revert_coordinate_space( np.vstack((expmap_gt_6, pgru_skip_1_expmap_pred)), np.eye(3), np.zeros(3) ) 305 | pgru_skip_1_expmap_pred = expmap_all[nframes_gt:,:] 306 | 307 | 308 | # Compute 3d points for each frame 309 | xyz_gt, pgru_d_xyz_pred = np.zeros((nframes_gt, 96)), np.zeros((nframes_pred, 96)) 310 | gru_d_xyz_pred = np.zeros((nframes_gt, 96)) 311 | pgru_a_xyz_pred = np.zeros((nframes_gt, 96)) 312 | jul_long_xyz_pred = np.zeros((nframes_gt, 96)) 313 | 314 | jul_unsup_sa_xyz_pred = np.zeros((nframes_gt, 96)) 315 | pgru_skip_1_xyz_pred = np.zeros((nframes_gt, 96)) 316 | 317 | # ground-truth xyz frames 318 | for i in range( nframes_gt ): 319 | xyz_gt[i,:] = fkl( expmap_gt[i,:], parent, offset, rotInd, expmapInd ) 320 | 321 | # pgru-d xyz frames 322 | for i in range( nframes_pred ): 323 | pgru_d_xyz_pred[i,:] = fkl( pgru_d_expmap_pred[i,:], parent, offset, rotInd, expmapInd ) 324 | 325 | # gru-d xyz frames 326 | for i in range( nframes_pred ): 327 | gru_d_xyz_pred[i,:] = fkl( gru_d_expmap_pred[i,:], parent, 
offset, rotInd, expmapInd ) 328 | 329 | # gru-ac xyz frames 330 | for i in range( nframes_pred ): 331 | pgru_a_xyz_pred[i,:] = fkl( pgru_a_expmap_pred[i,:], parent, offset, rotInd, expmapInd ) 332 | 333 | # jul-long xyz frames 334 | for i in range( nframes_pred ): 335 | jul_long_xyz_pred[i,:] = fkl( jul_long_expmap_pred[i,:], parent, offset, rotInd, expmapInd ) 336 | 337 | # jul-unsup-sa xyz frames 338 | for i in range( nframes_pred ): 339 | jul_unsup_sa_xyz_pred[i,:] = fkl( jul_unsup_sa_expmap_pred[i,:], parent, offset, rotInd, expmapInd ) 340 | 341 | # pgru-skip-1 xyz frames 342 | for i in range( nframes_pred ): 343 | pgru_skip_1_xyz_pred[i,:] = fkl( pgru_skip_1_expmap_pred[i,:], parent, offset, rotInd, expmapInd ) 344 | 345 | # setting up stuff to save video 346 | FFMpegWriter = animation.writers['ffmpeg'] 347 | metadata = dict(title='Walking Sequence 6', artist='Matplotlib', comment='Movie support!') 348 | writer = FFMpegWriter(fps=12, codec="libx264", bitrate=-1, metadata=metadata) 349 | 350 | # === Plot and animate === 351 | fig = plt.figure(figsize=(22.0,11.0)) 352 | fig.suptitle("Walking Sequence 6") 353 | fig.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95, wspace=None, hspace=None) 354 | gt_ax = fig.add_subplot(3, 3, 2, projection='3d') 355 | jul_unsup_sa_pred_ax = fig.add_subplot(3, 3, 4, projection='3d') 356 | pgru_skip_1_pred_ax = fig.add_subplot(3, 3, 5, projection='3d') 357 | jul_long_pred_ax = fig.add_subplot(3, 3, 6, projection='3d') 358 | pgru_a_pred_ax = fig.add_subplot(3, 3, 7, projection='3d') 359 | gru_d_pred_ax = fig.add_subplot(3, 3, 8, projection='3d') 360 | pgru_d_pred_ax = fig.add_subplot(3, 3, 9, projection='3d') 361 | 362 | # setting viewing angle 363 | gt_ax.view_init(azim=135) 364 | jul_unsup_sa_pred_ax.view_init(azim=45) 365 | pgru_skip_1_pred_ax.view_init(azim=45) 366 | jul_long_pred_ax.view_init(azim=45) 367 | pgru_a_pred_ax.view_init(azim=45) 368 | gru_d_pred_ax.view_init(azim=45) 369 | pgru_d_pred_ax.view_init(azim=45) 370 | 371 | font = {'family': 'serif', 372 | 'color': 'black', 373 | 'weight': 'normal', 374 | 'size': 12, 375 | } 376 | 377 | # titles and legends for subplots 378 | gt_ax.set_title("Ground-Truth") 379 | 380 | #jul_unsup_sa_emd_str = '$\mathrm{NPSS}=%.3f$'%(jul_unsup_sa_emd) 381 | jul_unsup_sa_pred_ax.set_title("A") 382 | #jul_unsup_sa_pred_ax.text2D(0.35,0.80, jul_unsup_sa_emd_str, fontdict=font, transform=jul_unsup_sa_pred_ax.transAxes) 383 | 384 | #pgru_skip_1_emd_str = '$\mathrm{NPSS}=%.3f$'%(pgru_skip_1_emd) 385 | pgru_skip_1_pred_ax.set_title("B") 386 | #pgru_skip_1_pred_ax.text2D(0.35,0.80, pgru_skip_1_emd_str, fontdict=font, transform=pgru_skip_1_pred_ax.transAxes) 387 | 388 | #jul_long_emd_str = '$\mathrm{NPSS}=%.3f$'%(jul_long_emd) 389 | jul_long_pred_ax.set_title("C") 390 | #jul_long_pred_ax.text2D(0.35,0.80, jul_long_emd_str, fontdict=font, transform=jul_long_pred_ax.transAxes) 391 | 392 | #pgru_a_emd_str = '$\mathrm{NPSS}=%.3f$'%(pgru_a_emd) 393 | pgru_a_pred_ax.set_title("D") 394 | #pgru_a_pred_ax.text2D(0.35,0.80, pgru_a_emd_str, fontdict=font, transform=pgru_a_pred_ax.transAxes) 395 | 396 | #gru_d_emd_str = '$\mathrm{NPSS}=%.3f$'%(gru_d_emd) 397 | gru_d_pred_ax.set_title("E") 398 | #gru_d_pred_ax.text2D(0.35, 0.80, gru_d_emd_str, fontdict=font, transform=gru_d_pred_ax.transAxes) 399 | 400 | #pgru_d_emd_str = '$\mathrm{NPSS}=%.3f$'%(pgru_d_emd) 401 | pgru_d_pred_ax.set_title("F") 402 | #pgru_d_pred_ax.text2D(0.35, 0.80, pgru_d_emd_str, fontdict=font, transform=pgru_d_pred_ax.transAxes) 403 | 404 | ob_gt = 
viz.Ax3DPose(gt_ax) 405 | jul_unsup_sa_ob_pred = viz.Ax3DPose(jul_unsup_sa_pred_ax) 406 | pgru_skip_1_ob_pred = viz.Ax3DPose(pgru_skip_1_pred_ax) 407 | jul_long_ob_pred = viz.Ax3DPose(jul_long_pred_ax) 408 | pgru_a_ob_pred = viz.Ax3DPose(pgru_a_pred_ax) 409 | gru_d_ob_pred = viz.Ax3DPose(gru_d_pred_ax) 410 | pgru_d_ob_pred = viz.Ax3DPose(pgru_d_pred_ax) 411 | 412 | with writer.saving(fig, "walking_seq_6.mp4", 100): 413 | 414 | for i in range(nframes_gt): 415 | # Plot the conditioning ground truth 416 | ob_gt.update( xyz_gt[i,:] ) 417 | fig.canvas.draw() 418 | 419 | # Plot the jul-unsup-sa prediction 420 | jul_unsup_sa_ob_pred.update( jul_unsup_sa_xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 421 | plt.show(block=False) 422 | fig.canvas.draw() 423 | 424 | # Plot the pgru-skip-1 prediction 425 | pgru_skip_1_ob_pred.update( pgru_skip_1_xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 426 | plt.show(block=False) 427 | fig.canvas.draw() 428 | 429 | # Plot the jul-long prediction 430 | jul_long_ob_pred.update( jul_long_xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 431 | plt.show(block=False) 432 | fig.canvas.draw() 433 | 434 | # Plot the pgru-ac prediction 435 | pgru_a_ob_pred.update( pgru_a_xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 436 | plt.show(block=False) 437 | fig.canvas.draw() 438 | 439 | # Plot the gru-d prediction 440 | gru_d_ob_pred.update( gru_d_xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 441 | plt.show(block=False) 442 | fig.canvas.draw() 443 | 444 | # Plot the pgru-ac prediction 445 | pgru_d_ob_pred.update( pgru_d_xyz_pred[i,:], lcolor="#9b59b6", rcolor="#2ecc71" ) 446 | plt.show(block=False) 447 | fig.canvas.draw() 448 | writer.grab_frame() 449 | 450 | if __name__ == '__main__': 451 | main() 452 | -------------------------------------------------------------------------------- /metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jul 27 01:41:56 2018 5 | 6 | @author: anand 7 | """ 8 | 9 | import h5py 10 | import numpy as np 11 | import sklearn.metrics.pairwise as metrics 12 | import matplotlib.pyplot as plt 13 | import matplotlib.style as style 14 | import data_utils 15 | import forward_kinematics 16 | 17 | def read_data(fname): 18 | 19 | hf = h5py.File(fname,'r') 20 | action = 'discussion' 21 | 22 | gt_sequences = np.zeros((8, 100, 99)) 23 | pred_sequences = np.zeros((8, 100, 99)) 24 | 25 | euler_gt_sequences = np.zeros((8, 100, 99)) 26 | euler_pred_sequences = np.zeros((8, 100, 99)) 27 | 28 | error_hf = hf.get('mean_'+ action + '_error/') 29 | errors = np.array(error_hf) 30 | 31 | for i in range(8): 32 | gt_fname = 'expmap/gt/' + action + '_' + str(i) 33 | n1 = np.array(hf.get(gt_fname)) 34 | gt_sequences[i,:,:] = n1 35 | 36 | pred_fname = 'expmap/preds/' + action + '_' + str(i) 37 | n2 = np.array(hf.get(pred_fname)) 38 | pred_sequences[i,:,:] = n2 39 | 40 | # converting back to euler angles 41 | for j in np.arange( gt_sequences.shape[1] ): 42 | for k in np.arange(3,97,3): 43 | euler_gt_sequences[i, j, k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat( gt_sequences[i, j, k:k+3] )) 44 | euler_pred_sequences[i, j, k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat( pred_sequences[i, j, k:k+3] )) 45 | 46 | euler_gt_sequences[i,:,0:6] = 0 47 | euler_pred_sequences[i,:,0:6] = 0 48 | 49 | return euler_gt_sequences, euler_pred_sequences, errors 50 | 51 | def compute_metrics(euler_gt_sequences, euler_pred_sequences): 52 
| 53 | # computing 1) fourier coeffs 2)power of fft 3) normalizing power of fft dim-wise 4) cumsum over freq. 5) EMD 54 | gt_fourier_coeffs = np.zeros(euler_gt_sequences.shape) 55 | pred_fourier_coeffs = np.zeros(euler_pred_sequences.shape) 56 | 57 | # power vars 58 | gt_power = np.zeros((gt_fourier_coeffs.shape)) 59 | pred_power = np.zeros((gt_fourier_coeffs.shape)) 60 | 61 | # normalizing power vars 62 | gt_norm_power = np.zeros(gt_fourier_coeffs.shape) 63 | pred_norm_power = np.zeros(gt_fourier_coeffs.shape) 64 | 65 | cdf_gt_power = np.zeros(gt_norm_power.shape) 66 | cdf_pred_power = np.zeros(pred_norm_power.shape) 67 | 68 | emd = np.zeros(cdf_pred_power.shape[0:3:2]) 69 | 70 | # used to store powers of feature_dims and sequences used for avg later 71 | seq_feature_power = np.zeros(euler_gt_sequences.shape[0:3:2]) 72 | power_weighted_emd = 0 73 | 74 | for s in range(euler_gt_sequences.shape[0]): 75 | 76 | for d in range(euler_gt_sequences.shape[2]): 77 | gt_fourier_coeffs[s,:,d] = np.fft.fft(euler_gt_sequences[s,:,d]) # slice is 1D array 78 | pred_fourier_coeffs[s,:,d] = np.fft.fft(euler_pred_sequences[s,:,d]) 79 | 80 | # computing power of fft per sequence per dim 81 | gt_power[s,:,d] = np.square(np.absolute(gt_fourier_coeffs[s,:,d])) 82 | pred_power[s,:,d] = np.square(np.absolute(pred_fourier_coeffs[s,:,d])) 83 | 84 | # matching power of gt and pred sequences 85 | gt_total_power = np.sum(gt_power[s,:,d]) 86 | pred_total_power = np.sum(pred_power[s,:,d]) 87 | #power_diff = gt_total_power - pred_total_power 88 | 89 | # adding power diff to zero freq of pred seq 90 | #pred_power[s,0,d] = pred_power[s,0,d] + power_diff 91 | 92 | # computing seq_power and feature_dims power 93 | seq_feature_power[s,d] = gt_total_power 94 | 95 | # normalizing power per sequence per dim 96 | if gt_total_power != 0: 97 | gt_norm_power[s,:,d] = gt_power[s,:,d] / gt_total_power 98 | 99 | if pred_total_power !=0: 100 | pred_norm_power[s,:,d] = pred_power[s,:,d] / pred_total_power 101 | 102 | # computing cumsum over freq 103 | cdf_gt_power[s,:,d] = np.cumsum(gt_norm_power[s,:,d]) # slice is 1D 104 | cdf_pred_power[s,:,d] = np.cumsum(pred_norm_power[s,:,d]) 105 | 106 | # computing EMD 107 | emd[s,d] = np.linalg.norm((cdf_pred_power[s,:,d] - cdf_gt_power[s,:,d]), ord=1) 108 | 109 | # computing weighted emd (by sequence and feature powers) 110 | power_weighted_emd = np.average(emd, weights=seq_feature_power) 111 | 112 | return power_weighted_emd 113 | 114 | 115 | # read data from all models 116 | #gru_nl_nd_gt_sequence, gru_nl_nd_pred_sequence, gru_nl_nd_errors = read_data('../multi_exp_samples/long-term/simple_gru_no_plan_no_deriv/discussion_samples_v2.h5') 117 | #pgru_d_nl_gt_sequence, pgru_d_nl_pred_sequence, pgru_d_nl_errors = read_data('../final_exp_samples/long-term/full_pgru_no_loss/discussion_samples_v2.h5') 118 | #no_plan_gt_sequence, no_plan_pred_sequence, no_plan_errors = read_data('../final_exp_samples/long-term/gru-d/discussion_samples.h5') 119 | #plan_gt_sequence, plan_pred_sequence, plan_errors = read_data('../final_exp_samples/long-term/pgru-d/discussion_samples.h5') 120 | #multi_base_gt, multi_base_pred, multi_base_errors = read_data('../multi_action_samples/samples_1_layer_attn_drop_0.2_5k.h5') 121 | 122 | # compute metrics for all models 123 | multi_base_npss = compute_metrics(multi_base_gt, multi_base_pred) 124 | #gru_nl_nd_emd = compute_metrics(gru_nl_nd_gt_sequence, gru_nl_nd_pred_sequence) 125 | #pgru_d_nl_emd = compute_metrics(pgru_d_nl_gt_sequence, pgru_d_nl_pred_sequence) 126 | #no_plan_emd 
= compute_metrics(no_plan_gt_sequence, no_plan_pred_sequence) 127 | #plan_emd = compute_metrics(plan_gt_sequence, plan_pred_sequence) 128 | #auto_cond_emd = compute_metrics(auto_cond_gt_sequence, auto_cond_pred_sequence) 129 | #jul_emd = compute_metrics(jul_gt_sequence, jul_pred_sequence) 130 | #skip_1_emd = compute_metrics(skip_1_gt_sequence, skip_1_pred_sequence) 131 | #jul_unsup_emd = compute_metrics(jul_unsup_gt_sequence, jul_unsup_pred_sequence) 132 | 133 | -------------------------------------------------------------------------------- /rnn_cell_extensions.py: -------------------------------------------------------------------------------- 1 | 2 | """ Extensions to TF RNN class by una_dinosaria""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import tensorflow as tf 9 | 10 | #from tensorflow.contrib.rnn.python.ops.core_rnn_cell import RNNCell 11 | from rnn_cell_impl import RNNCell # cell definitions with layer_norm 12 | 13 | 14 | # The import for LSTMStateTuple changes in TF >= 1.2.0 15 | from pkg_resources import parse_version as pv 16 | if pv(tf.__version__) >= pv('1.2.0'): 17 | from tensorflow.contrib.rnn import LSTMStateTuple 18 | else: 19 | from tensorflow.contrib.rnn.python.ops.core_rnn_cell import LSTMStateTuple 20 | del pv 21 | 22 | from tensorflow.python.ops import variable_scope as vs 23 | 24 | import collections 25 | import math 26 | 27 | class ResidualWrapper(RNNCell): 28 | """Operator adding residual connections to a given cell.""" 29 | 30 | def __init__(self, cell): 31 | """Create a cell with added residual connection. 32 | 33 | Args: 34 | cell: an RNNCell. The input is added to the output. 35 | 36 | Raises: 37 | TypeError: if cell is not an RNNCell. 38 | """ 39 | if not isinstance(cell, RNNCell): 40 | raise TypeError("The parameter cell is not a RNNCell.") 41 | 42 | self._cell = cell 43 | 44 | @property 45 | def state_size(self): 46 | return self._cell.state_size 47 | 48 | @property 49 | def output_size(self): 50 | return self._cell.output_size 51 | 52 | def __call__(self, inputs, state, scope=None): 53 | """Run the cell and add a residual connection.""" 54 | 55 | # Run the rnn as usual 56 | output, new_state = self._cell(inputs, state, scope) 57 | 58 | # Add the residual connection 59 | output = tf.add(output, inputs) 60 | 61 | return output, new_state 62 | 63 | 64 | class ResidualWrapperv1(RNNCell): 65 | """Operator adding residual connections to a given cell.""" 66 | 67 | def __init__(self, cell, output_size): 68 | """Create a cell with added residual connection. 69 | 70 | Args: 71 | cell: an RNNCell. The input is added to the output. 72 | 73 | Raises: 74 | TypeError: if cell is not an RNNCell. 
75 | """ 76 | if not isinstance(cell, RNNCell): 77 | raise TypeError("The parameter cell is not a RNNCell.") 78 | 79 | self._cell = cell 80 | self._output_size = output_size 81 | 82 | self.r = tf.get_variable("r_interp", [self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 83 | 84 | @property 85 | def state_size(self): 86 | return self._cell.state_size 87 | 88 | @property 89 | def output_size(self): 90 | return self._cell.output_size 91 | 92 | def __call__(self, inputs, state, scope=None): # modified 93 | """Run the cell and add a residual connection.""" 94 | 95 | # Run the rnn as usual 96 | output, new_state = self._cell(inputs, state, scope) # modified 97 | 98 | # perform residual_v1 interpolation op 99 | output = (1.0 - self.r) * output + self.r * inputs 100 | 101 | return output, new_state 102 | 103 | 104 | class ResidualWrapperv2(RNNCell): 105 | """Operator adding residual connections to a given cell.""" 106 | 107 | def __init__(self, cell, output_size): 108 | """Create a cell with added residual connection. 109 | 110 | Args: 111 | cell: an RNNCell. The input is added to the output. 112 | 113 | Raises: 114 | TypeError: if cell is not an RNNCell. 115 | """ 116 | if not isinstance(cell, RNNCell): 117 | raise TypeError("The parameter cell is not a RNNCell.") 118 | 119 | self._cell = cell 120 | self._output_size = output_size 121 | 122 | self.r = tf.get_variable("r_interp", [self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 123 | self.W_res = tf.get_variable("W_res", [self._output_size, self._output_size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) 124 | self.b_res = tf.get_variable("b_res", [self._output_size], dtype=tf.float32, initializer=tf.constant_initializer(0.1)) 125 | 126 | @property 127 | def state_size(self): 128 | return self._cell.state_size 129 | 130 | @property 131 | def output_size(self): 132 | return self._cell.output_size 133 | 134 | def __call__(self, inputs, state, scope=None): # modified 135 | """Run the cell and add a residual connection.""" 136 | 137 | # Run the rnn as usual 138 | output, new_state = self._cell(inputs, state, scope) # modified 139 | 140 | # perform residual_v2 interpolation op 141 | output = (1.0 - self.r) * output + self.r * (tf.matmul(inputs, self.W_res) + self.b_res) 142 | 143 | return output, new_state 144 | 145 | 146 | 147 | 148 | class LinearSpaceDecoderWrapper(RNNCell): 149 | """Operator adding a linear encoder to an RNN cell""" 150 | 151 | def __init__(self, cell, output_size): 152 | """Create a cell with with a linear encoder in space. 153 | 154 | Args: 155 | cell: an RNNCell. The input is passed through a linear layer. 156 | 157 | Raises: 158 | TypeError: if cell is not an RNNCell. 159 | """ 160 | if not isinstance(cell, RNNCell): 161 | raise TypeError("The parameter cell is not a RNNCell.") 162 | 163 | self._cell = cell 164 | 165 | print( 'output_size = {0}'.format(output_size) ) 166 | print( ' state_size = {0}'.format(self._cell.state_size) ) 167 | 168 | # Tuple if multi-rnn 169 | if isinstance(self._cell.state_size,tuple): 170 | 171 | # Fine if GRU... 
172 | insize = self._cell.state_size[-1] 173 | 174 | # LSTMStateTuple if LSTM 175 | if isinstance( insize, LSTMStateTuple ): 176 | insize = insize.h 177 | 178 | else: 179 | # Fine if not multi-rnn 180 | insize = self._cell.state_size 181 | 182 | self.w_out = tf.get_variable("proj_w_out", 183 | [insize, output_size], 184 | dtype=tf.float32, 185 | initializer=tf.random_uniform_initializer(minval=-0.04, maxval=0.04)) 186 | self.b_out = tf.get_variable("proj_b_out", [output_size], 187 | dtype=tf.float32, 188 | initializer=tf.random_uniform_initializer(minval=-0.04, maxval=0.04)) 189 | 190 | self.linear_output_size = output_size 191 | 192 | 193 | @property 194 | def state_size(self): 195 | return self._cell.state_size 196 | 197 | @property 198 | def output_size(self): 199 | return self.linear_output_size 200 | 201 | def __call__(self, inputs, state, scope=None): 202 | """Use a linear layer and pass the output to the cell.""" 203 | 204 | # Run the rnn as usual 205 | output, new_state = self._cell(inputs, state, scope) 206 | 207 | # Apply the multiplication to everything 208 | output = tf.matmul(output, self.w_out) + self.b_out 209 | 210 | return output, new_state 211 | -------------------------------------------------------------------------------- /translate_lm.py: -------------------------------------------------------------------------------- 1 | 2 | """Simple code for training an RNN for motion prediction.""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import math 9 | import os 10 | import random 11 | import sys 12 | import time 13 | import h5py 14 | 15 | import numpy as np 16 | from six.moves import xrange # pylint: disable=redefined-builtin 17 | import tensorflow as tf 18 | 19 | import data_utils 20 | import seq2seq_model 21 | import motion_rnn_lm 22 | 23 | # Learning 24 | tf.app.flags.DEFINE_float("learning_rate", .0002, "Learning rate.") 25 | tf.app.flags.DEFINE_float("learning_rate_decay_factor", 0.8, "Learning rate is multiplied by this much. 1 means no decay.") 26 | tf.app.flags.DEFINE_integer("learning_rate_step", 2000, "Every this many steps, do decay.") 27 | tf.app.flags.DEFINE_float("max_gradient_norm", 1, "Clip gradients to this norm.") 28 | tf.app.flags.DEFINE_integer("batch_size", 32, "Batch size to use during training.") 29 | tf.app.flags.DEFINE_integer("iterations", int(2e4), "Iterations to train for.") 30 | # Architecture 31 | tf.app.flags.DEFINE_string("architecture", "tied", "Seq2seq architecture to use: [basic, tied].") 32 | tf.app.flags.DEFINE_string("loop_type", "closed", "loop type to use: [open, closed].") 33 | tf.app.flags.DEFINE_integer("body_size", 128, "Size of each body rnn model layer.") 34 | tf.app.flags.DEFINE_string("body_cell", "gru", "RNN cell type of body rnn : [elman, lstm, gru]") 35 | tf.app.flags.DEFINE_integer("plan_size", 128, "Size of each plan rnn model layer.") 36 | tf.app.flags.DEFINE_string("plan_cell", "gru", "RNN cell type of body rnn : [elman, lstm, gru]") 37 | tf.app.flags.DEFINE_integer("num_layers", 1, "Number of layers in the model.") 38 | tf.app.flags.DEFINE_integer("seq_length_in", 50, "Number of frames to feed into the encoder. 25 fps") 39 | tf.app.flags.DEFINE_integer("seq_length_out", 25, "Number of frames that the decoder has to predict. 
25fps") 40 | tf.app.flags.DEFINE_boolean("omit_one_hot", False, "Whether to remove one-hot encoding from the data") 41 | tf.app.flags.DEFINE_boolean("residual_velocities", True, "Add a residual connection that effectively models velocities") 42 | # Directories 43 | tf.app.flags.DEFINE_string("data_dir", os.path.normpath("./data/h3.6m/dataset"), "Data directory") 44 | tf.app.flags.DEFINE_string("train_dir", os.path.normpath("./experiments/"), "Training directory.") 45 | 46 | tf.app.flags.DEFINE_string("action","all", "The action to train on. all means all the actions, all_periodic means walking, eating and smoking") 47 | tf.app.flags.DEFINE_string("loss_to_use","sampling_based", "The type of loss to use, supervised or sampling_based") 48 | 49 | tf.app.flags.DEFINE_integer("test_every", 1000, "How often to compute error on the test set.") 50 | tf.app.flags.DEFINE_integer("save_every", 1000, "How often to compute error on the test set.") 51 | tf.app.flags.DEFINE_boolean("sample", False, "Set to True for sampling.") 52 | tf.app.flags.DEFINE_boolean("use_cpu", False, "Whether to use the CPU") 53 | tf.app.flags.DEFINE_integer("load", 0, "Try to load a previous checkpoint.") 54 | 55 | FLAGS = tf.app.flags.FLAGS 56 | 57 | train_dir = os.path.normpath(os.path.join( FLAGS.train_dir, FLAGS.action, 58 | 'out_{0}'.format(FLAGS.seq_length_out), 59 | 'iterations_{0}'.format(FLAGS.iterations), 60 | FLAGS.architecture, 61 | 'loop_type_{0}'.format(FLAGS.loop_type), 62 | FLAGS.loss_to_use, 63 | 'omit_one_hot' if FLAGS.omit_one_hot else 'one_hot', 64 | 'depth_{0}'.format(FLAGS.num_layers), 65 | 'plan_cell_{0}'.format(FLAGS.plan_cell), 66 | 'plan_size_{0}'.format(FLAGS.plan_size), 67 | 'body_cell_{0}'.format(FLAGS.body_cell), 68 | 'body_size_{0}'.format(FLAGS.body_size), 69 | 'lr_{0}'.format(FLAGS.learning_rate), 70 | 'residual_vel' if FLAGS.residual_velocities else 'not_residual_vel')) 71 | 72 | summaries_dir = os.path.normpath(os.path.join( train_dir, "log" )) # Directory for TB summaries 73 | 74 | def create_model(session, actions, sampling=False): 75 | """Create translation model and initialize or load parameters in session.""" 76 | 77 | model = motion_rnn_lm.MotionRNNModelLM( 78 | FLAGS.architecture, 79 | FLAGS.loop_type, 80 | FLAGS.seq_length_in if not sampling else 50, 81 | FLAGS.seq_length_out if not sampling else 100, 82 | FLAGS.body_size, 83 | FLAGS.body_cell, 84 | FLAGS.plan_size, 85 | FLAGS.plan_cell, 86 | FLAGS.num_layers, 87 | FLAGS.max_gradient_norm, 88 | FLAGS.batch_size, 89 | FLAGS.learning_rate, 90 | FLAGS.learning_rate_decay_factor, 91 | summaries_dir, 92 | FLAGS.loss_to_use if not sampling else "sampling_based", 93 | len( actions ), 94 | not FLAGS.omit_one_hot, 95 | FLAGS.residual_velocities, 96 | dtype=tf.float32) 97 | 98 | if FLAGS.load <= 0: 99 | print("Creating model with fresh parameters.") 100 | session.run(tf.global_variables_initializer()) 101 | return model 102 | 103 | ckpt = tf.train.get_checkpoint_state( train_dir, latest_filename="checkpoint") 104 | print( "train_dir", train_dir ) 105 | 106 | if ckpt and ckpt.model_checkpoint_path: 107 | # Check if the specific checkpoint exists 108 | if FLAGS.load > 0: 109 | if os.path.isfile(os.path.join(train_dir,"checkpoint-{0}.index".format(FLAGS.load))): 110 | ckpt_name = os.path.normpath(os.path.join( os.path.join(train_dir,"checkpoint-{0}".format(FLAGS.load)) )) 111 | else: 112 | raise ValueError("Asked to load checkpoint {0}, but it does not seem to exist".format(FLAGS.load)) 113 | else: 114 | ckpt_name = os.path.basename( 
ckpt.model_checkpoint_path ) 115 | 116 | print("Loading model {0}".format( ckpt_name )) 117 | model.saver.restore( session, ckpt.model_checkpoint_path ) 118 | return model 119 | else: 120 | print("Could not find checkpoint. Aborting.") 121 | raise( ValueError, "Checkpoint {0} does not seem to exist".format( ckpt.model_checkpoint_path ) ) 122 | 123 | return model 124 | 125 | 126 | def train(): 127 | """Train a seq2seq model on human motion""" 128 | 129 | actions = define_actions( FLAGS.action ) 130 | 131 | number_of_actions = len( actions ) 132 | 133 | train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = read_all_data( actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot ) 134 | 135 | # Limit TF to take a fraction of the GPU memory 136 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95) 137 | device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1} 138 | 139 | with tf.Session(config=tf.ConfigProto( gpu_options=gpu_options, device_count = device_count )) as sess: 140 | 141 | # === Create the model === 142 | print("Creating %d layers of %d units for plan RNN." % (FLAGS.num_layers, FLAGS.plan_size)) 143 | print("Creating %d layers of %d units for body RNN." % (FLAGS.num_layers, FLAGS.body_size)) 144 | 145 | model = create_model( sess, actions ) 146 | model.train_writer.add_graph( sess.graph ) 147 | print( "Model created" ) 148 | 149 | # === Read and denormalize the gt with srnn's seeds, as we'll need them 150 | # many times for evaluation in Euler Angles === 151 | srnn_gts_euler = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot ) # modified 152 | 153 | #=== This is the training loop === 154 | step_time, loss, val_loss = 0.0, 0.0, 0.0 155 | current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1 156 | previous_losses = [] 157 | 158 | step_time, loss = 0, 0 159 | 160 | for _ in xrange( FLAGS.iterations ): 161 | 162 | start_time = time.time() 163 | 164 | # === Training step === 165 | encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs = model.get_batch( train_set, FLAGS.omit_one_hot ) # modified 166 | _, step_loss, loss_summary, lr_summary = model.step( sess, encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs, False ) 167 | model.train_writer.add_summary( loss_summary, current_step ) 168 | model.train_writer.add_summary( lr_summary, current_step ) 169 | 170 | if current_step % 10 == 0: 171 | print("step {0:04d}; step_loss: {1:.4f}".format(current_step, step_loss )) 172 | 173 | step_time += (time.time() - start_time) / FLAGS.test_every 174 | loss += step_loss / FLAGS.test_every 175 | current_step += 1 176 | 177 | # === step decay === 178 | #if current_step % FLAGS.learning_rate_step == 0: 179 | # sess.run(model.learning_rate_decay_op) 180 | 181 | # Once in a while, we save checkpoint, print statistics, and run evals. 
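# --- Editor's note (illustrative, not part of the original file): the block
# below reports Euler-angle error at 80, 160, 320, 400, 560 and 1000 ms. The
# data is sampled at 25 fps, so each predicted frame covers 40 ms and the
# horizon T ms sits at 0-based frame index T/40 - 1 -- hence the hard-coded
# indices [1, 3, 7, 9, 13, 24] used in the pretty-printing further down:
#
#   def ms_to_frame_idx(ms, fps=25):   # hypothetical helper, for clarity only
#       frame_ms = 1000 // fps         # 40 ms per frame at 25 fps
#       return ms // frame_ms - 1      # 80 -> 1, 160 -> 3, ..., 1000 -> 24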
182 | if current_step % FLAGS.test_every == 0: 183 | 184 | # === Validation with randomly chosen seeds === 185 | forward_only = True 186 | 187 | encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs = model.get_batch( test_set, FLAGS.omit_one_hot ) # modified 188 | step_loss, loss_summary = model.step(sess, encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs, forward_only) 189 | val_loss = step_loss # Loss book-keeping 190 | 191 | model.test_writer.add_summary(loss_summary, current_step) 192 | 193 | print() 194 | print("{0: <16} |".format("milliseconds"), end="") 195 | for ms in [80, 160, 320, 400, 560, 1000]: 196 | print(" {0:5d} |".format(ms), end="") 197 | print() 198 | 199 | # === Validation with srnn's seeds === 200 | for action in actions: 201 | 202 | # Evaluate the model on the test batches 203 | encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs = model.get_batch_srnn( test_set, action ) 204 | srnn_loss, srnn_poses, _ = model.step(sess, encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs, True, True) 205 | 206 | # Denormalize the output 207 | srnn_pred_expmap = data_utils.revert_output_format( srnn_poses, data_mean, data_std, dim_to_ignore, actions, FLAGS.omit_one_hot ) # modified 208 | 209 | # Save the errors here 210 | mean_errors = np.zeros( (len(srnn_pred_expmap), srnn_pred_expmap[0].shape[0]) ) 211 | 212 | # Training is done in exponential map, but the error is reported in 213 | # Euler angles, as in previous work. 214 | # See https://github.com/asheshjain399/RNNexp/issues/6#issuecomment-247769197 215 | N_SEQUENCE_TEST = 8 216 | for i in np.arange(N_SEQUENCE_TEST): 217 | eulerchannels_pred = srnn_pred_expmap[i] 218 | 219 | # Convert from exponential map to Euler angles 220 | for j in np.arange( eulerchannels_pred.shape[0] ): 221 | for k in np.arange(3,97,3): 222 | eulerchannels_pred[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( eulerchannels_pred[j,k:k+3] )) 223 | 224 | # The global translation (first 3 entries) and global rotation 225 | # (next 3 entries) are also not considered in the error, so they 226 | # are set to zero. 227 | # See https://github.com/asheshjain399/RNNexp/issues/6#issuecomment-249404882 228 | gt_i=np.copy(srnn_gts_euler[action][i]) 229 | gt_i[:,0:6] = 0 230 | 231 | # Now compute the l2 error.
The following is numpy port of the error 232 | # function provided by Ashesh Jain (in matlab), available at 233 | # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/dataParser/Utils/motionGenerationError.m#L40-L54 234 | idx_to_use = np.where( np.std( gt_i, 0 ) > 1e-4 )[0] 235 | 236 | euc_error = np.power( gt_i[:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2) 237 | euc_error = np.sum(euc_error, 1) 238 | euc_error = np.sqrt( euc_error ) 239 | mean_errors[i,:] = euc_error 240 | 241 | # This is simply the mean error over the N_SEQUENCE_TEST examples 242 | mean_mean_errors = np.mean( mean_errors, 0 ) 243 | 244 | # Pretty print of the results for 80, 160, 320, 400, 560 and 1000 ms 245 | print("{0: <16} |".format(action), end="") 246 | for ms in [1,3,7,9,13,24]: 247 | if FLAGS.seq_length_out >= ms+1: 248 | print(" {0:.3f} |".format( mean_mean_errors[ms] ), end="") 249 | else: 250 | print(" n/a |", end="") 251 | print() 252 | 253 | # Ugly massive if-then to log the error to tensorboard :shrug: 254 | if action == "walking": 255 | summaries = sess.run( 256 | [model.walking_err80_summary, 257 | model.walking_err160_summary, 258 | model.walking_err320_summary, 259 | model.walking_err400_summary, 260 | model.walking_err560_summary, 261 | model.walking_err1000_summary], 262 | {model.walking_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 263 | model.walking_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 264 | model.walking_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 265 | model.walking_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 266 | model.walking_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 267 | model.walking_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 268 | elif action == "eating": 269 | summaries = sess.run( 270 | [model.eating_err80_summary, 271 | model.eating_err160_summary, 272 | model.eating_err320_summary, 273 | model.eating_err400_summary, 274 | model.eating_err560_summary, 275 | model.eating_err1000_summary], 276 | {model.eating_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 277 | model.eating_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 278 | model.eating_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 279 | model.eating_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 280 | model.eating_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 281 | model.eating_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 282 | elif action == "smoking": 283 | summaries = sess.run( 284 | [model.smoking_err80_summary, 285 | model.smoking_err160_summary, 286 | model.smoking_err320_summary, 287 | model.smoking_err400_summary, 288 | model.smoking_err560_summary, 289 | model.smoking_err1000_summary], 290 | {model.smoking_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 291 | model.smoking_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 292 | model.smoking_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 293 | model.smoking_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 294 | model.smoking_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 295 | model.smoking_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 296 | elif action == "discussion": 297 | summaries = sess.run( 298 | 
[model.discussion_err80_summary, 299 | model.discussion_err160_summary, 300 | model.discussion_err320_summary, 301 | model.discussion_err400_summary, 302 | model.discussion_err560_summary, 303 | model.discussion_err1000_summary], 304 | {model.discussion_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 305 | model.discussion_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 306 | model.discussion_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 307 | model.discussion_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 308 | model.discussion_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 309 | model.discussion_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 310 | elif action == "directions": 311 | summaries = sess.run( 312 | [model.directions_err80_summary, 313 | model.directions_err160_summary, 314 | model.directions_err320_summary, 315 | model.directions_err400_summary, 316 | model.directions_err560_summary, 317 | model.directions_err1000_summary], 318 | {model.directions_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 319 | model.directions_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 320 | model.directions_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 321 | model.directions_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 322 | model.directions_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 323 | model.directions_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 324 | elif action == "greeting": 325 | summaries = sess.run( 326 | [model.greeting_err80_summary, 327 | model.greeting_err160_summary, 328 | model.greeting_err320_summary, 329 | model.greeting_err400_summary, 330 | model.greeting_err560_summary, 331 | model.greeting_err1000_summary], 332 | {model.greeting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 333 | model.greeting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 334 | model.greeting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 335 | model.greeting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 336 | model.greeting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 337 | model.greeting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 338 | elif action == "phoning": 339 | summaries = sess.run( 340 | [model.phoning_err80_summary, 341 | model.phoning_err160_summary, 342 | model.phoning_err320_summary, 343 | model.phoning_err400_summary, 344 | model.phoning_err560_summary, 345 | model.phoning_err1000_summary], 346 | {model.phoning_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 347 | model.phoning_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 348 | model.phoning_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 349 | model.phoning_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 350 | model.phoning_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 351 | model.phoning_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 352 | elif action == "posing": 353 | summaries = sess.run( 354 | [model.posing_err80_summary, 355 | model.posing_err160_summary, 356 | model.posing_err320_summary, 357 | model.posing_err400_summary, 358 | model.posing_err560_summary, 359 | model.posing_err1000_summary], 360 | 
{model.posing_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 361 | model.posing_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 362 | model.posing_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 363 | model.posing_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 364 | model.posing_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 365 | model.posing_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 366 | elif action == "purchases": 367 | summaries = sess.run( 368 | [model.purchases_err80_summary, 369 | model.purchases_err160_summary, 370 | model.purchases_err320_summary, 371 | model.purchases_err400_summary, 372 | model.purchases_err560_summary, 373 | model.purchases_err1000_summary], 374 | {model.purchases_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 375 | model.purchases_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 376 | model.purchases_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 377 | model.purchases_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 378 | model.purchases_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 379 | model.purchases_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 380 | elif action == "sitting": 381 | summaries = sess.run( 382 | [model.sitting_err80_summary, 383 | model.sitting_err160_summary, 384 | model.sitting_err320_summary, 385 | model.sitting_err400_summary, 386 | model.sitting_err560_summary, 387 | model.sitting_err1000_summary], 388 | {model.sitting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 389 | model.sitting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 390 | model.sitting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 391 | model.sitting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 392 | model.sitting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 393 | model.sitting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 394 | elif action == "sittingdown": 395 | summaries = sess.run( 396 | [model.sittingdown_err80_summary, 397 | model.sittingdown_err160_summary, 398 | model.sittingdown_err320_summary, 399 | model.sittingdown_err400_summary, 400 | model.sittingdown_err560_summary, 401 | model.sittingdown_err1000_summary], 402 | {model.sittingdown_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 403 | model.sittingdown_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 404 | model.sittingdown_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 405 | model.sittingdown_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 406 | model.sittingdown_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 407 | model.sittingdown_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 408 | elif action == "takingphoto": 409 | summaries = sess.run( 410 | [model.takingphoto_err80_summary, 411 | model.takingphoto_err160_summary, 412 | model.takingphoto_err320_summary, 413 | model.takingphoto_err400_summary, 414 | model.takingphoto_err560_summary, 415 | model.takingphoto_err1000_summary], 416 | {model.takingphoto_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 417 | model.takingphoto_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 418 | 
model.takingphoto_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 419 | model.takingphoto_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 420 | model.takingphoto_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 421 | model.takingphoto_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 422 | elif action == "waiting": 423 | summaries = sess.run( 424 | [model.waiting_err80_summary, 425 | model.waiting_err160_summary, 426 | model.waiting_err320_summary, 427 | model.waiting_err400_summary, 428 | model.waiting_err560_summary, 429 | model.waiting_err1000_summary], 430 | {model.waiting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 431 | model.waiting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 432 | model.waiting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 433 | model.waiting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 434 | model.waiting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 435 | model.waiting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 436 | elif action == "walkingdog": 437 | summaries = sess.run( 438 | [model.walkingdog_err80_summary, 439 | model.walkingdog_err160_summary, 440 | model.walkingdog_err320_summary, 441 | model.walkingdog_err400_summary, 442 | model.walkingdog_err560_summary, 443 | model.walkingdog_err1000_summary], 444 | {model.walkingdog_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 445 | model.walkingdog_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 446 | model.walkingdog_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 447 | model.walkingdog_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 448 | model.walkingdog_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 449 | model.walkingdog_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 450 | elif action == "walkingtogether": 451 | summaries = sess.run( 452 | [model.walkingtogether_err80_summary, 453 | model.walkingtogether_err160_summary, 454 | model.walkingtogether_err320_summary, 455 | model.walkingtogether_err400_summary, 456 | model.walkingtogether_err560_summary, 457 | model.walkingtogether_err1000_summary], 458 | {model.walkingtogether_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 459 | model.walkingtogether_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 460 | model.walkingtogether_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 461 | model.walkingtogether_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 462 | model.walkingtogether_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 463 | model.walkingtogether_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 464 | 465 | for i in np.arange(len( summaries )): 466 | model.test_writer.add_summary(summaries[i], current_step) 467 | 468 | 469 | print() 470 | print("============================\n" 471 | "Global step: %d\n" 472 | "Learning rate: %.6f\n" 473 | "Step-time (ms): %.4f\n" 474 | "Train loss avg: %.4f\n" 475 | "--------------------------\n" 476 | "Val loss: %.4f\n" 477 | "srnn loss: %.4f\n" 478 | "============================" % (model.global_step.eval(), 479 | model.learning_rate.eval(), step_time*1000, loss, 480 | val_loss, srnn_loss)) 481 | print() 482 | 483 | previous_losses.append(loss) 484 | 
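# --- Editor's aside (a sketch, not the authors' code): the per-action if/elif
# ladder above feeds the same six "<action>_err<ms>" placeholders and fetches
# the matching "<action>_err<ms>_summary" ops for every action. Assuming, as
# with this file's defaults, that seq_length_out covers all six horizons, the
# ladder could be collapsed with attribute lookups:
#
#   horizons = [80, 160, 320, 400, 560, 1000]
#   feeds = {getattr(model, "%s_err%d" % (action, ms)): mean_mean_errors[ms // 40 - 1]
#            for ms in horizons}
#   summaries = sess.run([getattr(model, "%s_err%d_summary" % (action, ms))
#                         for ms in horizons], feeds)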
485 | # Save the model 486 | if current_step % FLAGS.save_every == 0: 487 | print( "Saving the model..." ); start_time = time.time() 488 | model.saver.save(sess, os.path.normpath(os.path.join(train_dir, 'checkpoint')), global_step=current_step ) 489 | print( "done in {0:.2f} ms".format( (time.time() - start_time)*1000) ) 490 | 491 | # Reset global time and loss 492 | step_time, loss = 0, 0 493 | 494 | sys.stdout.flush() 495 | 496 | 497 | def get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, one_hot, to_euler=True ): 498 | """ 499 | Get the ground truths for srnn's sequences, and convert to Euler angles. 500 | (the error is always computed in Euler angles). 501 | 502 | Args 503 | actions: a list of actions to get ground truths for. 504 | model: training model we are using (we only use the "get_batch" method). 505 | test_set: dictionary with normalized training data. 506 | data_mean: d-long vector with the mean of the training data. 507 | data_std: d-long vector with the standard deviation of the training data. 508 | dim_to_ignore: dimensions that we are not using to train/predict. 509 | one_hot: whether the data comes with one-hot encoding indicating action. 510 | to_euler: whether to convert the angles to Euler format or keep them in exponential map 511 | 512 | Returns 513 | srnn_gts_euler: a dictionary where the keys are actions, and the values 514 | are the ground-truth, denormalized expected outputs of srnn's seeds. 515 | """ 516 | srnn_gts_euler = {} 517 | 518 | for action in actions: 519 | 520 | srnn_gt_euler = [] 521 | _,_, _, _, srnn_expmap = model.get_batch_srnn( test_set, action ) # modified 522 | 523 | # expmap -> rotmat -> euler 524 | for i in np.arange( srnn_expmap.shape[0] ): 525 | denormed = data_utils.unNormalizeData(srnn_expmap[i,:,:], data_mean, data_std, dim_to_ignore, actions, one_hot ) 526 | 527 | if to_euler: 528 | for j in np.arange( denormed.shape[0] ): 529 | for k in np.arange(3,97,3): 530 | denormed[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( denormed[j,k:k+3] )) 531 | 532 | srnn_gt_euler.append( denormed ) 533 | 534 | # Put back in the dictionary 535 | srnn_gts_euler[action] = srnn_gt_euler 536 | 537 | return srnn_gts_euler 538 | 539 | 540 | def sample(): 541 | """Sample predictions for srnn's seeds""" 542 | 543 | if FLAGS.load <= 0: 544 | raise ValueError("Must give an iteration to read parameters from") 545 | 546 | actions = define_actions( FLAGS.action ) 547 | 548 | # Use the CPU if asked to 549 | device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1} 550 | with tf.Session(config=tf.ConfigProto( device_count = device_count )) as sess: 551 | 552 | # === Create the model === 553 | print("Creating %d layers of %d units for plan RNN." % (FLAGS.num_layers, FLAGS.plan_size)) 554 | print("Creating %d layers of %d units for body RNN."
% (FLAGS.num_layers, FLAGS.body_size)) 555 | sampling = True 556 | model = create_model(sess, actions, sampling) 557 | print("Model created") 558 | 559 | # Load all the data 560 | train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = read_all_data(actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot ) 561 | 562 | # === Read and denormalize the gt with srnn's seeds, as we'll need them 563 | # many times for evaluation in Euler Angles === 564 | srnn_gts_expmap = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot, to_euler=False ) # modified 565 | srnn_gts_euler = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot ) # modified 566 | 567 | # Clean and create a new h5 file of samples 568 | SAMPLES_FNAME = 'samples.h5' 569 | try: 570 | os.remove( SAMPLES_FNAME ) 571 | except OSError: 572 | pass 573 | 574 | # Predict and save for each action 575 | for action in actions: 576 | 577 | # Make prediction with srnn' seeds 578 | encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs = model.get_batch_srnn( test_set, action ) 579 | forward_only = True 580 | srnn_seeds = True 581 | srnn_loss, srnn_poses, _ = model.step(sess, encoder_inputs, encoder_outputs, plan_inputs, decoder_inputs, decoder_outputs, forward_only, srnn_seeds) 582 | 583 | # denormalizes too 584 | srnn_pred_expmap = data_utils.revert_output_format( srnn_poses, data_mean, data_std, dim_to_ignore, actions, FLAGS.omit_one_hot ) # modified 585 | 586 | # Save the conditioning seeds 587 | 588 | # Save the samples 589 | with h5py.File( SAMPLES_FNAME, 'a' ) as hf: 590 | for i in np.arange(8): 591 | # Save conditioning ground truth 592 | node_name = 'expmap/gt/{1}_{0}'.format(i, action) 593 | hf.create_dataset( node_name, data=srnn_gts_expmap[action][i] ) 594 | # Save prediction 595 | node_name = 'expmap/preds/{1}_{0}'.format(i, action) 596 | hf.create_dataset( node_name, data=srnn_pred_expmap[i] ) 597 | 598 | # Compute and save the errors here 599 | mean_errors = np.zeros( (len(srnn_pred_expmap), srnn_pred_expmap[0].shape[0]) ) 600 | 601 | for i in np.arange(8): 602 | 603 | eulerchannels_pred = srnn_pred_expmap[i] 604 | 605 | for j in np.arange( eulerchannels_pred.shape[0] ): 606 | for k in np.arange(3,97,3): 607 | eulerchannels_pred[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( eulerchannels_pred[j,k:k+3] )) 608 | 609 | eulerchannels_pred[:,0:6] = 0 610 | 611 | # Pick only the dimensions with sufficient standard deviation. Others are ignored. 612 | idx_to_use = np.where( np.std( eulerchannels_pred, 0 ) > 1e-4 )[0] 613 | 614 | euc_error = np.power( srnn_gts_euler[action][i][:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2) 615 | euc_error = np.sum(euc_error, 1) 616 | euc_error = np.sqrt( euc_error ) 617 | mean_errors[i,:] = euc_error 618 | 619 | mean_mean_errors = np.mean( mean_errors, 0 ) 620 | print( action ) 621 | print( ','.join(map(str, mean_mean_errors.tolist() )) ) 622 | 623 | with h5py.File( SAMPLES_FNAME, 'a' ) as hf: 624 | node_name = 'mean_{0}_error'.format( action ) 625 | hf.create_dataset( node_name, data=mean_mean_errors ) 626 | 627 | return 628 | 629 | 630 | def define_actions( action ): 631 | """ 632 | Define the list of actions we are using. 633 | 634 | Args 635 | action: String with the passed action. 
Could be "all" 636 | Returns 637 | actions: List of strings of actions 638 | Raises 639 | ValueError if the action is not included in H3.6M 640 | """ 641 | 642 | actions = ["walking", "eating", "smoking", "discussion", "directions", 643 | "greeting", "phoning", "posing", "purchases", "sitting", 644 | "sittingdown", "takingphoto", "waiting", "walkingdog", 645 | "walkingtogether"] 646 | 647 | if action in actions: 648 | return [action] 649 | 650 | if action == "all": 651 | return actions 652 | 653 | if action == "all_srnn": 654 | return ["walking", "eating", "smoking", "discussion"] 655 | 656 | raise( ValueError, "Unrecognized action: %d" % action ) 657 | 658 | 659 | def read_all_data( actions, seq_length_in, seq_length_out, data_dir, one_hot ): 660 | """ 661 | Loads data for training/testing and normalizes it. 662 | 663 | Args 664 | actions: list of strings (actions) to load 665 | seq_length_in: number of frames to use in the burn-in sequence 666 | seq_length_out: number of frames to use in the output sequence 667 | data_dir: directory to load the data from 668 | one_hot: whether to use one-hot encoding per action 669 | Returns 670 | train_set: dictionary with normalized training data 671 | test_set: dictionary with test data 672 | data_mean: d-long vector with the mean of the training data 673 | data_std: d-long vector with the standard dev of the training data 674 | dim_to_ignore: dimensions that are not used becaused stdev is too small 675 | dim_to_use: dimensions that we are actually using in the model 676 | """ 677 | 678 | # === Read training data === 679 | print ("Reading training data (seq_len_in: {0}, seq_len_out {1}).".format( 680 | seq_length_in, seq_length_out)) 681 | 682 | train_subject_ids = [1,6,7,8,9,11] 683 | test_subject_ids = [5] 684 | 685 | train_set, complete_train = data_utils.load_data( data_dir, train_subject_ids, actions, one_hot ) 686 | test_set, complete_test = data_utils.load_data( data_dir, test_subject_ids, actions, one_hot ) 687 | 688 | # Compute normalization stats 689 | data_mean, data_std, dim_to_ignore, dim_to_use = data_utils.normalization_stats(complete_train) 690 | 691 | # MODIFIED 692 | #print(*dim_to_use) 693 | 694 | # Normalize -- subtract mean, divide by stdev 695 | train_set = data_utils.normalize_data( train_set, data_mean, data_std, dim_to_use, actions, one_hot ) 696 | test_set = data_utils.normalize_data( test_set, data_mean, data_std, dim_to_use, actions, one_hot ) 697 | print("done reading data.") 698 | 699 | return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use 700 | 701 | 702 | def main(_): 703 | if FLAGS.sample: 704 | sample() 705 | else: 706 | train() 707 | 708 | if __name__ == "__main__": 709 | tf.app.run() 710 | -------------------------------------------------------------------------------- /translate_simple_lm.py: -------------------------------------------------------------------------------- 1 | 2 | """Simple code for training an RNN for motion prediction.""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import math 9 | import os 10 | import random 11 | import sys 12 | import time 13 | import h5py 14 | 15 | import numpy as np 16 | from six.moves import xrange # pylint: disable=redefined-builtin 17 | import tensorflow as tf 18 | 19 | import data_utils 20 | import seq2seq_model 21 | import motion_rnn_simple_lm 22 | 23 | print("Setting seed.") 24 | np.random.seed(42) 25 | 26 | # Learning 27 | 
tf.app.flags.DEFINE_float("learning_rate", 0.0001, "Learning rate.") 28 | tf.app.flags.DEFINE_float("learning_rate_decay_factor", 0.6, "Learning rate is multiplied by this much. 1 means no decay.") 29 | tf.app.flags.DEFINE_integer("learning_rate_step", 1500, "Every this many steps, do decay.") 30 | tf.app.flags.DEFINE_float("max_gradient_norm", 1, "Clip gradients to this norm.") 31 | tf.app.flags.DEFINE_integer("batch_size", 32, "Batch size to use during training.") 32 | tf.app.flags.DEFINE_integer("iterations", int(1e4), "Iterations to train for.") 33 | # Architecture 34 | tf.app.flags.DEFINE_string("architecture", "tied", "Seq2seq architecture to use: [basic, tied].") 35 | tf.app.flags.DEFINE_string("loop_type", "open", "loop type to use: [open, closed].") 36 | tf.app.flags.DEFINE_integer("body_size", 512, "Size of each body rnn model layer.") 37 | tf.app.flags.DEFINE_string("body_cell", "gru", "RNN cell type of body rnn : [elman, lstm, gru]") 38 | tf.app.flags.DEFINE_integer("num_layers", 2, "Number of layers in the model.") 39 | tf.app.flags.DEFINE_integer("seq_length_in", 50, "Number of frames to feed into the encoder. 25 fps") 40 | tf.app.flags.DEFINE_integer("seq_length_out", 100, "Number of frames that the decoder has to predict. 25fps") 41 | tf.app.flags.DEFINE_boolean("omit_one_hot", False, "Whether to remove one-hot encoding from the data") 42 | tf.app.flags.DEFINE_boolean("residual_velocities", False, "Add a residual connection that effectively models velocities") 43 | # Directories 44 | tf.app.flags.DEFINE_string("data_dir", os.path.normpath("./data/h3.6m/dataset"), "Data directory") 45 | tf.app.flags.DEFINE_string("train_dir", os.path.normpath("./final_exp_samples/simple_gru_no_plan_no_deriv"), "Training directory.") 46 | 47 | tf.app.flags.DEFINE_string("action","all", "The action to train on. 
all means all the actions, all_periodic means walking, eating and smoking") 48 | tf.app.flags.DEFINE_string("loss_to_use","sampling_based", "The type of loss to use, supervised or sampling_based") 49 | 50 | tf.app.flags.DEFINE_integer("test_every", 1000, "How often to compute error on the test set.") 51 | tf.app.flags.DEFINE_integer("save_every", 5000, "How often to compute error on the test set.") 52 | tf.app.flags.DEFINE_boolean("sample", False, "Set to True for sampling.") 53 | tf.app.flags.DEFINE_boolean("use_cpu", False, "Whether to use the CPU") 54 | tf.app.flags.DEFINE_integer("load", 0, "Try to load a previous checkpoint.") 55 | 56 | FLAGS = tf.app.flags.FLAGS 57 | 58 | train_dir = os.path.normpath(os.path.join( FLAGS.train_dir, FLAGS.action, 59 | 'out_{0}'.format(FLAGS.seq_length_out), 60 | 'iterations_{0}'.format(FLAGS.iterations), 61 | FLAGS.architecture, 62 | 'loop_type_{0}'.format(FLAGS.loop_type), 63 | FLAGS.loss_to_use, 64 | 'omit_one_hot' if FLAGS.omit_one_hot else 'one_hot', 65 | 'depth_{0}'.format(FLAGS.num_layers), 66 | 'body_cell_{0}'.format(FLAGS.body_cell), 67 | 'body_size_{0}'.format(FLAGS.body_size), 68 | 'lr_{0}'.format(FLAGS.learning_rate), 69 | 'residual_vel' if FLAGS.residual_velocities else 'not_residual_vel')) 70 | 71 | summaries_dir = os.path.normpath(os.path.join( train_dir, "log" )) # Directory for TB summaries 72 | 73 | def create_model(session, actions, sampling=False): 74 | """Create translation model and initialize or load parameters in session.""" 75 | 76 | model = motion_rnn_simple_lm.MotionRNNModelSimpleLM( 77 | FLAGS.architecture, 78 | FLAGS.loop_type, 79 | FLAGS.seq_length_in if not sampling else 50, 80 | FLAGS.seq_length_out if not sampling else 100, 81 | FLAGS.body_size, 82 | FLAGS.body_cell, 83 | FLAGS.num_layers, 84 | FLAGS.max_gradient_norm, 85 | FLAGS.batch_size, 86 | FLAGS.learning_rate, 87 | FLAGS.learning_rate_decay_factor, 88 | summaries_dir, 89 | FLAGS.loss_to_use if not sampling else "sampling_based", 90 | len( actions ), 91 | not FLAGS.omit_one_hot, 92 | FLAGS.residual_velocities, 93 | dtype=tf.float32) 94 | 95 | if FLAGS.load <= 0: 96 | print("Creating model with fresh parameters.") 97 | session.run(tf.global_variables_initializer()) 98 | return model 99 | 100 | ckpt = tf.train.get_checkpoint_state( train_dir, latest_filename="checkpoint") 101 | print( "train_dir", train_dir ) 102 | 103 | if ckpt and ckpt.model_checkpoint_path: 104 | # Check if the specific checkpoint exists 105 | if FLAGS.load > 0: 106 | if os.path.isfile(os.path.join(train_dir,"checkpoint-{0}.index".format(FLAGS.load))): 107 | ckpt_name = os.path.normpath(os.path.join( os.path.join(train_dir,"checkpoint-{0}".format(FLAGS.load)) )) 108 | else: 109 | raise ValueError("Asked to load checkpoint {0}, but it does not seem to exist".format(FLAGS.load)) 110 | else: 111 | ckpt_name = os.path.basename( ckpt.model_checkpoint_path ) 112 | 113 | print("Loading model {0}".format( ckpt_name )) 114 | model.saver.restore( session, ckpt.model_checkpoint_path ) 115 | return model 116 | else: 117 | print("Could not find checkpoint. 
Aborting.") 118 | raise( ValueError, "Checkpoint {0} does not seem to exist".format( ckpt.model_checkpoint_path ) ) 119 | 120 | return model 121 | 122 | 123 | def train(): 124 | """Train a seq2seq model on human motion""" 125 | 126 | actions = define_actions( FLAGS.action ) 127 | 128 | number_of_actions = len( actions ) 129 | 130 | train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = read_all_data( actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot ) 131 | 132 | # Limit TF to take a fraction of the GPU memory 133 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95) 134 | device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1} 135 | 136 | # setting graph-level seed 137 | tf.set_random_seed(42) 138 | 139 | with tf.Session(config=tf.ConfigProto( gpu_options=gpu_options, device_count = device_count )) as sess: 140 | 141 | # === Create the model === 142 | print("Creating %d layers of %d units for body RNN." % (FLAGS.num_layers, FLAGS.body_size)) 143 | 144 | model = create_model( sess, actions ) 145 | model.train_writer.add_graph( sess.graph ) 146 | print( "Model created" ) 147 | 148 | # === Read and denormalize the gt with srnn's seeds, as we'll need them 149 | # many times for evaluation in Euler Angles === 150 | srnn_gts_euler = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot ) # modified 151 | 152 | #=== This is the training loop === 153 | step_time, loss, val_loss = 0.0, 0.0, 0.0 154 | current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1 155 | previous_losses = [] 156 | 157 | step_time, loss = 0, 0 158 | sampling_weight = 0.0 159 | 160 | sampling_schedule = [500, 1000, 2000, 4000, 7000] 161 | sampling_weights = [0.2, 0.4, 0.6, 0.8, 1.0] 162 | sampling_weight = 0.0 163 | samp_cnt = -1 164 | 165 | for _ in xrange( FLAGS.iterations ): 166 | 167 | start_time = time.time() 168 | 169 | # === Training step === 170 | encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch( train_set, FLAGS.omit_one_hot ) # modified 171 | 172 | is_training = True 173 | dropout_prob = 0.3 174 | use_sample = False 175 | 176 | if samp_cnt < len(sampling_schedule)-1: 177 | if current_step == sampling_schedule[samp_cnt+1]: 178 | sampling_weight = sampling_weights[samp_cnt+1] 179 | samp_cnt = samp_cnt + 1 180 | 181 | _, step_loss, step_sampling_loss, loss_summary, lr_summary = model.step( sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, False ) 182 | 183 | model.train_writer.add_summary( loss_summary, current_step ) 184 | model.train_writer.add_summary( lr_summary, current_step ) 185 | 186 | if current_step % 10 == 0: 187 | print("step {0:04d}; step_loss: {1:.4f}; sampling_loss: {2:.4f}".format(current_step, step_loss, sampling_weight*step_sampling_loss )) 188 | 189 | step_time += (time.time() - start_time) / FLAGS.test_every 190 | loss += step_loss / FLAGS.test_every 191 | current_step += 1 192 | 193 | # === step decay === 194 | if current_step % FLAGS.learning_rate_step == 0: #and current_step <= 60000: 195 | sess.run(model.learning_rate_decay_op) 196 | 197 | # Once in a while, we save checkpoint, print statistics, and run evals. 
198 | if current_step % FLAGS.test_every == 0: 199 | 200 | # === Validation with randomly chosen seeds === 201 | forward_only = True 202 | is_training = False 203 | use_sample = True 204 | dropout_prob = 0.0 205 | 206 | encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch( test_set, FLAGS.omit_one_hot ) # modified 207 | step_loss, loss_summary = model.step(sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, forward_only) 208 | val_loss = step_loss # Loss book-keeping 209 | 210 | model.test_writer.add_summary(loss_summary, current_step) 211 | 212 | print() 213 | print("{0: <16} |".format("milliseconds"), end="") 214 | for ms in [80, 160, 320, 400, 560, 1000]: 215 | print(" {0:5d} |".format(ms), end="") 216 | print() 217 | 218 | # === Validation with srnn's seeds === 219 | for action in actions: 220 | 221 | # Evaluate the model on the test batches 222 | encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch_srnn( test_set, action ) 223 | 224 | is_training = False 225 | use_sample = True 226 | dropout_prob = 0.0 227 | 228 | srnn_loss, srnn_poses, _ = model.step(sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, True, True) 229 | 230 | # Denormalize the output 231 | srnn_pred_expmap = data_utils.revert_output_format( srnn_poses, data_mean, data_std, dim_to_ignore, actions, FLAGS.omit_one_hot ) # modified 232 | 233 | # Save the errors here 234 | mean_errors = np.zeros( (len(srnn_pred_expmap), srnn_pred_expmap[0].shape[0]) ) 235 | 236 | # Training is done in exponential map, but the error is reported in 237 | # Euler angles, as in previous work. 238 | # See https://github.com/asheshjain399/RNNexp/issues/6#issuecomment-247769197 239 | N_SEQUENCE_TEST = 8 240 | for i in np.arange(N_SEQUENCE_TEST): 241 | eulerchannels_pred = srnn_pred_expmap[i] 242 | 243 | # Convert from exponential map to Euler angles 244 | for j in np.arange( eulerchannels_pred.shape[0] ): 245 | for k in np.arange(3,97,3): 246 | eulerchannels_pred[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( eulerchannels_pred[j,k:k+3] )) 247 | 248 | # The global translation (first 3 entries) and global rotation 249 | # (next 3 entries) are also not considered in the error, so they 250 | # are set to zero. 251 | # See https://github.com/asheshjain399/RNNexp/issues/6#issuecomment-249404882 252 | gt_i=np.copy(srnn_gts_euler[action][i]) 253 | gt_i[:,0:6] = 0 254 | 255 | # Now compute the l2 error.
The following is numpy port of the error 256 | # function provided by Ashesh Jain (in matlab), available at 257 | # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/dataParser/Utils/motionGenerationError.m#L40-L54 258 | idx_to_use = np.where( np.std( gt_i, 0 ) > 1e-4 )[0] 259 | 260 | euc_error = np.power( gt_i[:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2) 261 | euc_error = np.sum(euc_error, 1) 262 | euc_error = np.sqrt( euc_error ) 263 | mean_errors[i,:] = euc_error 264 | 265 | # This is simply the mean error over the N_SEQUENCE_TEST examples 266 | mean_mean_errors = np.mean( mean_errors, 0 ) 267 | 268 | # Pretty print of the results for 80, 160, 320, 400, 560 and 1000 ms 269 | print("{0: <16} |".format(action), end="") 270 | for ms in [1,3,7,9,13,24]: 271 | if FLAGS.seq_length_out >= ms+1: 272 | print(" {0:.3f} |".format( mean_mean_errors[ms] ), end="") 273 | else: 274 | print(" n/a |", end="") 275 | print() 276 | 277 | # Ugly massive if-then to log the error to tensorboard :shrug: 278 | if action == "walking": 279 | summaries = sess.run( 280 | [model.walking_err80_summary, 281 | model.walking_err160_summary, 282 | model.walking_err320_summary, 283 | model.walking_err400_summary, 284 | model.walking_err560_summary, 285 | model.walking_err1000_summary], 286 | {model.walking_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 287 | model.walking_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 288 | model.walking_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 289 | model.walking_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 290 | model.walking_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 291 | model.walking_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 292 | elif action == "eating": 293 | summaries = sess.run( 294 | [model.eating_err80_summary, 295 | model.eating_err160_summary, 296 | model.eating_err320_summary, 297 | model.eating_err400_summary, 298 | model.eating_err560_summary, 299 | model.eating_err1000_summary], 300 | {model.eating_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 301 | model.eating_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 302 | model.eating_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 303 | model.eating_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 304 | model.eating_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 305 | model.eating_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 306 | elif action == "smoking": 307 | summaries = sess.run( 308 | [model.smoking_err80_summary, 309 | model.smoking_err160_summary, 310 | model.smoking_err320_summary, 311 | model.smoking_err400_summary, 312 | model.smoking_err560_summary, 313 | model.smoking_err1000_summary], 314 | {model.smoking_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 315 | model.smoking_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 316 | model.smoking_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 317 | model.smoking_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 318 | model.smoking_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 319 | model.smoking_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 320 | elif action == "discussion": 321 | summaries = sess.run( 322 | 
[model.discussion_err80_summary, 323 | model.discussion_err160_summary, 324 | model.discussion_err320_summary, 325 | model.discussion_err400_summary, 326 | model.discussion_err560_summary, 327 | model.discussion_err1000_summary], 328 | {model.discussion_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 329 | model.discussion_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 330 | model.discussion_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 331 | model.discussion_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 332 | model.discussion_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 333 | model.discussion_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 334 | elif action == "directions": 335 | summaries = sess.run( 336 | [model.directions_err80_summary, 337 | model.directions_err160_summary, 338 | model.directions_err320_summary, 339 | model.directions_err400_summary, 340 | model.directions_err560_summary, 341 | model.directions_err1000_summary], 342 | {model.directions_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 343 | model.directions_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 344 | model.directions_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 345 | model.directions_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 346 | model.directions_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 347 | model.directions_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 348 | elif action == "greeting": 349 | summaries = sess.run( 350 | [model.greeting_err80_summary, 351 | model.greeting_err160_summary, 352 | model.greeting_err320_summary, 353 | model.greeting_err400_summary, 354 | model.greeting_err560_summary, 355 | model.greeting_err1000_summary], 356 | {model.greeting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 357 | model.greeting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 358 | model.greeting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 359 | model.greeting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 360 | model.greeting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 361 | model.greeting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 362 | elif action == "phoning": 363 | summaries = sess.run( 364 | [model.phoning_err80_summary, 365 | model.phoning_err160_summary, 366 | model.phoning_err320_summary, 367 | model.phoning_err400_summary, 368 | model.phoning_err560_summary, 369 | model.phoning_err1000_summary], 370 | {model.phoning_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 371 | model.phoning_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 372 | model.phoning_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 373 | model.phoning_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 374 | model.phoning_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 375 | model.phoning_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 376 | elif action == "posing": 377 | summaries = sess.run( 378 | [model.posing_err80_summary, 379 | model.posing_err160_summary, 380 | model.posing_err320_summary, 381 | model.posing_err400_summary, 382 | model.posing_err560_summary, 383 | model.posing_err1000_summary], 384 | 
{model.posing_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 385 | model.posing_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 386 | model.posing_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 387 | model.posing_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 388 | model.posing_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 389 | model.posing_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 390 | elif action == "purchases": 391 | summaries = sess.run( 392 | [model.purchases_err80_summary, 393 | model.purchases_err160_summary, 394 | model.purchases_err320_summary, 395 | model.purchases_err400_summary, 396 | model.purchases_err560_summary, 397 | model.purchases_err1000_summary], 398 | {model.purchases_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 399 | model.purchases_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 400 | model.purchases_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 401 | model.purchases_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 402 | model.purchases_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 403 | model.purchases_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 404 | elif action == "sitting": 405 | summaries = sess.run( 406 | [model.sitting_err80_summary, 407 | model.sitting_err160_summary, 408 | model.sitting_err320_summary, 409 | model.sitting_err400_summary, 410 | model.sitting_err560_summary, 411 | model.sitting_err1000_summary], 412 | {model.sitting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 413 | model.sitting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 414 | model.sitting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 415 | model.sitting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 416 | model.sitting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 417 | model.sitting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 418 | elif action == "sittingdown": 419 | summaries = sess.run( 420 | [model.sittingdown_err80_summary, 421 | model.sittingdown_err160_summary, 422 | model.sittingdown_err320_summary, 423 | model.sittingdown_err400_summary, 424 | model.sittingdown_err560_summary, 425 | model.sittingdown_err1000_summary], 426 | {model.sittingdown_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 427 | model.sittingdown_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 428 | model.sittingdown_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 429 | model.sittingdown_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 430 | model.sittingdown_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 431 | model.sittingdown_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 432 | elif action == "takingphoto": 433 | summaries = sess.run( 434 | [model.takingphoto_err80_summary, 435 | model.takingphoto_err160_summary, 436 | model.takingphoto_err320_summary, 437 | model.takingphoto_err400_summary, 438 | model.takingphoto_err560_summary, 439 | model.takingphoto_err1000_summary], 440 | {model.takingphoto_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 441 | model.takingphoto_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 442 | 
model.takingphoto_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 443 | model.takingphoto_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 444 | model.takingphoto_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 445 | model.takingphoto_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 446 | elif action == "waiting": 447 | summaries = sess.run( 448 | [model.waiting_err80_summary, 449 | model.waiting_err160_summary, 450 | model.waiting_err320_summary, 451 | model.waiting_err400_summary, 452 | model.waiting_err560_summary, 453 | model.waiting_err1000_summary], 454 | {model.waiting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 455 | model.waiting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 456 | model.waiting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 457 | model.waiting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 458 | model.waiting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 459 | model.waiting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 460 | elif action == "walkingdog": 461 | summaries = sess.run( 462 | [model.walkingdog_err80_summary, 463 | model.walkingdog_err160_summary, 464 | model.walkingdog_err320_summary, 465 | model.walkingdog_err400_summary, 466 | model.walkingdog_err560_summary, 467 | model.walkingdog_err1000_summary], 468 | {model.walkingdog_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 469 | model.walkingdog_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 470 | model.walkingdog_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 471 | model.walkingdog_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 472 | model.walkingdog_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 473 | model.walkingdog_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 474 | elif action == "walkingtogether": 475 | summaries = sess.run( 476 | [model.walkingtogether_err80_summary, 477 | model.walkingtogether_err160_summary, 478 | model.walkingtogether_err320_summary, 479 | model.walkingtogether_err400_summary, 480 | model.walkingtogether_err560_summary, 481 | model.walkingtogether_err1000_summary], 482 | {model.walkingtogether_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 483 | model.walkingtogether_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 484 | model.walkingtogether_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 485 | model.walkingtogether_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 486 | model.walkingtogether_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 487 | model.walkingtogether_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 488 | 489 | for i in np.arange(len( summaries )): 490 | model.test_writer.add_summary(summaries[i], current_step) 491 | 492 | 493 | print() 494 | print("============================\n" 495 | "Global step: %d\n" 496 | "Learning rate: %.6f\n" 497 | "Step-time (ms): %.4f\n" 498 | "Train loss avg: %.4f\n" 499 | "--------------------------\n" 500 | "Val loss: %.4f\n" 501 | "srnn loss: %.4f\n" 502 | "============================" % (model.global_step.eval(), 503 | model.learning_rate.eval(), step_time*1000, loss, 504 | val_loss, srnn_loss)) 505 | print() 506 | 507 | previous_losses.append(loss) 508 | 
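# --- Editor's sketch (a factored-out helper that does not exist in this repo):
# the per-action evaluation above converts the predicted exponential-map
# channels to Euler angles and takes a per-frame L2 norm over the dimensions
# whose ground truth actually varies. Assuming the 99-channel H3.6M layout used
# throughout this file (three angle channels per joint starting at channel 3),
# the metric amounts to:
#
#   def euler_l2_error(gt_euler, pred_expmap):
#       pred = np.copy(pred_expmap)
#       for j in np.arange(pred.shape[0]):
#           for k in np.arange(3, 97, 3):
#               pred[j, k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat(pred[j, k:k+3]))
#       idx = np.where(np.std(gt_euler, 0) > 1e-4)[0]
#       return np.sqrt(np.sum((gt_euler[:, idx] - pred[:, idx]) ** 2, 1))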
509 | # Save the model 510 | if current_step % FLAGS.save_every == 0: 511 | print( "Saving the model..." ); start_time = time.time() 512 | model.saver.save(sess, os.path.normpath(os.path.join(train_dir, 'checkpoint')), global_step=current_step ) 513 | print( "done in {0:.2f} ms".format( (time.time() - start_time)*1000) ) 514 | 515 | # Reset global time and loss 516 | step_time, loss = 0, 0 517 | 518 | sys.stdout.flush() 519 | 520 | 521 | def get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, one_hot, to_euler=True ): 522 | """ 523 | Get the ground truths for srnn's sequences, and convert to Euler angles. 524 | (the error is always computed in Euler angles). 525 | 526 | Args 527 | actions: a list of actions to get ground truths for. 528 | model: training model we are using (we only use the "get_batch" method). 529 | test_set: dictionary with normalized training data. 530 | data_mean: d-long vector with the mean of the training data. 531 | data_std: d-long vector with the standard deviation of the training data. 532 | dim_to_ignore: dimensions that we are not using to train/predict. 533 | one_hot: whether the data comes with one-hot encoding indicating action. 534 | to_euler: whether to convert the angles to Euler format or keep them in exponential map 535 | 536 | Returns 537 | srnn_gts_euler: a dictionary where the keys are actions, and the values 538 | are the ground-truth, denormalized expected outputs of srnn's seeds. 539 | """ 540 | srnn_gts_euler = {} 541 | 542 | for action in actions: 543 | 544 | srnn_gt_euler = [] 545 | _, _, _, srnn_expmap = model.get_batch_srnn( test_set, action ) # modified 546 | 547 | # expmap -> rotmat -> euler 548 | for i in np.arange( srnn_expmap.shape[0] ): 549 | denormed = data_utils.unNormalizeData(srnn_expmap[i,:,:], data_mean, data_std, dim_to_ignore, actions, one_hot ) # modified 550 | 551 | if to_euler: 552 | for j in np.arange( denormed.shape[0] ): 553 | for k in np.arange(3,97,3): 554 | denormed[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( denormed[j,k:k+3] )) 555 | 556 | srnn_gt_euler.append( denormed ) 557 | 558 | # Put back in the dictionary 559 | srnn_gts_euler[action] = srnn_gt_euler 560 | 561 | return srnn_gts_euler 562 | 563 | 564 | def sample(): 565 | """Sample predictions for srnn's seeds""" 566 | 567 | if FLAGS.load <= 0: 568 | raise ValueError("Must give an iteration to read parameters from") 569 | 570 | actions = define_actions( FLAGS.action ) 571 | 572 | # Use the CPU if asked to 573 | device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1} 574 | with tf.Session(config=tf.ConfigProto( device_count = device_count )) as sess: 575 | 576 | # === Create the model === 577 | print("Creating %d layers of %d units."
% (FLAGS.num_layers, FLAGS.body_size)) 578 | sampling = True 579 | model = create_model(sess, actions, sampling) 580 | print("Model created") 581 | 582 | # Load all the data 583 | train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = read_all_data( actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot ) 584 | 585 | # === Read and denormalize the gt with srnn's seeds, as we'll need them 586 | # many times for evaluation in Euler Angles === 587 | srnn_gts_expmap = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot, to_euler=False ) 588 | srnn_gts_euler = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot ) 589 | 590 | # Clean and create a new h5 file of samples 591 | SAMPLES_FNAME = 'samples.h5' 592 | try: 593 | os.remove( SAMPLES_FNAME ) 594 | except OSError: 595 | pass 596 | 597 | # Predict and save for each action 598 | for action in actions: 599 | 600 | # Make prediction with srnn' seeds 601 | encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch_srnn( test_set, action ) 602 | forward_only = True 603 | srnn_seeds = True 604 | 605 | is_training = False 606 | use_sample = True 607 | dropout_prob = 0.0 608 | sampling_weight = 0.0 609 | 610 | srnn_loss, srnn_poses, _ = model.step(sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, forward_only, srnn_seeds) 611 | 612 | # denormalizes too 613 | srnn_pred_expmap = data_utils.revert_output_format( srnn_poses, data_mean, data_std, dim_to_ignore, actions, FLAGS.omit_one_hot ) # modified 614 | 615 | # Save the conditioning seeds 616 | 617 | # Save the samples 618 | with h5py.File( SAMPLES_FNAME, 'a' ) as hf: 619 | for i in np.arange(8): 620 | # Save conditioning ground truth 621 | node_name = 'expmap/gt/{1}_{0}'.format(i, action) 622 | hf.create_dataset( node_name, data=srnn_gts_expmap[action][i] ) 623 | # Save prediction 624 | node_name = 'expmap/preds/{1}_{0}'.format(i, action) 625 | hf.create_dataset( node_name, data=srnn_pred_expmap[i] ) 626 | 627 | # Compute and save the errors here 628 | mean_errors = np.zeros( (len(srnn_pred_expmap), srnn_pred_expmap[0].shape[0]) ) 629 | 630 | for i in np.arange(8): 631 | 632 | eulerchannels_pred = srnn_pred_expmap[i] 633 | 634 | for j in np.arange( eulerchannels_pred.shape[0] ): 635 | for k in np.arange(3,97,3): 636 | eulerchannels_pred[j,k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat( eulerchannels_pred[j,k:k+3] )) 637 | 638 | eulerchannels_pred[:,0:6] = 0 639 | 640 | # Pick only the dimensions with sufficient standard deviation. Others are ignored. 641 | idx_to_use = np.where( np.std( eulerchannels_pred, 0 ) > 1e-4 )[0] 642 | 643 | euc_error = np.power( srnn_gts_euler[action][i][:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2) 644 | euc_error = np.sum(euc_error, 1) 645 | euc_error = np.sqrt( euc_error ) 646 | mean_errors[i,:] = euc_error 647 | 648 | mean_mean_errors = np.mean( mean_errors, 0 ) 649 | print( action ) 650 | print( ','.join(map(str, mean_mean_errors.tolist() )) ) 651 | 652 | with h5py.File( SAMPLES_FNAME, 'a' ) as hf: 653 | node_name = 'mean_{0}_error'.format( action ) 654 | hf.create_dataset( node_name, data=mean_mean_errors ) 655 | 656 | return 657 | 658 | 659 | def define_actions( action ): 660 | """ 661 | Define the list of actions we are using. 662 | 663 | Args 664 | action: String with the passed action. 
Could be "all" 665 | Returns 666 | actions: List of strings of actions 667 | Raises 668 | ValueError if the action is not included in H3.6M 669 | """ 670 | 671 | actions = ["walking", "eating", "smoking", "discussion", "directions", 672 | "greeting", "phoning", "posing", "purchases", "sitting", 673 | "sittingdown", "takingphoto", "waiting", "walkingdog", 674 | "walkingtogether"] 675 | 676 | if action in actions: 677 | return [action] 678 | 679 | if action == "all": 680 | return actions 681 | 682 | if action == "all_srnn": 683 | return ["walking", "eating", "smoking", "discussion"] 684 | 685 | raise( ValueError, "Unrecognized action: %d" % action ) 686 | 687 | 688 | def read_all_data( actions, seq_length_in, seq_length_out, data_dir, one_hot ): 689 | """ 690 | Loads data for training/testing and normalizes it. 691 | 692 | Args 693 | actions: list of strings (actions) to load 694 | seq_length_in: number of frames to use in the burn-in sequence 695 | seq_length_out: number of frames to use in the output sequence 696 | data_dir: directory to load the data from 697 | one_hot: whether to use one-hot encoding per action 698 | Returns 699 | train_set: dictionary with normalized training data 700 | test_set: dictionary with test data 701 | data_mean: d-long vector with the mean of the training data 702 | data_std: d-long vector with the standard dev of the training data 703 | dim_to_ignore: dimensions that are not used becaused stdev is too small 704 | dim_to_use: dimensions that we are actually using in the model 705 | """ 706 | 707 | # === Read training data === 708 | print ("Reading training data (seq_len_in: {0}, seq_len_out {1}).".format( 709 | seq_length_in, seq_length_out)) 710 | 711 | train_subject_ids = [1,6,7,8,9,11] 712 | test_subject_ids = [5] 713 | 714 | train_set, complete_train = data_utils.load_data( data_dir, train_subject_ids, actions, one_hot ) 715 | test_set, complete_test = data_utils.load_data( data_dir, test_subject_ids, actions, one_hot ) 716 | 717 | # Compute normalization stats 718 | data_mean, data_std, dim_to_ignore, dim_to_use = data_utils.normalization_stats(complete_train) 719 | 720 | # Normalize -- subtract mean, divide by stdev 721 | train_set = data_utils.normalize_data( train_set, data_mean, data_std, dim_to_use, actions, one_hot ) 722 | test_set = data_utils.normalize_data( test_set, data_mean, data_std, dim_to_use, actions, one_hot ) 723 | print("done reading data.") 724 | 725 | return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use 726 | 727 | 728 | def main(_): 729 | if FLAGS.sample: 730 | sample() 731 | else: 732 | train() 733 | 734 | if __name__ == "__main__": 735 | tf.app.run() 736 | -------------------------------------------------------------------------------- /translate_simple_lm_flow.py: -------------------------------------------------------------------------------- 1 | 2 | """Simple code for training an RNN for motion prediction.""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import math 9 | import os 10 | import random 11 | import sys 12 | import time 13 | import h5py 14 | 15 | import numpy as np 16 | from six.moves import xrange # pylint: disable=redefined-builtin 17 | import tensorflow as tf 18 | 19 | import data_utils 20 | import seq2seq_model 21 | import motion_rnn_simple_lm_flow 22 | 23 | print("Setting seed.") 24 | np.random.seed(42) 25 | 26 | # Learning 27 | tf.app.flags.DEFINE_float("learning_rate", .0001, "Learning rate.") 28 | 
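# The two flags below drive a simple step decay: the training loop runs
# model.learning_rate_decay_op once every learning_rate_step iterations, which
# multiplies the rate by learning_rate_decay_factor. A minimal sketch of the
# resulting schedule (illustrative only; `step` is a hypothetical counter):
#
#   def effective_learning_rate(base_lr, decay_factor, decay_step, step):
#     # e.g. base_lr=1e-4, decay_factor=0.6, decay_step=2000:
#     # steps 0-1999 -> 1.0e-4, steps 2000-3999 -> 0.6e-4, and so on.
#     return base_lr * decay_factor ** (step // decay_step)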
28 | tf.app.flags.DEFINE_float("learning_rate_decay_factor", 0.6, "Learning rate is multiplied by this much. 1 means no decay.")
29 | tf.app.flags.DEFINE_integer("learning_rate_step", 2000, "Every this many steps, do decay.")
30 | tf.app.flags.DEFINE_float("max_gradient_norm", 1, "Clip gradients to this norm.")
31 | tf.app.flags.DEFINE_integer("batch_size", 32, "Batch size to use during training.")
32 | tf.app.flags.DEFINE_integer("iterations", int(1e4), "Iterations to train for.")
33 | # Architecture
34 | tf.app.flags.DEFINE_string("architecture", "tied", "Seq2seq architecture to use: [basic, tied].")
35 | tf.app.flags.DEFINE_string("loop_type", "open", "Loop type to use: [open, closed].")
36 | tf.app.flags.DEFINE_integer("body_size", 512, "Size of each body rnn model layer.")
37 | tf.app.flags.DEFINE_string("body_cell", "gru", "RNN cell type of the body rnn: [elman, lstm, gru]")
38 | tf.app.flags.DEFINE_integer("num_layers", 2, "Number of layers in the model.")
39 | tf.app.flags.DEFINE_integer("seq_length_in", 50, "Number of frames to feed into the encoder. 25 fps")
40 | tf.app.flags.DEFINE_integer("seq_length_out", 25, "Number of frames that the decoder has to predict. 25 fps")
41 | tf.app.flags.DEFINE_boolean("omit_one_hot", False, "Whether to remove one-hot encoding from the data")
42 | tf.app.flags.DEFINE_boolean("residual_velocities", False, "Add a residual connection that effectively models velocities")
43 | # Directories
44 | tf.app.flags.DEFINE_string("data_dir", os.path.normpath("./data/h3.6m/dataset"), "Data directory")
45 | tf.app.flags.DEFINE_string("train_dir", os.path.normpath("./simple_lm_flow_experiments/"), "Training directory.")
46 | 
47 | tf.app.flags.DEFINE_string("action", "all", "The action to train on. all means all the actions, all_periodic means walking, eating and smoking")
48 | tf.app.flags.DEFINE_string("loss_to_use", "sampling_based", "The type of loss to use, supervised or sampling_based")
49 | 
50 | tf.app.flags.DEFINE_integer("test_every", 1000, "How often to compute error on the test set.")
51 | tf.app.flags.DEFINE_integer("save_every", 5000, "How often to save the model.")
52 | tf.app.flags.DEFINE_boolean("sample", False, "Set to True for sampling.")
53 | tf.app.flags.DEFINE_boolean("use_cpu", False, "Whether to use the CPU")
54 | tf.app.flags.DEFINE_integer("load", 0, "Try to load a previous checkpoint.")
55 | 
56 | FLAGS = tf.app.flags.FLAGS
57 | 
58 | train_dir = os.path.normpath(os.path.join( FLAGS.train_dir, FLAGS.action,
59 |   'out_{0}'.format(FLAGS.seq_length_out),
60 |   'iterations_{0}'.format(FLAGS.iterations),
61 |   FLAGS.architecture,
62 |   'loop_type_{0}'.format(FLAGS.loop_type),
63 |   FLAGS.loss_to_use,
64 |   'omit_one_hot' if FLAGS.omit_one_hot else 'one_hot',
65 |   'depth_{0}'.format(FLAGS.num_layers),
66 |   'body_cell_{0}'.format(FLAGS.body_cell),
67 |   'body_size_{0}'.format(FLAGS.body_size),
68 |   'lr_{0}'.format(FLAGS.learning_rate),
69 |   'residual_vel' if FLAGS.residual_velocities else 'not_residual_vel'))
70 | 
71 | summaries_dir = os.path.normpath(os.path.join( train_dir, "log" )) # Directory for TB summaries
72 | 
73 | def create_model(session, actions, sampling=False):
74 |   """Create translation model and initialize or load parameters in session."""
75 | 
76 |   model = motion_rnn_simple_lm_flow.MotionRNNModelSimpleLM(
77 |     FLAGS.architecture,
78 |     FLAGS.loop_type,
79 |     FLAGS.seq_length_in if not sampling else 50,
80 |     FLAGS.seq_length_out if not sampling else 100,
81 |     FLAGS.body_size,
82 |     FLAGS.body_cell,
83 |     FLAGS.num_layers,
84 |     FLAGS.max_gradient_norm,
85 |     FLAGS.batch_size,
86 |     FLAGS.learning_rate,
87 |     FLAGS.learning_rate_decay_factor,
88 |     summaries_dir,
89 |     FLAGS.loss_to_use if not sampling else "sampling_based",
90 |     len( actions ),
91 |     not FLAGS.omit_one_hot,
92 |     FLAGS.residual_velocities,
93 |     dtype=tf.float32)
94 | 
95 |   if FLAGS.load <= 0:
96 |     print("Creating model with fresh parameters.")
97 |     session.run(tf.global_variables_initializer())
98 |     return model
99 | 
100 |   ckpt = tf.train.get_checkpoint_state( train_dir, latest_filename="checkpoint")
101 |   print( "train_dir", train_dir )
102 | 
103 |   if ckpt and ckpt.model_checkpoint_path:
104 |     # Check if the specific checkpoint exists
105 |     if FLAGS.load > 0:
106 |       if os.path.isfile(os.path.join(train_dir,"checkpoint-{0}.index".format(FLAGS.load))):
107 |         ckpt_name = os.path.normpath(os.path.join( os.path.join(train_dir,"checkpoint-{0}".format(FLAGS.load)) ))
108 |       else:
109 |         raise ValueError("Asked to load checkpoint {0}, but it does not seem to exist".format(FLAGS.load))
110 |     else:
111 |       ckpt_name = os.path.basename( ckpt.model_checkpoint_path )
112 | 
113 |     print("Loading model {0}".format( ckpt_name ))
114 |     model.saver.restore( session, ckpt.model_checkpoint_path )
115 |     return model
116 |   else:
117 |     print("Could not find checkpoint. Aborting.")
118 |     raise ValueError("Checkpoint {0} does not seem to exist".format( ckpt.model_checkpoint_path ))
119 | 
120 |   return model
121 | 
122 | 
123 | def train():
124 |   """Train a seq2seq model on human motion"""
125 | 
126 |   actions = define_actions( FLAGS.action )
127 | 
128 |   number_of_actions = len( actions )
129 | 
130 |   train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = read_all_data( actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot )
131 | 
132 |   # Limit TF to take a fraction of the GPU memory
133 |   gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
134 |   device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
135 | 
136 |   # setting graph-level seed
137 |   tf.set_random_seed(42)
138 | 
139 |   with tf.Session(config=tf.ConfigProto( gpu_options=gpu_options, device_count = device_count )) as sess:
140 | 
141 |     # === Create the model ===
142 |     print("Creating %d layers of %d units for body RNN." % (FLAGS.num_layers, FLAGS.body_size))
143 | 
144 |     model = create_model( sess, actions )
145 |     model.train_writer.add_graph( sess.graph )
146 |     print( "Model created" )
147 | 
148 |     # === Read and denormalize the gt with srnn's seeds, as we'll need them
149 |     # many times for evaluation in Euler Angles ===
150 |     srnn_gts_euler = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot ) # modified
151 | 
152 |     #=== This is the training loop ===
153 |     step_time, loss, val_loss = 0.0, 0.0, 0.0
154 |     current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1
155 |     previous_losses = []
156 | 
157 |     step_time, loss = 0, 0
158 |     n_trials = 30
159 |     sampling_schedule = [500, 1000, 2000, 4000, 7000]
160 |     sampling_weights = [0.2, 0.4, 0.6, 0.8, 1.0]
161 |     sampling_weight = 0.0
162 |     samp_cnt = -1
163 | 
164 |     for _ in xrange( FLAGS.iterations ):
165 | 
166 |       start_time = time.time()
167 | 
168 |       # === Training step ===
169 |       encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch( train_set, FLAGS.omit_one_hot ) # modified
170 | 
171 |       is_training = True
172 |       dropout_prob = 0.3
173 |       use_sample = False
174 | 
175 |       if samp_cnt < len(sampling_schedule)-1:
176 |         if current_step == sampling_schedule[samp_cnt+1]:
177 |           sampling_weight = sampling_weights[samp_cnt+1]
178 |           samp_cnt += 1
179 | 
180 |       _, step_loss, step_sample_loss, loss_summary, lr_summary = model.step( sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, False)
181 | 
182 | 
183 |       model.train_writer.add_summary( loss_summary, current_step )
184 |       model.train_writer.add_summary( lr_summary, current_step )
185 | 
186 |       if current_step % 10 == 0:
187 |         print("step {0:04d}; step_loss: {1:.4f}; step_sample_loss: {2:.4f}".format(current_step, step_loss, sampling_weight*step_sample_loss ))
188 | 
189 |       step_time += (time.time() - start_time) / FLAGS.test_every
190 |       loss += step_loss / FLAGS.test_every
191 |       current_step += 1
192 | 
193 |       # === step decay ===
194 |       if current_step % FLAGS.learning_rate_step == 0:
195 |         sess.run(model.learning_rate_decay_op)
196 | 
197 |       # Once in a while, we save checkpoint, print statistics, and run evals.
198 |       if current_step % FLAGS.test_every == 0:
199 | 
200 |         # === Validation with randomly chosen seeds ===
201 |         forward_only = True
202 |         is_training = False
203 |         use_sample = True
204 |         dropout_prob = 0.0
205 | 
206 |         encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch( test_set, FLAGS.omit_one_hot ) # modified
207 |         step_loss, loss_summary = model.step(sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, forward_only)
208 |         val_loss = step_loss # Loss book-keeping
209 | 
210 |         model.test_writer.add_summary(loss_summary, current_step)
211 | 
212 |         print()
213 |         print("{0: <16} |".format("milliseconds"), end="")
214 |         for ms in [80, 160, 320, 400, 560, 1000]:
215 |           print(" {0:5d} |".format(ms), end="")
216 |         print()
217 | 
218 |         # === Validation with srnn's seeds ===
219 |         for action in actions:
220 | 
221 |           # Evaluate the model on the test batches
222 |           encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch_srnn( test_set, action )
223 | 
224 |           is_training = False
225 |           use_sample = True
226 |           dropout_prob = 0.0
227 | 
228 |           srnn_loss, srnn_poses, _ = model.step(sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, True, True)
229 | 
230 |           # Denormalize the output
231 |           srnn_pred_expmap = data_utils.revert_output_format( srnn_poses, data_mean, data_std, dim_to_ignore, actions, FLAGS.omit_one_hot ) # modified
232 | 
233 |           # Save the errors here
234 |           mean_errors = np.zeros( (len(srnn_pred_expmap), srnn_pred_expmap[0].shape[0]) )
235 | 
236 |           # Training is done in exponential map, but the error is reported in
237 |           # Euler angles, as in previous work.
238 |           # See https://github.com/asheshjain399/RNNexp/issues/6#issuecomment-247769197
239 |           N_SEQUENCE_TEST = 8
240 |           for i in np.arange(N_SEQUENCE_TEST):
241 |             eulerchannels_pred = srnn_pred_expmap[i]
242 | 
243 |             # Convert from exponential map to Euler angles
244 |             for j in np.arange( eulerchannels_pred.shape[0] ):
245 |               for k in np.arange(3,97,3):
246 |                 eulerchannels_pred[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( eulerchannels_pred[j,k:k+3] ))
247 | 
248 |             # The global translation (first 3 entries) and global rotation
249 |             # (next 3 entries) are also not considered in the error, so they
250 |             # are set to zero.
251 |             # See https://github.com/asheshjain399/RNNexp/issues/6#issuecomment-249404882
252 |             gt_i = np.copy(srnn_gts_euler[action][i])
253 |             gt_i[:,0:6] = 0
254 | 
255 |             # Now compute the l2 error. The following is a numpy port of the error
256 |             # function provided by Ashesh Jain (in MATLAB), available at
257 |             # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/dataParser/Utils/motionGenerationError.m#L40-L54
258 |             idx_to_use = np.where( np.std( gt_i, 0 ) > 1e-4 )[0]
259 | 
260 |             euc_error = np.power( gt_i[:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2)
261 |             euc_error = np.sum(euc_error, 1)
262 |             euc_error = np.sqrt( euc_error )
263 |             mean_errors[i,:] = euc_error
264 | 
265 |           # This is simply the mean error over the N_SEQUENCE_TEST examples
266 |           mean_mean_errors = np.mean( mean_errors, 0 )
267 | 
268 |           # Pretty print of the results for 80, 160, 320, 400, 560 and 1000 ms
269 |           print("{0: <16} |".format(action), end="")
270 |           for idx in [1,3,7,9,13,24]:
271 |             if FLAGS.seq_length_out >= idx+1:
272 |               print(" {0:.3f} |".format( mean_mean_errors[idx] ), end="")
273 |             else:
274 |               print(" n/a |", end="")
275 |           print()
276 | 
277 |           # Ugly massive if-then to log the error to tensorboard :shrug:
278 |           if action == "walking":
279 |             summaries = sess.run(
280 |               [model.walking_err80_summary,
281 |                model.walking_err160_summary,
282 |                model.walking_err320_summary,
283 |                model.walking_err400_summary,
284 |                model.walking_err560_summary,
285 |                model.walking_err1000_summary],
286 |               {model.walking_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None,
287 |                model.walking_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None,
288 |                model.walking_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None,
289 |                model.walking_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None,
290 |                model.walking_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None,
291 |                model.walking_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None})
292 |           elif action == "eating":
293 |             summaries = sess.run(
294 |               [model.eating_err80_summary,
295 |                model.eating_err160_summary,
296 |                model.eating_err320_summary,
297 |                model.eating_err400_summary,
298 |                model.eating_err560_summary,
299 |                model.eating_err1000_summary],
300 |               {model.eating_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None,
301 |                model.eating_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None,
302 |                model.eating_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None,
303 |                model.eating_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None,
304 |                model.eating_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None,
305 |                model.eating_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None})
306 |           elif action == "smoking":
307 |             summaries = sess.run(
308 |               [model.smoking_err80_summary,
309 |                model.smoking_err160_summary,
310 |                model.smoking_err320_summary,
311 |                model.smoking_err400_summary,
312 |                model.smoking_err560_summary,
313 |                model.smoking_err1000_summary],
314 |               {model.smoking_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None,
315 |                model.smoking_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None,
316 |                model.smoking_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None,
317 |                model.smoking_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None,
318 |                model.smoking_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None,
319 |                model.smoking_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None})
320 |           elif action == "discussion":
321 |             summaries = sess.run(
322 |
[model.discussion_err80_summary, 323 | model.discussion_err160_summary, 324 | model.discussion_err320_summary, 325 | model.discussion_err400_summary, 326 | model.discussion_err560_summary, 327 | model.discussion_err1000_summary], 328 | {model.discussion_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 329 | model.discussion_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 330 | model.discussion_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 331 | model.discussion_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 332 | model.discussion_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 333 | model.discussion_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 334 | elif action == "directions": 335 | summaries = sess.run( 336 | [model.directions_err80_summary, 337 | model.directions_err160_summary, 338 | model.directions_err320_summary, 339 | model.directions_err400_summary, 340 | model.directions_err560_summary, 341 | model.directions_err1000_summary], 342 | {model.directions_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 343 | model.directions_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 344 | model.directions_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 345 | model.directions_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 346 | model.directions_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 347 | model.directions_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 348 | elif action == "greeting": 349 | summaries = sess.run( 350 | [model.greeting_err80_summary, 351 | model.greeting_err160_summary, 352 | model.greeting_err320_summary, 353 | model.greeting_err400_summary, 354 | model.greeting_err560_summary, 355 | model.greeting_err1000_summary], 356 | {model.greeting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 357 | model.greeting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 358 | model.greeting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 359 | model.greeting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 360 | model.greeting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 361 | model.greeting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 362 | elif action == "phoning": 363 | summaries = sess.run( 364 | [model.phoning_err80_summary, 365 | model.phoning_err160_summary, 366 | model.phoning_err320_summary, 367 | model.phoning_err400_summary, 368 | model.phoning_err560_summary, 369 | model.phoning_err1000_summary], 370 | {model.phoning_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 371 | model.phoning_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 372 | model.phoning_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 373 | model.phoning_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 374 | model.phoning_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 375 | model.phoning_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 376 | elif action == "posing": 377 | summaries = sess.run( 378 | [model.posing_err80_summary, 379 | model.posing_err160_summary, 380 | model.posing_err320_summary, 381 | model.posing_err400_summary, 382 | model.posing_err560_summary, 383 | model.posing_err1000_summary], 384 | 
{model.posing_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 385 | model.posing_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 386 | model.posing_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 387 | model.posing_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 388 | model.posing_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 389 | model.posing_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 390 | elif action == "purchases": 391 | summaries = sess.run( 392 | [model.purchases_err80_summary, 393 | model.purchases_err160_summary, 394 | model.purchases_err320_summary, 395 | model.purchases_err400_summary, 396 | model.purchases_err560_summary, 397 | model.purchases_err1000_summary], 398 | {model.purchases_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 399 | model.purchases_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 400 | model.purchases_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 401 | model.purchases_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 402 | model.purchases_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 403 | model.purchases_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 404 | elif action == "sitting": 405 | summaries = sess.run( 406 | [model.sitting_err80_summary, 407 | model.sitting_err160_summary, 408 | model.sitting_err320_summary, 409 | model.sitting_err400_summary, 410 | model.sitting_err560_summary, 411 | model.sitting_err1000_summary], 412 | {model.sitting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 413 | model.sitting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 414 | model.sitting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 415 | model.sitting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 416 | model.sitting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 417 | model.sitting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 418 | elif action == "sittingdown": 419 | summaries = sess.run( 420 | [model.sittingdown_err80_summary, 421 | model.sittingdown_err160_summary, 422 | model.sittingdown_err320_summary, 423 | model.sittingdown_err400_summary, 424 | model.sittingdown_err560_summary, 425 | model.sittingdown_err1000_summary], 426 | {model.sittingdown_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 427 | model.sittingdown_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 428 | model.sittingdown_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 429 | model.sittingdown_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 430 | model.sittingdown_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 431 | model.sittingdown_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 432 | elif action == "takingphoto": 433 | summaries = sess.run( 434 | [model.takingphoto_err80_summary, 435 | model.takingphoto_err160_summary, 436 | model.takingphoto_err320_summary, 437 | model.takingphoto_err400_summary, 438 | model.takingphoto_err560_summary, 439 | model.takingphoto_err1000_summary], 440 | {model.takingphoto_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 441 | model.takingphoto_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 442 | 
model.takingphoto_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 443 | model.takingphoto_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 444 | model.takingphoto_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 445 | model.takingphoto_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 446 | elif action == "waiting": 447 | summaries = sess.run( 448 | [model.waiting_err80_summary, 449 | model.waiting_err160_summary, 450 | model.waiting_err320_summary, 451 | model.waiting_err400_summary, 452 | model.waiting_err560_summary, 453 | model.waiting_err1000_summary], 454 | {model.waiting_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 455 | model.waiting_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 456 | model.waiting_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 457 | model.waiting_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 458 | model.waiting_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 459 | model.waiting_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 460 | elif action == "walkingdog": 461 | summaries = sess.run( 462 | [model.walkingdog_err80_summary, 463 | model.walkingdog_err160_summary, 464 | model.walkingdog_err320_summary, 465 | model.walkingdog_err400_summary, 466 | model.walkingdog_err560_summary, 467 | model.walkingdog_err1000_summary], 468 | {model.walkingdog_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 469 | model.walkingdog_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 470 | model.walkingdog_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 471 | model.walkingdog_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 472 | model.walkingdog_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 473 | model.walkingdog_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 474 | elif action == "walkingtogether": 475 | summaries = sess.run( 476 | [model.walkingtogether_err80_summary, 477 | model.walkingtogether_err160_summary, 478 | model.walkingtogether_err320_summary, 479 | model.walkingtogether_err400_summary, 480 | model.walkingtogether_err560_summary, 481 | model.walkingtogether_err1000_summary], 482 | {model.walkingtogether_err80: mean_mean_errors[1] if FLAGS.seq_length_out >= 2 else None, 483 | model.walkingtogether_err160: mean_mean_errors[3] if FLAGS.seq_length_out >= 4 else None, 484 | model.walkingtogether_err320: mean_mean_errors[7] if FLAGS.seq_length_out >= 8 else None, 485 | model.walkingtogether_err400: mean_mean_errors[9] if FLAGS.seq_length_out >= 10 else None, 486 | model.walkingtogether_err560: mean_mean_errors[13] if FLAGS.seq_length_out >= 14 else None, 487 | model.walkingtogether_err1000: mean_mean_errors[24] if FLAGS.seq_length_out >= 25 else None}) 488 | 489 | for i in np.arange(len( summaries )): 490 | model.test_writer.add_summary(summaries[i], current_step) 491 | 492 | 493 | print() 494 | print("============================\n" 495 | "Global step: %d\n" 496 | "Learning rate: %.6f\n" 497 | "Step-time (ms): %.4f\n" 498 | "Train loss avg: %.4f\n" 499 | "--------------------------\n" 500 | "Val loss: %.4f\n" 501 | "srnn loss: %.4f\n" 502 | "============================" % (model.global_step.eval(), 503 | model.learning_rate.eval(), step_time*1000, loss, 504 | val_loss, srnn_loss)) 505 | print() 506 | 507 | previous_losses.append(loss) 508 | 
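# Note on the feeds above: each horizon falls back to None when
# FLAGS.seq_length_out is shorter than that horizon, and tf.Session.run
# rejects None feed values, so this logging path effectively assumes
# seq_length_out >= 25 (the default here). A hypothetical guard, if shorter
# output lengths were ever needed (`summary_ops` and `feeds` are illustrative
# names, not variables defined in this script):
#
#   feeds = {ph: val for ph, val in feeds.items() if val is not None}
#   summaries = sess.run(summary_ops, feeds)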
509 |         # Save the model
510 |         if current_step % FLAGS.save_every == 0:
511 |           print( "Saving the model..." ); start_time = time.time()
512 |           model.saver.save(sess, os.path.normpath(os.path.join(train_dir, 'checkpoint')), global_step=current_step )
513 |           print( "done in {0:.2f} ms".format( (time.time() - start_time)*1000) )
514 | 
515 |         # Reset global time and loss
516 |         step_time, loss = 0, 0
517 | 
518 |         sys.stdout.flush()
519 | 
520 | 
521 | def get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, one_hot, to_euler=True ):
522 |   """
523 |   Get the ground truths for srnn's sequences, and convert to Euler angles.
524 |   (the error is always computed in Euler angles).
525 | 
526 |   Args
527 |     actions: a list of actions to get ground truths for.
528 |     model: training model we are using (we only use the "get_batch" method).
529 |     test_set: dictionary with normalized training data.
530 |     data_mean: d-long vector with the mean of the training data.
531 |     data_std: d-long vector with the standard deviation of the training data.
532 |     dim_to_ignore: dimensions that we are not using to train/predict.
533 |     one_hot: whether the data comes with one-hot encoding indicating action.
534 |     to_euler: whether to convert the angles to Euler format or keep them in exponential map
535 | 
536 |   Returns
537 |     srnn_gts_euler: a dictionary where the keys are actions, and the values
538 |       are the ground-truth, denormalized expected outputs of srnn's seeds.
539 |   """
540 |   srnn_gts_euler = {}
541 | 
542 |   for action in actions:
543 | 
544 |     srnn_gt_euler = []
545 |     _, _, _, srnn_expmap = model.get_batch_srnn( test_set, action ) # modified
546 | 
547 |     # expmap -> rotmat -> euler
548 |     for i in np.arange( srnn_expmap.shape[0] ):
549 |       denormed = data_utils.unNormalizeData(srnn_expmap[i,:,:], data_mean, data_std, dim_to_ignore, actions, one_hot )
550 | 
551 |       if to_euler:
552 |         for j in np.arange( denormed.shape[0] ):
553 |           for k in np.arange(3,97,3):
554 |             denormed[j,k:k+3] = data_utils.rotmat2euler( data_utils.expmap2rotmat( denormed[j,k:k+3] ))
555 | 
556 |       srnn_gt_euler.append( denormed )
557 | 
558 |     # Put back in the dictionary
559 |     srnn_gts_euler[action] = srnn_gt_euler
560 | 
561 |   return srnn_gts_euler
562 | 
563 | 
564 | def sample():
565 |   """Sample predictions for srnn's seeds"""
566 | 
567 |   if FLAGS.load <= 0:
568 |     raise ValueError("Must give an iteration to read parameters from")
569 | 
570 |   actions = define_actions( FLAGS.action )
571 | 
572 |   # Use the CPU if asked to
573 |   device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
574 |   with tf.Session(config=tf.ConfigProto( device_count = device_count )) as sess:
575 | 
576 |     # === Create the model ===
577 |     print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.body_size))
578 |     sampling = True
579 |     model = create_model(sess, actions, sampling)
580 |     print("Model created")
581 | 
582 |     # Load all the data
583 |     train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = read_all_data( actions, FLAGS.seq_length_in, FLAGS.seq_length_out, FLAGS.data_dir, not FLAGS.omit_one_hot )
584 | 
585 |     # === Read and denormalize the gt with srnn's seeds, as we'll need them
586 |     # many times for evaluation in Euler Angles ===
587 |     srnn_gts_expmap = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot, to_euler=False )
588 |     srnn_gts_euler = get_srnn_gts( actions, model, test_set, data_mean, data_std, dim_to_ignore, FLAGS.omit_one_hot )
589 | 
590 |     # Clean and create a new h5 file of samples
591 |     SAMPLES_FNAME = 'samples.h5'
592 |     try:
593 |       os.remove( SAMPLES_FNAME )
594 |     except OSError:
595 |       pass
596 | 
597 |     # Predict and save for each action
598 |     for action in actions:
599 | 
600 |       # Make prediction with srnn's seeds
601 |       encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs = model.get_batch_srnn( test_set, action )
602 |       forward_only = True
603 |       srnn_seeds = True
604 | 
605 |       is_training = False
606 |       use_sample = True
607 |       dropout_prob = 0.0
608 |       sampling_weight = 0.0
609 | 
610 |       srnn_loss, srnn_poses, _ = model.step(sess, encoder_inputs, encoder_outputs, decoder_inputs, decoder_outputs, is_training, use_sample, dropout_prob, sampling_weight, forward_only, srnn_seeds)
611 | 
612 |       # denormalizes too
613 |       srnn_pred_expmap = data_utils.revert_output_format( srnn_poses, data_mean, data_std, dim_to_ignore, actions, FLAGS.omit_one_hot )
614 | 
615 |       # Save the conditioning seeds
616 | 
617 |       # Save the samples
618 |       with h5py.File( SAMPLES_FNAME, 'a' ) as hf:
619 |         for i in np.arange(8):
620 |           # Save conditioning ground truth
621 |           node_name = 'expmap/gt/{1}_{0}'.format(i, action)
622 |           hf.create_dataset( node_name, data=srnn_gts_expmap[action][i] )
623 |           # Save prediction
624 |           node_name = 'expmap/preds/{1}_{0}'.format(i, action)
625 |           hf.create_dataset( node_name, data=srnn_pred_expmap[i] )
626 | 
627 |       # Compute and save the errors here
628 |       mean_errors = np.zeros( (len(srnn_pred_expmap), srnn_pred_expmap[0].shape[0]) )
629 | 
630 |       for i in np.arange(8):
631 | 
632 |         eulerchannels_pred = srnn_pred_expmap[i]
633 | 
634 |         for j in np.arange( eulerchannels_pred.shape[0] ):
635 |           for k in np.arange(3,97,3):
636 |             eulerchannels_pred[j,k:k+3] = data_utils.rotmat2euler(data_utils.expmap2rotmat( eulerchannels_pred[j,k:k+3] ))
637 | 
638 |         eulerchannels_pred[:,0:6] = 0
639 | 
640 |         # Pick only the dimensions with sufficient standard deviation. Others are ignored.
641 |         idx_to_use = np.where( np.std( eulerchannels_pred, 0 ) > 1e-4 )[0]
642 | 
643 |         euc_error = np.power( srnn_gts_euler[action][i][:,idx_to_use] - eulerchannels_pred[:,idx_to_use], 2)
644 |         euc_error = np.sum(euc_error, 1)
645 |         euc_error = np.sqrt( euc_error )
646 |         mean_errors[i,:] = euc_error
647 | 
648 |       mean_mean_errors = np.mean( mean_errors, 0 )
649 |       print( action )
650 |       print( ','.join(map(str, mean_mean_errors.tolist() )) )
651 | 
652 |       with h5py.File( SAMPLES_FNAME, 'a' ) as hf:
653 |         node_name = 'mean_{0}_error'.format( action )
654 |         hf.create_dataset( node_name, data=mean_mean_errors )
655 | 
656 |     return
657 | 
658 | 
659 | def define_actions( action ):
660 |   """
661 |   Define the list of actions we are using.
662 | 
663 |   Args
664 |     action: String with the passed action. Could be "all"
665 |   Returns
666 |     actions: List of strings of actions
667 |   Raises
668 |     ValueError if the action is not included in H3.6M
669 |   """
670 | 
671 |   actions = ["walking", "eating", "smoking", "discussion", "directions",
672 |              "greeting", "phoning", "posing", "purchases", "sitting",
673 |              "sittingdown", "takingphoto", "waiting", "walkingdog",
674 |              "walkingtogether"]
675 | 
676 |   if action in actions:
677 |     return [action]
678 | 
679 |   if action == "all":
680 |     return actions
681 | 
682 |   if action == "all_srnn":
683 |     return ["walking", "eating", "smoking", "discussion"]
684 | 
685 |   raise ValueError("Unrecognized action: %s" % action)
686 | 
687 | 
688 | def read_all_data( actions, seq_length_in, seq_length_out, data_dir, one_hot ):
689 |   """
690 |   Loads data for training/testing and normalizes it.
691 | 
692 |   Args
693 |     actions: list of strings (actions) to load
694 |     seq_length_in: number of frames to use in the burn-in sequence
695 |     seq_length_out: number of frames to use in the output sequence
696 |     data_dir: directory to load the data from
697 |     one_hot: whether to use one-hot encoding per action
698 |   Returns
699 |     train_set: dictionary with normalized training data
700 |     test_set: dictionary with test data
701 |     data_mean: d-long vector with the mean of the training data
702 |     data_std: d-long vector with the standard dev of the training data
703 |     dim_to_ignore: dimensions that are not used because their stdev is too small
704 |     dim_to_use: dimensions that we are actually using in the model
705 |   """
706 | 
707 |   # === Read training data ===
708 |   print("Reading training data (seq_len_in: {0}, seq_len_out: {1}).".format(
709 |     seq_length_in, seq_length_out))
710 | 
711 |   train_subject_ids = [1,6,7,8,9,11]
712 |   test_subject_ids = [5]
713 | 
714 |   train_set, complete_train = data_utils.load_data( data_dir, train_subject_ids, actions, one_hot )
715 |   test_set, complete_test = data_utils.load_data( data_dir, test_subject_ids, actions, one_hot )
716 | 
717 |   # Compute normalization stats
718 |   data_mean, data_std, dim_to_ignore, dim_to_use = data_utils.normalization_stats(complete_train)
719 | 
720 |   # MODIFIED
721 |   #print(*dim_to_use)
722 | 
723 |   # Normalize -- subtract mean, divide by stdev
724 |   train_set = data_utils.normalize_data( train_set, data_mean, data_std, dim_to_use, actions, one_hot )
725 |   test_set = data_utils.normalize_data( test_set, data_mean, data_std, dim_to_use, actions, one_hot )
726 |   print("done reading data.")
727 | 
728 |   return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use
729 | 
730 | 
731 | def main(_):
732 |   if FLAGS.sample:
733 |     sample()
734 |   else:
735 |     train()
736 | 
737 | if __name__ == "__main__":
738 |   tf.app.run()
739 | 
--------------------------------------------------------------------------------
/viz.py:
--------------------------------------------------------------------------------
1 | """Functions to visualize human poses"""
2 | 
3 | import matplotlib.pyplot as plt
4 | import data_utils
5 | import numpy as np
6 | import h5py
7 | import os
8 | from mpl_toolkits.mplot3d import Axes3D
9 | 
10 | class Ax3DPose(object):
11 |   def __init__(self, ax, lcolor="#3498db", rcolor="#e74c3c"):
12 |     """
13 |     Create a 3d pose visualizer that can be updated with new poses.
14 | 
15 |     Args
16 |       ax: 3d axis to plot the 3d pose on
17 |       lcolor: String. Colour for the left part of the body
18 |       rcolor: String. Colour for the right part of the body
19 |     """
20 | 
21 |     # Start and endpoints of our representation
22 |     self.I = np.array([1,2,3,1,7,8,1, 13,14,15,14,18,19,14,26,27])-1
23 |     self.J = np.array([2,3,4,7,8,9,13,14,15,16,18,19,20,26,27,28])-1
24 |     # Left / right indicator
25 |     self.LR = np.array([1,1,1,0,0,0,0, 0, 0, 0, 0, 0, 0, 1, 1, 1], dtype=bool)
26 |     self.ax = ax
27 | 
28 |     vals = np.zeros((32, 3))
29 | 
30 |     # Make connection matrix
31 |     self.plots = []
32 |     for i in np.arange( len(self.I) ):
33 |       x = np.array( [vals[self.I[i], 0], vals[self.J[i], 0]] )
34 |       y = np.array( [vals[self.I[i], 1], vals[self.J[i], 1]] )
35 |       z = np.array( [vals[self.I[i], 2], vals[self.J[i], 2]] )
36 |       self.plots.append(self.ax.plot(x, y, z, lw=2, c=lcolor if self.LR[i] else rcolor))
37 | 
38 |     self.ax.set_xlabel("x")
39 |     self.ax.set_ylabel("y")
40 |     self.ax.set_zlabel("z")
41 | 
42 |   def update(self, channels, lcolor="#3498db", rcolor="#e74c3c"):
43 |     """
44 |     Update the plotted 3d pose.
45 | 
46 |     Args
47 |       channels: 96-dim long np array. The pose to plot.
48 |       lcolor: String. Colour for the left part of the body.
49 |       rcolor: String. Colour for the right part of the body.
50 |     Returns
51 |       Nothing. Simply updates the axis with the new pose.
52 |     """
53 | 
54 |     assert channels.size == 96, "channels should have 96 entries, it has %d instead" % channels.size
55 |     vals = np.reshape( channels, (32, -1) )
56 | 
57 |     for i in np.arange( len(self.I) ):
58 |       x = np.array( [vals[self.I[i], 0], vals[self.J[i], 0]] )
59 |       y = np.array( [vals[self.I[i], 1], vals[self.J[i], 1]] )
60 |       z = np.array( [vals[self.I[i], 2], vals[self.J[i], 2]] )
61 |       self.plots[i][0].set_xdata(x)
62 |       self.plots[i][0].set_ydata(y)
63 |       self.plots[i][0].set_3d_properties(z)
64 |       self.plots[i][0].set_color(lcolor if self.LR[i] else rcolor)
65 | 
66 |     r = 750
67 |     xroot, yroot, zroot = vals[0,0], vals[0,1], vals[0,2]
68 |     self.ax.set_xlim3d([-r+xroot, r+xroot])
69 |     self.ax.set_zlim3d([-r+zroot, r+zroot])
70 |     self.ax.set_ylim3d([-r+yroot, r+yroot])
71 | 
72 |     self.ax.set_aspect('equal')
73 | 
--------------------------------------------------------------------------------
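For reference, both training scripts above score predictions with the same Euler-angle metric. The sketch below is a minimal self-contained version of it (illustrative only: the function name and array shapes are assumptions, and in the scripts the expmap-to-Euler conversion is done by data_utils before this point):

import numpy as np

def euler_error(gt_euler, pred_euler):
  """Per-frame Euclidean error between two (n_frames, d) Euler-angle sequences."""
  gt, pred = np.copy(gt_euler), np.copy(pred_euler)
  # Global translation (dims 0:3) and global rotation (dims 3:6) are excluded.
  gt[:, 0:6] = 0
  pred[:, 0:6] = 0
  # Score only the dimensions whose ground truth actually varies.
  idx_to_use = np.where(np.std(gt, 0) > 1e-4)[0]
  # Euclidean distance in Euler-angle space, one value per frame.
  return np.sqrt(np.sum((gt[:, idx_to_use] - pred[:, idx_to_use]) ** 2, 1))

Averaging this over the eight test sequences per action gives the mean_mean_errors vectors printed and logged above.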