├── README.md ├── time_distributed_dropout.py ├── train.py └── new_recurrent.py /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | Code for the paper: [Long-Term On-Board Prediction of People in Traffic Scenes under Uncertainty](http://openaccess.thecvf.com/content_cvpr_2018/CameraReady/3887.pdf) 3 | 4 | # Requirements 5 | 6 | * Python 2.7 7 | * h5py 8 | * TensorFlow 1.1.0 9 | * Keras 2.0.3 10 | * tqdm 11 | 12 | # Data 13 | * The pedestrian tracks extracted from the Cityscapes dataset are available [here](https://drive.google.com/open?id=1hOkm0O4AMrF0bNzdbY_RgOkeopE30R6U). 14 | 15 | # Training 16 | * Run train.py. The training tracks file (`tracks_train.h5`) must be in the current directory. 17 | -------------------------------------------------------------------------------- /time_distributed_dropout.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy 3 | import json 4 | import sys 5 | import random 6 | import operator 7 | import tensorflow as tf 8 | 9 | 10 | from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, UpSampling2D, Reshape, merge, Lambda, RepeatVector, Dropout 11 | from keras.layers.wrappers import TimeDistributed 12 | from keras.layers.recurrent import LSTM, SimpleRNN 13 | from keras.models import Model, Sequential 14 | from keras.optimizers import SGD 15 | from keras import backend as K 16 | from keras.engine import Layer, InputSpec 17 | 18 | class nDropout(Layer): # dropout with a single mask shared across all timesteps 19 | def __init__(self, p, **kwargs): 20 | self.supports_masking = True 21 | self.p = p 22 | super(nDropout, self).__init__(**kwargs) 23 | 24 | def call(self, x, mask=None): 25 | 26 | inputs = x; 27 | 28 | input_shape = K.shape(x); 29 | 30 | initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) 31 | initial_state = K.sum(initial_state, axis=1) # (samples, input_dim) 32 | initial_state = K.ones_like(initial_state) # (samples, input_dim) 33 | 34 | mask = K.zeros_like(inputs) # (samples, timesteps, input_dim) 35 | mask = K.sum(mask, axis=(0,1)) # (input_dim,) 36 | #mask = K.ones_like(K.expand_dims(mask, axis=0)) # (1,input_dim) 37 | mask = K.ones_like(mask) 38 | 39 | mask = K.dropout(mask, level=self.p) # one dropout draw for the whole sequence 40 | 41 | 42 | initial_state = tf.multiply(initial_state,mask); 43 | 44 | def step(inputs,states): 45 | mask = states[0]; 46 | return tf.multiply(inputs,mask), [mask] 47 | 48 | ( _, outputs, _) = K.rnn(step, x, [initial_state]); # apply the same mask at every timestep 49 | 50 | return outputs 51 | 52 | def get_config(self): 53 | config = {'p': self.p} 54 | base_config = super(nDropout, self).get_config() 55 | return dict(list(base_config.items()) + list(config.items())) 56 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import json 4 | import sys 5 | import random 6 | import operator 7 | import tensorflow as tf 8 | 9 | 10 | from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, UpSampling2D, Reshape, merge, Lambda, RepeatVector, Dropout 11 | from keras.layers.wrappers import TimeDistributed 12 | from keras import regularizers 13 | from keras.layers.recurrent import LSTM, SimpleRNN 14 | from keras.models import Model, Sequential 15 | from keras.optimizers import SGD 16 | from keras import backend as K 17 | from keras.engine import Layer, InputSpec 18 | 19 | from new_recurrent import dLSTM 20 | from time_distributed_dropout import nDropout 21 | 22 | count = 
0; 23 | 24 | def get_lossfunc(true, pred): 25 | x1_data = true[:,:,0] 26 | y1_data = true[:,:,1] 27 | x2_data = true[:,:,2] 28 | y2_data = true[:,:,3] 29 | z_mux1 = pred[:,:,0] 30 | z_muy1 = pred[:,:,1] 31 | z_mux2 = pred[:,:,2] 32 | z_muy2 = pred[:,:,3] 33 | z_sx = pred[:,:,4] 34 | z_sy = pred[:,:,5] 35 | 36 | result1 = tf.multiply(tf.square(tf.subtract(x1_data, z_mux1)), K.exp(-z_sx)) + tf.multiply(tf.square(tf.subtract(y1_data, z_muy1)), K.exp(-z_sy)) 37 | 38 | result2 = tf.multiply(tf.square(tf.subtract(x2_data, z_mux2)), K.exp(-z_sx)) + tf.multiply(tf.square(tf.subtract(y2_data, z_muy2)), K.exp(-z_sy)) 39 | 40 | reg = tf.add(tf.reduce_mean((z_sx)), tf.reduce_mean((z_sy))) 41 | 42 | return tf.add( tf.divide(tf.add(tf.reduce_mean(result1), tf.reduce_mean(result2)), tf.constant(4.0, dtype=tf.float32, shape=(1, 1))), reg) 43 | 44 | 45 | def get_center( bbox ): 46 | return [ (float(bbox[0]) + float(bbox[2]))/(2*2048), (float(bbox[1]) + float(bbox[3]))/(2*1024) ]; 47 | 48 | def get_bbox_centers( bboxes ): 49 | bbox_centers = []; 50 | for bbox in bboxes: 51 | bbox_centers.append( get_center(bbox) ); 52 | return bbox_centers 53 | 54 | def slice_tracks( bboxes, length ): 55 | bboxes = [ bboxes[i:i+length] for i in xrange(len(bboxes) - length + 1) ] 56 | #timestamps = [ timestamps[i:i+length] for i in xrange(len(timestamps) - length + 1) ] 57 | return bboxes 58 | 59 | def get_diff_array( arr ): 60 | arr = arr[:,0:4] 61 | arr1 = arr[1:]; 62 | arr2 = arr[0]; 63 | return np.subtract(arr1,arr2); 64 | 65 | def training_example( arr, in_frames ): 66 | x = arr[0:in_frames-1]; 67 | y = arr[in_frames-1:]; 68 | x = np.reshape( x, (in_frames-1,4)) 69 | return ( x, y) 70 | 71 | 72 | def get_modeld(input_shape1,out_seq): 73 | input1 = Input(shape=input_shape1) 74 | 75 | l2_reg = regularizers.l2(0.0001); 76 | 77 | decoder_1 = TimeDistributed(Dense(64, activation='relu', kernel_regularizer = l2_reg))(input1) 78 | decoder_1 = nDropout(0.10)(decoder_1) 79 | decoder_1 = dLSTM(128, implementation = 1, kernel_regularizer = l2_reg, recurrent_regularizer = l2_reg, bias_regularizer = l2_reg)(decoder_1); 80 | decoder_1 = RepeatVector(out_seq)(decoder_1); 81 | decoder_1_ = dLSTM(128, kernel_regularizer = l2_reg, recurrent_regularizer = l2_reg, bias_regularizer = l2_reg, implementation = 1, return_sequences=True)(decoder_1); 82 | decoder_1_m = TimeDistributed(Dense(4))(decoder_1_) 83 | decoder_1_v = TimeDistributed(Dense(2, activation='relu'))(decoder_1_) 84 | decoder_1 = merge([decoder_1_m,decoder_1_v], mode='concat', concat_axis=2) 85 | print decoder_1._keras_shape 86 | 87 | model = Model(input= [input1], output=decoder_1) 88 | model.compile(optimizer = 'adam', loss = get_lossfunc) 89 | 90 | return model 91 | 92 | in_frames = 8; 93 | out_frames = 15; 94 | shuffle_range = 5120*4; 95 | batch_size = 128; 96 | min_seq_len = 12; 97 | 98 | source_f = h5py.File('./tracks_train.h5','r'); 99 | 100 | 101 | for seq_len in xrange(min_seq_len,in_frames+out_frames+1): 102 | data_X = []; 103 | data_Y = []; 104 | 105 | model = get_modeld( (in_frames-1,4), seq_len - in_frames ); 106 | if seq_len > min_seq_len: 107 | model.load_weights('ver_len_128_8.h5'); 108 | 109 | for track_key in source_f: 110 | curr_track = json.loads(source_f[track_key][()]) 111 | bboxes = curr_track['bboxes'] 112 | if len(bboxes) >= in_frames + out_frames: 113 | first_frame = curr_track['firstFrame']; 114 | last_frame = curr_track['lastFrame']; 115 | #bboxes = get_bbox_centers( bboxes ); 116 | bbox_slices = slice_tracks( bboxes, seq_len ) 117 | for bbox_slice 
in bbox_slices: 118 | diff_array = get_diff_array( np.array(bbox_slice) ) 119 | ( x, y) = training_example( diff_array, in_frames ); 120 | data_X.append(x); 121 | data_Y.append(y); 122 | 123 | data_X = np.array(data_X); 124 | data_Y = np.array(data_Y); 125 | print(data_X.shape) 126 | print(data_Y.shape) 127 | if seq_len >= 12 and seq_len < 19: 128 | model.fit([data_X],data_Y,batch_size=64,nb_epoch=35,verbose=1); 129 | else: 130 | model.fit([data_X],data_Y,batch_size=64,nb_epoch=25,verbose=1); 131 | model.save('ver_len_128_8.h5'); 132 | -------------------------------------------------------------------------------- /new_recurrent.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import numpy as np 4 | 5 | from keras import backend as K 6 | from keras import activations 7 | from keras import initializers 8 | from keras import regularizers 9 | from keras import constraints 10 | from keras.engine import Layer 11 | from keras.engine import InputSpec 12 | from keras.legacy import interfaces 13 | 14 | 15 | def _time_distributed_dense(x, w, b=None, dropout=None, 16 | input_dim=None, output_dim=None, 17 | timesteps=None, training=None): 18 | """Apply `y . w + b` for every temporal slice y of x. 19 | 20 | # Arguments 21 | x: input tensor. 22 | w: weight matrix. 23 | b: optional bias vector. 24 | dropout: wether to apply dropout (same dropout mask 25 | for every temporal slice of the input). 26 | input_dim: integer; optional dimensionality of the input. 27 | output_dim: integer; optional dimensionality of the output. 28 | timesteps: integer; optional number of timesteps. 29 | training: training phase tensor or boolean. 30 | 31 | # Returns 32 | Output tensor. 33 | """ 34 | if not input_dim: 35 | input_dim = K.shape(x)[2] 36 | if not timesteps: 37 | timesteps = K.shape(x)[1] 38 | if not output_dim: 39 | output_dim = K.shape(w)[1] 40 | 41 | if dropout is not None and 0. < dropout < 1.: 42 | # apply the same dropout pattern at every timestep 43 | ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim))) 44 | dropout_matrix = K.dropout(ones, dropout) 45 | expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps) 46 | x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training) 47 | 48 | # collapse time dimension and batch dimension together 49 | x = K.reshape(x, (-1, input_dim)) 50 | x = K.dot(x, w) 51 | if b is not None: 52 | x = K.bias_add(x, b) 53 | # reshape to 3D tensor 54 | if K.backend() == 'tensorflow': 55 | x = K.reshape(x, K.stack([-1, timesteps, output_dim])) 56 | x.set_shape([None, None, output_dim]) 57 | else: 58 | x = K.reshape(x, (-1, timesteps, output_dim)) 59 | return x 60 | 61 | 62 | class Recurrent(Layer): 63 | """Abstract base class for recurrent layers. 64 | 65 | Do not use in a model -- it's not a valid layer! 66 | Use its children classes `LSTM`, `GRU` and `SimpleRNN` instead. 67 | 68 | All recurrent layers (`LSTM`, `GRU`, `SimpleRNN`) also 69 | follow the specifications of this class and accept 70 | the keyword arguments listed below. 71 | 72 | # Example 73 | 74 | ```python 75 | # as the first layer in a Sequential model 76 | model = Sequential() 77 | model.add(LSTM(32, input_shape=(10, 64))) 78 | # now model.output_shape == (None, 32) 79 | # note: `None` is the batch dimension. 
80 | 81 | # for subsequent layers, no need to specify the input size: 82 | model.add(LSTM(16)) 83 | 84 | # to stack recurrent layers, you must use return_sequences=True 85 | # on any recurrent layer that feeds into another recurrent layer. 86 | # note that you only need to specify the input size on the first layer. 87 | model = Sequential() 88 | model.add(LSTM(64, input_dim=64, input_length=10, return_sequences=True)) 89 | model.add(LSTM(32, return_sequences=True)) 90 | model.add(LSTM(10)) 91 | ``` 92 | 93 | # Arguments 94 | weights: list of Numpy arrays to set as initial weights. 95 | The list should have 3 elements, of shapes: 96 | `[(input_dim, output_dim), (output_dim, output_dim), (output_dim,)]`. 97 | return_sequences: Boolean. Whether to return the last output 98 | in the output sequence, or the full sequence. 99 | return_state: Boolean. Whether to return the last state 100 | in addition to the output. 101 | go_backwards: Boolean (default False). 102 | If True, process the input sequence backwards and return the 103 | reversed sequence. 104 | stateful: Boolean (default False). If True, the last state 105 | for each sample at index i in a batch will be used as initial 106 | state for the sample of index i in the following batch. 107 | unroll: Boolean (default False). 108 | If True, the network will be unrolled, 109 | else a symbolic loop will be used. 110 | Unrolling can speed-up a RNN, 111 | although it tends to be more memory-intensive. 112 | Unrolling is only suitable for short sequences. 113 | implementation: one of {0, 1, or 2}. 114 | If set to 0, the RNN will use 115 | an implementation that uses fewer, larger matrix products, 116 | thus running faster on CPU but consuming more memory. 117 | If set to 1, the RNN will use more matrix products, 118 | but smaller ones, thus running slower 119 | (may actually be faster on GPU) while consuming less memory. 120 | If set to 2 (LSTM/GRU only), 121 | the RNN will combine the input gate, 122 | the forget gate and the output gate into a single matrix, 123 | enabling more time-efficient parallelization on the GPU. 124 | Note: RNN dropout must be shared for all gates, 125 | resulting in a slightly reduced regularization. 126 | input_dim: dimensionality of the input (integer). 127 | This argument (or alternatively, the keyword argument `input_shape`) 128 | is required when using this layer as the first layer in a model. 129 | input_length: Length of input sequences, to be specified 130 | when it is constant. 131 | This argument is required if you are going to connect 132 | `Flatten` then `Dense` layers upstream 133 | (without it, the shape of the dense outputs cannot be computed). 134 | Note that if the recurrent layer is not the first layer 135 | in your model, you would need to specify the input length 136 | at the level of the first layer 137 | (e.g. via the `input_shape` argument) 138 | 139 | # Input shapes 140 | 3D tensor with shape `(batch_size, timesteps, input_dim)`, 141 | (Optional) 2D tensors with shape `(batch_size, output_dim)`. 142 | 143 | # Output shape 144 | - if `return_state`: a list of tensors. The first tensor is 145 | the output. The remaining tensors are the last states, 146 | each with shape `(batch_size, units)`. 147 | - if `return_sequences`: 3D tensor with shape 148 | `(batch_size, timesteps, units)`. 149 | - else, 2D tensor with shape `(batch_size, units)`. 150 | 151 | # Masking 152 | This layer supports masking for input data with a variable number 153 | of timesteps. 
To introduce masks to your data, 154 | use an [Embedding](embeddings.md) layer with the `mask_zero` parameter 155 | set to `True`. 156 | 157 | # Note on using statefulness in RNNs 158 | You can set RNN layers to be 'stateful', which means that the states 159 | computed for the samples in one batch will be reused as initial states 160 | for the samples in the next batch. This assumes a one-to-one mapping 161 | between samples in different successive batches. 162 | 163 | To enable statefulness: 164 | - specify `stateful=True` in the layer constructor. 165 | - specify a fixed batch size for your model, by passing 166 | if sequential model: 167 | `batch_input_shape=(...)` to the first layer in your model. 168 | else for functional model with 1 or more Input layers: 169 | `batch_shape=(...)` to all the first layers in your model. 170 | This is the expected shape of your inputs 171 | *including the batch size*. 172 | It should be a tuple of integers, e.g. `(32, 10, 100)`. 173 | - specify `shuffle=False` when calling fit(). 174 | 175 | To reset the states of your model, call `.reset_states()` on either 176 | a specific layer, or on your entire model. 177 | 178 | # Note on specifying the initial state of RNNs 179 | You can specify the initial state of RNN layers symbolically by 180 | calling them with the keyword argument `initial_state`. The value of 181 | `initial_state` should be a tensor or list of tensors representing 182 | the initial state of the RNN layer. 183 | 184 | You can specify the initial state of RNN layers numerically by 185 | calling `reset_states` with the keyword argument `states`. The value of 186 | `states` should be a numpy array or list of numpy arrays representing 187 | the initial state of the RNN layer. 188 | """ 189 | 190 | def __init__(self, return_sequences=False, 191 | return_state=False, 192 | go_backwards=False, 193 | stateful=False, 194 | unroll=False, 195 | implementation=0, 196 | **kwargs): 197 | super(Recurrent, self).__init__(**kwargs) 198 | self.return_sequences = return_sequences 199 | self.return_state = return_state 200 | self.go_backwards = go_backwards 201 | if K.backend() == 'cntk' and stateful: 202 | raise ValueError('Stateful RNN is not currently supported with CNTK.') 203 | 204 | self.stateful = stateful 205 | self.unroll = unroll 206 | self.implementation = implementation 207 | self.supports_masking = True 208 | self.input_spec = [InputSpec(ndim=3)] 209 | self.state_spec = None 210 | self.dropout = 0 211 | self.recurrent_dropout = 0 212 | 213 | def compute_output_shape(self, input_shape): 214 | if isinstance(input_shape, list): 215 | input_shape = input_shape[0] 216 | 217 | if self.return_sequences: 218 | output_shape = (input_shape[0], input_shape[1], self.units) 219 | else: 220 | output_shape = (input_shape[0], self.units) 221 | 222 | if self.return_state: 223 | state_shape = [(input_shape[0], self.units) for _ in self.states] 224 | return [output_shape] + state_shape 225 | else: 226 | return output_shape 227 | 228 | def compute_mask(self, inputs, mask): 229 | if isinstance(mask, list): 230 | mask = mask[0] 231 | output_mask = mask if self.return_sequences else None 232 | if self.return_state: 233 | state_mask = [None for _ in self.states] 234 | return [output_mask] + state_mask 235 | else: 236 | return output_mask 237 | 238 | def step(self, inputs, states): 239 | raise NotImplementedError 240 | 241 | def get_constants(self, inputs, training=None): 242 | return [] 243 | 244 | def get_initial_state(self, inputs): 245 | # build an all-zero tensor of 
shape (samples, output_dim) 246 | initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) 247 | initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) 248 | initial_state = K.expand_dims(initial_state) # (samples, 1) 249 | initial_state = K.tile(initial_state, [1, self.units]) # (samples, output_dim) 250 | initial_state = [initial_state for _ in range(len(self.states))] 251 | if len(self.states) == 4: 252 | initial_state[2] = K.dropout(K.ones_like(K.sum(inputs, axis=(1))),0.35); 253 | initial_state[3] = K.dropout(K.ones_like(initial_state[1]),0.35); 254 | return initial_state 255 | 256 | def preprocess_input(self, inputs, training=None): 257 | return inputs 258 | 259 | def __call__(self, inputs, initial_state=None, **kwargs): 260 | # If `initial_state` is specified, 261 | # and if it a Keras tensor, 262 | # then add it to the inputs and temporarily 263 | # modify the input spec to include the state. 264 | if initial_state is None: 265 | return super(Recurrent, self).__call__(inputs, **kwargs) 266 | 267 | if not isinstance(initial_state, (list, tuple)): 268 | initial_state = [initial_state] 269 | 270 | is_keras_tensor = hasattr(initial_state[0], '_keras_history') 271 | for tensor in initial_state: 272 | if hasattr(tensor, '_keras_history') != is_keras_tensor: 273 | raise ValueError('The initial state of an RNN layer cannot be' 274 | ' specified with a mix of Keras tensors and' 275 | ' non-Keras tensors') 276 | 277 | if is_keras_tensor: 278 | # Compute the full input spec, including state 279 | input_spec = self.input_spec 280 | state_spec = self.state_spec 281 | if not isinstance(input_spec, list): 282 | input_spec = [input_spec] 283 | if not isinstance(state_spec, list): 284 | state_spec = [state_spec] 285 | self.input_spec = input_spec + state_spec 286 | 287 | # Compute the full inputs, including state 288 | inputs = [inputs] + list(initial_state) 289 | 290 | # Perform the call 291 | output = super(Recurrent, self).__call__(inputs, **kwargs) 292 | 293 | # Restore original input spec 294 | self.input_spec = input_spec 295 | return output 296 | else: 297 | kwargs['initial_state'] = initial_state 298 | return super(Recurrent, self).__call__(inputs, **kwargs) 299 | 300 | def call(self, inputs, mask=None, training=None, initial_state=None): 301 | # input shape: `(samples, time (padded with zeros), input_dim)` 302 | # note that the .build() method of subclasses MUST define 303 | # self.input_spec and self.state_spec with complete input shapes. 304 | if isinstance(inputs, list): 305 | initial_state = inputs[1:] 306 | inputs = inputs[0] 307 | elif initial_state is not None: 308 | pass 309 | elif self.stateful: 310 | initial_state = self.states 311 | else: 312 | initial_state = self.get_initial_state(inputs) 313 | 314 | if isinstance(mask, list): 315 | mask = mask[0] 316 | 317 | if len(initial_state) != len(self.states): 318 | raise ValueError('Layer has ' + str(len(self.states)) + 319 | ' states but was passed ' + 320 | str(len(initial_state)) + 321 | ' initial states.') 322 | input_shape = K.int_shape(inputs) 323 | if self.unroll and input_shape[1] is None: 324 | raise ValueError('Cannot unroll a RNN if the ' 325 | 'time dimension is undefined. \n' 326 | '- If using a Sequential model, ' 327 | 'specify the time dimension by passing ' 328 | 'an `input_shape` or `batch_input_shape` ' 329 | 'argument to your first layer. 
If your ' 330 | 'first layer is an Embedding, you can ' 331 | 'also use the `input_length` argument.\n' 332 | '- If using the functional API, specify ' 333 | 'the time dimension by passing a `shape` ' 334 | 'or `batch_shape` argument to your Input layer.') 335 | constants = self.get_constants(inputs, training=None) 336 | preprocessed_input = self.preprocess_input(inputs, training=None) 337 | last_output, outputs, states = K.rnn(self.step, 338 | preprocessed_input, 339 | initial_state, 340 | go_backwards=self.go_backwards, 341 | mask=mask, 342 | constants=constants, 343 | unroll=self.unroll, 344 | input_length=input_shape[1]) 345 | if self.stateful: 346 | updates = [] 347 | for i in range(len(states)): 348 | updates.append((self.states[i], states[i])) 349 | self.add_update(updates, inputs) 350 | 351 | # Properly set learning phase 352 | if 0 < self.dropout + self.recurrent_dropout: 353 | last_output._uses_learning_phase = True 354 | outputs._uses_learning_phase = True 355 | 356 | if self.return_sequences: 357 | output = outputs 358 | else: 359 | output = last_output 360 | 361 | if self.return_state: 362 | if not isinstance(states, (list, tuple)): 363 | states = [states] 364 | else: 365 | states = list(states) 366 | return [output] + states 367 | else: 368 | return output 369 | 370 | def reset_states(self, states=None): 371 | if not self.stateful: 372 | raise AttributeError('Layer must be stateful.') 373 | batch_size = self.input_spec[0].shape[0] 374 | if not batch_size: 375 | raise ValueError('If a RNN is stateful, it needs to know ' 376 | 'its batch size. Specify the batch size ' 377 | 'of your input tensors: \n' 378 | '- If using a Sequential model, ' 379 | 'specify the batch size by passing ' 380 | 'a `batch_input_shape` ' 381 | 'argument to your first layer.\n' 382 | '- If using the functional API, specify ' 383 | 'the time dimension by passing a ' 384 | '`batch_shape` argument to your Input layer.') 385 | # initialize state if None 386 | if self.states[0] is None: 387 | self.states = [K.zeros((batch_size, self.units)) 388 | for _ in self.states] 389 | elif states is None: 390 | for state in self.states: 391 | K.set_value(state, np.zeros((batch_size, self.units))) 392 | else: 393 | if not isinstance(states, (list, tuple)): 394 | states = [states] 395 | if len(states) != len(self.states): 396 | raise ValueError('Layer ' + self.name + ' expects ' + 397 | str(len(self.states)) + ' states, ' 398 | 'but it received ' + str(len(states)) + 399 | ' state values. Input received: ' + 400 | str(states)) 401 | for index, (value, state) in enumerate(zip(states, self.states)): 402 | if value.shape != (batch_size, self.units): 403 | raise ValueError('State ' + str(index) + 404 | ' is incompatible with layer ' + 405 | self.name + ': expected shape=' + 406 | str((batch_size, self.units)) + 407 | ', found shape=' + str(value.shape)) 408 | K.set_value(state, value) 409 | 410 | def get_config(self): 411 | config = {'return_sequences': self.return_sequences, 412 | 'return_state': self.return_state, 413 | 'go_backwards': self.go_backwards, 414 | 'stateful': self.stateful, 415 | 'unroll': self.unroll, 416 | 'implementation': self.implementation} 417 | base_config = super(Recurrent, self).get_config() 418 | return dict(list(base_config.items()) + list(config.items())) 419 | 420 | 421 | class SimpleRNN(Recurrent): 422 | """Fully-connected RNN where the output is to be fed back to input. 423 | 424 | # Arguments 425 | units: Positive integer, dimensionality of the output space. 
426 | activation: Activation function to use 427 | (see [activations](../activations.md)). 428 | If you pass None, no activation is applied 429 | (ie. "linear" activation: `a(x) = x`). 430 | use_bias: Boolean, whether the layer uses a bias vector. 431 | kernel_initializer: Initializer for the `kernel` weights matrix, 432 | used for the linear transformation of the inputs. 433 | (see [initializers](../initializers.md)). 434 | recurrent_initializer: Initializer for the `recurrent_kernel` 435 | weights matrix, 436 | used for the linear transformation of the recurrent state. 437 | (see [initializers](../initializers.md)). 438 | bias_initializer: Initializer for the bias vector 439 | (see [initializers](../initializers.md)). 440 | kernel_regularizer: Regularizer function applied to 441 | the `kernel` weights matrix 442 | (see [regularizer](../regularizers.md)). 443 | recurrent_regularizer: Regularizer function applied to 444 | the `recurrent_kernel` weights matrix 445 | (see [regularizer](../regularizers.md)). 446 | bias_regularizer: Regularizer function applied to the bias vector 447 | (see [regularizer](../regularizers.md)). 448 | activity_regularizer: Regularizer function applied to 449 | the output of the layer (its "activation"). 450 | (see [regularizer](../regularizers.md)). 451 | kernel_constraint: Constraint function applied to 452 | the `kernel` weights matrix 453 | (see [constraints](../constraints.md)). 454 | recurrent_constraint: Constraint function applied to 455 | the `recurrent_kernel` weights matrix 456 | (see [constraints](../constraints.md)). 457 | bias_constraint: Constraint function applied to the bias vector 458 | (see [constraints](../constraints.md)). 459 | dropout: Float between 0 and 1. 460 | Fraction of the units to drop for 461 | the linear transformation of the inputs. 462 | recurrent_dropout: Float between 0 and 1. 463 | Fraction of the units to drop for 464 | the linear transformation of the recurrent state. 
465 | 466 | # References 467 | - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) 468 | """ 469 | 470 | @interfaces.legacy_recurrent_support 471 | def __init__(self, units, 472 | activation='tanh', 473 | use_bias=True, 474 | kernel_initializer='glorot_uniform', 475 | recurrent_initializer='orthogonal', 476 | bias_initializer='zeros', 477 | kernel_regularizer=None, 478 | recurrent_regularizer=None, 479 | bias_regularizer=None, 480 | activity_regularizer=None, 481 | kernel_constraint=None, 482 | recurrent_constraint=None, 483 | bias_constraint=None, 484 | dropout=0., 485 | recurrent_dropout=0., 486 | **kwargs): 487 | super(SimpleRNN, self).__init__(**kwargs) 488 | self.units = units 489 | self.activation = activations.get(activation) 490 | self.use_bias = use_bias 491 | 492 | self.kernel_initializer = initializers.get(kernel_initializer) 493 | self.recurrent_initializer = initializers.get(recurrent_initializer) 494 | self.bias_initializer = initializers.get(bias_initializer) 495 | 496 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 497 | self.recurrent_regularizer = regularizers.get(recurrent_regularizer) 498 | self.bias_regularizer = regularizers.get(bias_regularizer) 499 | self.activity_regularizer = regularizers.get(activity_regularizer) 500 | 501 | self.kernel_constraint = constraints.get(kernel_constraint) 502 | self.recurrent_constraint = constraints.get(recurrent_constraint) 503 | self.bias_constraint = constraints.get(bias_constraint) 504 | 505 | self.dropout = min(1., max(0., dropout)) 506 | self.recurrent_dropout = min(1., max(0., recurrent_dropout)) 507 | self.state_spec = InputSpec(shape=(None, self.units)) 508 | 509 | def build(self, input_shape): 510 | if isinstance(input_shape, list): 511 | input_shape = input_shape[0] 512 | 513 | batch_size = input_shape[0] if self.stateful else None 514 | self.input_dim = input_shape[2] 515 | self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) 516 | 517 | self.states = [None] 518 | if self.stateful: 519 | self.reset_states() 520 | 521 | self.kernel = self.add_weight(shape=(self.input_dim, self.units), 522 | name='kernel', 523 | initializer=self.kernel_initializer, 524 | regularizer=self.kernel_regularizer, 525 | constraint=self.kernel_constraint) 526 | self.recurrent_kernel = self.add_weight( 527 | shape=(self.units, self.units), 528 | name='recurrent_kernel', 529 | initializer=self.recurrent_initializer, 530 | regularizer=self.recurrent_regularizer, 531 | constraint=self.recurrent_constraint) 532 | if self.use_bias: 533 | self.bias = self.add_weight(shape=(self.units,), 534 | name='bias', 535 | initializer=self.bias_initializer, 536 | regularizer=self.bias_regularizer, 537 | constraint=self.bias_constraint) 538 | else: 539 | self.bias = None 540 | self.built = True 541 | 542 | def preprocess_input(self, inputs, training=None): 543 | if self.implementation > 0: 544 | return inputs 545 | else: 546 | input_shape = K.int_shape(inputs) 547 | input_dim = input_shape[2] 548 | timesteps = input_shape[1] 549 | return _time_distributed_dense(inputs, 550 | self.kernel, 551 | self.bias, 552 | self.dropout, 553 | input_dim, 554 | self.units, 555 | timesteps, 556 | training=training) 557 | 558 | def step(self, inputs, states): 559 | if self.implementation == 0: 560 | h = inputs 561 | else: 562 | if 0 < self.dropout < 1: 563 | h = K.dot(inputs * states[1], self.kernel) 564 | else: 565 | h = K.dot(inputs, self.kernel) 566 | if self.bias is not None: 567 | h 
= K.bias_add(h, self.bias) 568 | 569 | prev_output = states[0] 570 | if 0 < self.recurrent_dropout < 1: 571 | prev_output *= states[2] 572 | output = h + K.dot(prev_output, self.recurrent_kernel) 573 | if self.activation is not None: 574 | output = self.activation(output) 575 | 576 | # Properly set learning phase on output tensor. 577 | if 0 < self.dropout + self.recurrent_dropout: 578 | output._uses_learning_phase = True 579 | return output, [output] 580 | 581 | def get_constants(self, inputs, training=None): 582 | constants = [] 583 | if self.implementation != 0 and 0 < self.dropout < 1: 584 | input_shape = K.int_shape(inputs) 585 | input_dim = input_shape[-1] 586 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 587 | ones = K.tile(ones, (1, int(input_dim))) 588 | 589 | def dropped_inputs(): 590 | return K.dropout(ones, self.dropout) 591 | 592 | dp_mask = K.in_train_phase(dropped_inputs, 593 | ones, 594 | training=training) 595 | constants.append(dp_mask) 596 | else: 597 | constants.append(K.cast_to_floatx(1.)) 598 | 599 | if 0 < self.recurrent_dropout < 1: 600 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 601 | ones = K.tile(ones, (1, self.units)) 602 | 603 | def dropped_inputs(): 604 | return K.dropout(ones, self.recurrent_dropout) 605 | rec_dp_mask = K.in_train_phase(dropped_inputs, 606 | ones, 607 | training=training) 608 | constants.append(rec_dp_mask) 609 | else: 610 | constants.append(K.cast_to_floatx(1.)) 611 | return constants 612 | 613 | def get_config(self): 614 | config = {'units': self.units, 615 | 'activation': activations.serialize(self.activation), 616 | 'use_bias': self.use_bias, 617 | 'kernel_initializer': initializers.serialize(self.kernel_initializer), 618 | 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), 619 | 'bias_initializer': initializers.serialize(self.bias_initializer), 620 | 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 621 | 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), 622 | 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 623 | 'activity_regularizer': regularizers.serialize(self.activity_regularizer), 624 | 'kernel_constraint': constraints.serialize(self.kernel_constraint), 625 | 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), 626 | 'bias_constraint': constraints.serialize(self.bias_constraint), 627 | 'dropout': self.dropout, 628 | 'recurrent_dropout': self.recurrent_dropout} 629 | base_config = super(SimpleRNN, self).get_config() 630 | return dict(list(base_config.items()) + list(config.items())) 631 | 632 | 633 | class GRU(Recurrent): 634 | """Gated Recurrent Unit - Cho et al. 2014. 635 | 636 | # Arguments 637 | units: Positive integer, dimensionality of the output space. 638 | activation: Activation function to use 639 | (see [activations](../activations.md)). 640 | If you pass None, no activation is applied 641 | (ie. "linear" activation: `a(x) = x`). 642 | recurrent_activation: Activation function to use 643 | for the recurrent step 644 | (see [activations](../activations.md)). 645 | use_bias: Boolean, whether the layer uses a bias vector. 646 | kernel_initializer: Initializer for the `kernel` weights matrix, 647 | used for the linear transformation of the inputs. 648 | (see [initializers](../initializers.md)). 649 | recurrent_initializer: Initializer for the `recurrent_kernel` 650 | weights matrix, 651 | used for the linear transformation of the recurrent state. 
652 | (see [initializers](../initializers.md)). 653 | bias_initializer: Initializer for the bias vector 654 | (see [initializers](../initializers.md)). 655 | kernel_regularizer: Regularizer function applied to 656 | the `kernel` weights matrix 657 | (see [regularizer](../regularizers.md)). 658 | recurrent_regularizer: Regularizer function applied to 659 | the `recurrent_kernel` weights matrix 660 | (see [regularizer](../regularizers.md)). 661 | bias_regularizer: Regularizer function applied to the bias vector 662 | (see [regularizer](../regularizers.md)). 663 | activity_regularizer: Regularizer function applied to 664 | the output of the layer (its "activation"). 665 | (see [regularizer](../regularizers.md)). 666 | kernel_constraint: Constraint function applied to 667 | the `kernel` weights matrix 668 | (see [constraints](../constraints.md)). 669 | recurrent_constraint: Constraint function applied to 670 | the `recurrent_kernel` weights matrix 671 | (see [constraints](../constraints.md)). 672 | bias_constraint: Constraint function applied to the bias vector 673 | (see [constraints](../constraints.md)). 674 | dropout: Float between 0 and 1. 675 | Fraction of the units to drop for 676 | the linear transformation of the inputs. 677 | recurrent_dropout: Float between 0 and 1. 678 | Fraction of the units to drop for 679 | the linear transformation of the recurrent state. 680 | 681 | # References 682 | - [On the Properties of Neural Machine Translation: Encoder-Decoder Approaches](https://arxiv.org/abs/1409.1259) 683 | - [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling](http://arxiv.org/abs/1412.3555v1) 684 | - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) 685 | """ 686 | 687 | @interfaces.legacy_recurrent_support 688 | def __init__(self, units, 689 | activation='tanh', 690 | recurrent_activation='hard_sigmoid', 691 | use_bias=True, 692 | kernel_initializer='glorot_uniform', 693 | recurrent_initializer='orthogonal', 694 | bias_initializer='zeros', 695 | kernel_regularizer=None, 696 | recurrent_regularizer=None, 697 | bias_regularizer=None, 698 | activity_regularizer=None, 699 | kernel_constraint=None, 700 | recurrent_constraint=None, 701 | bias_constraint=None, 702 | dropout=0., 703 | recurrent_dropout=0., 704 | **kwargs): 705 | super(GRU, self).__init__(**kwargs) 706 | self.units = units 707 | self.activation = activations.get(activation) 708 | self.recurrent_activation = activations.get(recurrent_activation) 709 | self.use_bias = use_bias 710 | 711 | self.kernel_initializer = initializers.get(kernel_initializer) 712 | self.recurrent_initializer = initializers.get(recurrent_initializer) 713 | self.bias_initializer = initializers.get(bias_initializer) 714 | 715 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 716 | self.recurrent_regularizer = regularizers.get(recurrent_regularizer) 717 | self.bias_regularizer = regularizers.get(bias_regularizer) 718 | self.activity_regularizer = regularizers.get(activity_regularizer) 719 | 720 | self.kernel_constraint = constraints.get(kernel_constraint) 721 | self.recurrent_constraint = constraints.get(recurrent_constraint) 722 | self.bias_constraint = constraints.get(bias_constraint) 723 | 724 | self.dropout = min(1., max(0., dropout)) 725 | self.recurrent_dropout = min(1., max(0., recurrent_dropout)) 726 | self.state_spec = InputSpec(shape=(None, self.units)) 727 | 728 | def build(self, input_shape): 729 | if isinstance(input_shape, list): 730 | 
input_shape = input_shape[0] 731 | 732 | batch_size = input_shape[0] if self.stateful else None 733 | self.input_dim = input_shape[2] 734 | self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) 735 | 736 | self.states = [None] 737 | if self.stateful: 738 | self.reset_states() 739 | 740 | self.kernel = self.add_weight(shape=(self.input_dim, self.units * 3), 741 | name='kernel', 742 | initializer=self.kernel_initializer, 743 | regularizer=self.kernel_regularizer, 744 | constraint=self.kernel_constraint) 745 | self.recurrent_kernel = self.add_weight( 746 | shape=(self.units, self.units * 3), 747 | name='recurrent_kernel', 748 | initializer=self.recurrent_initializer, 749 | regularizer=self.recurrent_regularizer, 750 | constraint=self.recurrent_constraint) 751 | 752 | if self.use_bias: 753 | self.bias = self.add_weight(shape=(self.units * 3,), 754 | name='bias', 755 | initializer=self.bias_initializer, 756 | regularizer=self.bias_regularizer, 757 | constraint=self.bias_constraint) 758 | else: 759 | self.bias = None 760 | 761 | self.kernel_z = self.kernel[:, :self.units] 762 | self.recurrent_kernel_z = self.recurrent_kernel[:, :self.units] 763 | self.kernel_r = self.kernel[:, self.units: self.units * 2] 764 | self.recurrent_kernel_r = self.recurrent_kernel[:, 765 | self.units: 766 | self.units * 2] 767 | self.kernel_h = self.kernel[:, self.units * 2:] 768 | self.recurrent_kernel_h = self.recurrent_kernel[:, self.units * 2:] 769 | 770 | if self.use_bias: 771 | self.bias_z = self.bias[:self.units] 772 | self.bias_r = self.bias[self.units: self.units * 2] 773 | self.bias_h = self.bias[self.units * 2:] 774 | else: 775 | self.bias_z = None 776 | self.bias_r = None 777 | self.bias_h = None 778 | self.built = True 779 | 780 | def preprocess_input(self, inputs, training=None): 781 | if self.implementation == 0: 782 | input_shape = K.int_shape(inputs) 783 | input_dim = input_shape[2] 784 | timesteps = input_shape[1] 785 | 786 | x_z = _time_distributed_dense(inputs, self.kernel_z, self.bias_z, 787 | self.dropout, input_dim, self.units, 788 | timesteps, training=training) 789 | x_r = _time_distributed_dense(inputs, self.kernel_r, self.bias_r, 790 | self.dropout, input_dim, self.units, 791 | timesteps, training=training) 792 | x_h = _time_distributed_dense(inputs, self.kernel_h, self.bias_h, 793 | self.dropout, input_dim, self.units, 794 | timesteps, training=training) 795 | return K.concatenate([x_z, x_r, x_h], axis=2) 796 | else: 797 | return inputs 798 | 799 | def get_constants(self, inputs, training=None): 800 | constants = [] 801 | if self.implementation != 0 and 0 < self.dropout < 1: 802 | input_shape = K.int_shape(inputs) 803 | input_dim = input_shape[-1] 804 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 805 | ones = K.tile(ones, (1, int(input_dim))) 806 | 807 | def dropped_inputs(): 808 | return K.dropout(ones, self.dropout) 809 | 810 | dp_mask = [K.in_train_phase(dropped_inputs, 811 | ones, 812 | training=training) for _ in range(3)] 813 | constants.append(dp_mask) 814 | else: 815 | constants.append([K.cast_to_floatx(1.) 
for _ in range(3)]) 816 | 817 | if 0 < self.recurrent_dropout < 1: 818 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 819 | ones = K.tile(ones, (1, self.units)) 820 | 821 | def dropped_inputs(): 822 | return K.dropout(ones, self.recurrent_dropout) 823 | rec_dp_mask = [K.in_train_phase(dropped_inputs, 824 | ones, 825 | training=training) for _ in range(3)] 826 | constants.append(rec_dp_mask) 827 | else: 828 | constants.append([K.cast_to_floatx(1.) for _ in range(3)]) 829 | return constants 830 | 831 | def step(self, inputs, states): 832 | h_tm1 = states[0] # previous memory 833 | dp_mask = states[1] # dropout matrices for recurrent units 834 | rec_dp_mask = states[2] 835 | 836 | if self.implementation == 2: 837 | matrix_x = K.dot(inputs * dp_mask[0], self.kernel) 838 | if self.use_bias: 839 | matrix_x = K.bias_add(matrix_x, self.bias) 840 | matrix_inner = K.dot(h_tm1 * rec_dp_mask[0], 841 | self.recurrent_kernel[:, :2 * self.units]) 842 | 843 | x_z = matrix_x[:, :self.units] 844 | x_r = matrix_x[:, self.units: 2 * self.units] 845 | recurrent_z = matrix_inner[:, :self.units] 846 | recurrent_r = matrix_inner[:, self.units: 2 * self.units] 847 | 848 | z = self.recurrent_activation(x_z + recurrent_z) 849 | r = self.recurrent_activation(x_r + recurrent_r) 850 | 851 | x_h = matrix_x[:, 2 * self.units:] 852 | recurrent_h = K.dot(r * h_tm1 * rec_dp_mask[0], 853 | self.recurrent_kernel[:, 2 * self.units:]) 854 | hh = self.activation(x_h + recurrent_h) 855 | else: 856 | if self.implementation == 0: 857 | x_z = inputs[:, :self.units] 858 | x_r = inputs[:, self.units: 2 * self.units] 859 | x_h = inputs[:, 2 * self.units:] 860 | elif self.implementation == 1: 861 | x_z = K.dot(inputs * dp_mask[0], self.kernel_z) 862 | x_r = K.dot(inputs * dp_mask[1], self.kernel_r) 863 | x_h = K.dot(inputs * dp_mask[2], self.kernel_h) 864 | if self.use_bias: 865 | x_z = K.bias_add(x_z, self.bias_z) 866 | x_r = K.bias_add(x_r, self.bias_r) 867 | x_h = K.bias_add(x_h, self.bias_h) 868 | else: 869 | raise ValueError('Unknown `implementation` mode.') 870 | z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0], 871 | self.recurrent_kernel_z)) 872 | r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1], 873 | self.recurrent_kernel_r)) 874 | 875 | hh = self.activation(x_h + K.dot(r * h_tm1 * rec_dp_mask[2], 876 | self.recurrent_kernel_h)) 877 | h = z * h_tm1 + (1 - z) * hh 878 | if 0 < self.dropout + self.recurrent_dropout: 879 | h._uses_learning_phase = True 880 | return h, [h] 881 | 882 | def get_config(self): 883 | config = {'units': self.units, 884 | 'activation': activations.serialize(self.activation), 885 | 'recurrent_activation': activations.serialize(self.recurrent_activation), 886 | 'use_bias': self.use_bias, 887 | 'kernel_initializer': initializers.serialize(self.kernel_initializer), 888 | 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), 889 | 'bias_initializer': initializers.serialize(self.bias_initializer), 890 | 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 891 | 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), 892 | 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 893 | 'activity_regularizer': regularizers.serialize(self.activity_regularizer), 894 | 'kernel_constraint': constraints.serialize(self.kernel_constraint), 895 | 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), 896 | 'bias_constraint': constraints.serialize(self.bias_constraint), 897 | 'dropout': 
self.dropout, 898 | 'recurrent_dropout': self.recurrent_dropout} 899 | base_config = super(GRU, self).get_config() 900 | return dict(list(base_config.items()) + list(config.items())) 901 | 902 | 903 | class dLSTM(Recurrent): 904 | """Long-Short Term Memory unit - Hochreiter 1997. 905 | 906 | For a step-by-step description of the algorithm, see 907 | [this tutorial](http://deeplearning.net/tutorial/lstm.html). 908 | 909 | # Arguments 910 | units: Positive integer, dimensionality of the output space. 911 | activation: Activation function to use 912 | (see [activations](../activations.md)). 913 | If you pass None, no activation is applied 914 | (ie. "linear" activation: `a(x) = x`). 915 | recurrent_activation: Activation function to use 916 | for the recurrent step 917 | (see [activations](../activations.md)). 918 | use_bias: Boolean, whether the layer uses a bias vector. 919 | kernel_initializer: Initializer for the `kernel` weights matrix, 920 | used for the linear transformation of the inputs. 921 | (see [initializers](../initializers.md)). 922 | recurrent_initializer: Initializer for the `recurrent_kernel` 923 | weights matrix, 924 | used for the linear transformation of the recurrent state. 925 | (see [initializers](../initializers.md)). 926 | bias_initializer: Initializer for the bias vector 927 | (see [initializers](../initializers.md)). 928 | unit_forget_bias: Boolean. 929 | If True, add 1 to the bias of the forget gate at initialization. 930 | Setting it to true will also force `bias_initializer="zeros"`. 931 | This is recommended in [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) 932 | kernel_regularizer: Regularizer function applied to 933 | the `kernel` weights matrix 934 | (see [regularizer](../regularizers.md)). 935 | recurrent_regularizer: Regularizer function applied to 936 | the `recurrent_kernel` weights matrix 937 | (see [regularizer](../regularizers.md)). 938 | bias_regularizer: Regularizer function applied to the bias vector 939 | (see [regularizer](../regularizers.md)). 940 | activity_regularizer: Regularizer function applied to 941 | the output of the layer (its "activation"). 942 | (see [regularizer](../regularizers.md)). 943 | kernel_constraint: Constraint function applied to 944 | the `kernel` weights matrix 945 | (see [constraints](../constraints.md)). 946 | recurrent_constraint: Constraint function applied to 947 | the `recurrent_kernel` weights matrix 948 | (see [constraints](../constraints.md)). 949 | bias_constraint: Constraint function applied to the bias vector 950 | (see [constraints](../constraints.md)). 951 | dropout: Float between 0 and 1. 952 | Fraction of the units to drop for 953 | the linear transformation of the inputs. 954 | recurrent_dropout: Float between 0 and 1. 955 | Fraction of the units to drop for 956 | the linear transformation of the recurrent state. 
957 | 958 | # References 959 | - [Long short-term memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf) (original 1997 paper) 960 | - [Learning to forget: Continual prediction with LSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015) 961 | - [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf) 962 | - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) 963 | """ 964 | @interfaces.legacy_recurrent_support 965 | def __init__(self, units, 966 | activation='tanh', 967 | recurrent_activation='hard_sigmoid', 968 | use_bias=True, 969 | kernel_initializer='glorot_uniform', 970 | recurrent_initializer='orthogonal', 971 | bias_initializer='zeros', 972 | unit_forget_bias=True, 973 | kernel_regularizer=None, 974 | recurrent_regularizer=None, 975 | bias_regularizer=None, 976 | activity_regularizer=None, 977 | kernel_constraint=None, 978 | recurrent_constraint=None, 979 | bias_constraint=None, 980 | dropout=0., 981 | recurrent_dropout=0., 982 | **kwargs): 983 | super(dLSTM, self).__init__(**kwargs) 984 | self.units = units 985 | self.activation = activations.get(activation) 986 | self.recurrent_activation = activations.get(recurrent_activation) 987 | self.use_bias = use_bias 988 | 989 | self.kernel_initializer = initializers.get(kernel_initializer) 990 | self.recurrent_initializer = initializers.get(recurrent_initializer) 991 | self.bias_initializer = initializers.get(bias_initializer) 992 | self.unit_forget_bias = unit_forget_bias 993 | 994 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 995 | self.recurrent_regularizer = regularizers.get(recurrent_regularizer) 996 | self.bias_regularizer = regularizers.get(bias_regularizer) 997 | self.activity_regularizer = regularizers.get(activity_regularizer) 998 | 999 | self.kernel_constraint = constraints.get(kernel_constraint) 1000 | self.recurrent_constraint = constraints.get(recurrent_constraint) 1001 | self.bias_constraint = constraints.get(bias_constraint) 1002 | 1003 | self.dropout = min(1., max(0., dropout)) 1004 | self.recurrent_dropout = min(1., max(0., recurrent_dropout)) 1005 | self.state_spec = [InputSpec(shape=(None, self.units)), 1006 | InputSpec(shape=(None, self.units))] 1007 | 1008 | def build(self, input_shape): 1009 | if isinstance(input_shape, list): 1010 | input_shape = input_shape[0] 1011 | 1012 | batch_size = input_shape[0] if self.stateful else None 1013 | self.input_dim = input_shape[2] 1014 | self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) 1015 | 1016 | self.states = [None, None, None, None] 1017 | if self.stateful: 1018 | self.reset_states() 1019 | 1020 | self.kernel = self.add_weight(shape=(self.input_dim, self.units * 4), 1021 | name='kernel', 1022 | initializer=self.kernel_initializer, 1023 | regularizer=self.kernel_regularizer, 1024 | constraint=self.kernel_constraint) 1025 | self.recurrent_kernel = self.add_weight( 1026 | shape=(self.units, self.units * 4), 1027 | name='recurrent_kernel', 1028 | initializer=self.recurrent_initializer, 1029 | regularizer=self.recurrent_regularizer, 1030 | constraint=self.recurrent_constraint) 1031 | 1032 | if self.use_bias: 1033 | if self.unit_forget_bias: 1034 | def bias_initializer(shape, *args, **kwargs): 1035 | return K.concatenate([ 1036 | self.bias_initializer((self.units,), *args, **kwargs), 1037 | initializers.Ones()((self.units,), *args, **kwargs), 1038 | self.bias_initializer((self.units * 2,), 
*args, **kwargs), 1039 | ]) 1040 | else: 1041 | bias_initializer = self.bias_initializer 1042 | self.bias = self.add_weight(shape=(self.units * 4,), 1043 | name='bias', 1044 | initializer=bias_initializer, 1045 | regularizer=self.bias_regularizer, 1046 | constraint=self.bias_constraint) 1047 | else: 1048 | self.bias = None 1049 | 1050 | self.kernel_i = self.kernel[:, :self.units] 1051 | self.kernel_f = self.kernel[:, self.units: self.units * 2] 1052 | self.kernel_c = self.kernel[:, self.units * 2: self.units * 3] 1053 | self.kernel_o = self.kernel[:, self.units * 3:] 1054 | 1055 | self.recurrent_kernel_i = self.recurrent_kernel[:, :self.units] 1056 | self.recurrent_kernel_f = self.recurrent_kernel[:, self.units: self.units * 2] 1057 | self.recurrent_kernel_c = self.recurrent_kernel[:, self.units * 2: self.units * 3] 1058 | self.recurrent_kernel_o = self.recurrent_kernel[:, self.units * 3:] 1059 | 1060 | if self.use_bias: 1061 | self.bias_i = self.bias[:self.units] 1062 | self.bias_f = self.bias[self.units: self.units * 2] 1063 | self.bias_c = self.bias[self.units * 2: self.units * 3] 1064 | self.bias_o = self.bias[self.units * 3:] 1065 | else: 1066 | self.bias_i = None 1067 | self.bias_f = None 1068 | self.bias_c = None 1069 | self.bias_o = None 1070 | self.built = True 1071 | 1072 | def preprocess_input(self, inputs, training=None): 1073 | if self.implementation == 0: 1074 | input_shape = K.int_shape(inputs) 1075 | input_dim = input_shape[2] 1076 | timesteps = input_shape[1] 1077 | 1078 | x_i = _time_distributed_dense(inputs, self.kernel_i, self.bias_i, 1079 | self.dropout, input_dim, self.units, 1080 | timesteps, training=training) 1081 | x_f = _time_distributed_dense(inputs, self.kernel_f, self.bias_f, 1082 | self.dropout, input_dim, self.units, 1083 | timesteps, training=training) 1084 | x_c = _time_distributed_dense(inputs, self.kernel_c, self.bias_c, 1085 | self.dropout, input_dim, self.units, 1086 | timesteps, training=training) 1087 | x_o = _time_distributed_dense(inputs, self.kernel_o, self.bias_o, 1088 | self.dropout, input_dim, self.units, 1089 | timesteps, training=training) 1090 | return K.concatenate([x_i, x_f, x_c, x_o], axis=2) 1091 | else: 1092 | return inputs 1093 | 1094 | def get_constants(self, inputs, training=None): 1095 | constants = [] 1096 | if self.implementation != 0 and 0 < self.dropout < 1: 1097 | input_shape = K.int_shape(inputs) 1098 | input_dim = input_shape[-1] 1099 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 1100 | ones = K.tile(ones, (1, int(input_dim))) 1101 | 1102 | def dropped_inputs(): 1103 | return K.dropout(ones, self.dropout) 1104 | 1105 | dp_mask = [K.in_train_phase(dropped_inputs, 1106 | ones, 1107 | training=training) for _ in range(4)] 1108 | constants.append(dp_mask) 1109 | else: 1110 | constants.append([K.cast_to_floatx(1.) for _ in range(4)]) 1111 | 1112 | if 0 < self.recurrent_dropout < 1: 1113 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 1114 | ones = K.tile(ones, (1, self.units)) 1115 | 1116 | def dropped_inputs(): 1117 | return K.dropout(ones, self.recurrent_dropout) 1118 | rec_dp_mask = [K.in_train_phase(dropped_inputs, 1119 | ones, 1120 | training=training) for _ in range(4)] 1121 | constants.append(rec_dp_mask) 1122 | else: 1123 | constants.append([K.cast_to_floatx(1.) 
for _ in range(4)]) 1124 | return constants 1125 | 1126 | def step(self, inputs, states): 1127 | h_tm1 = states[0] 1128 | c_tm1 = states[1] 1129 | dp_mask = states[2] 1130 | rec_dp_mask = states[3] 1131 | 1132 | if self.implementation == 2: 1133 | z = K.dot(inputs * dp_mask[0], self.kernel) 1134 | z += K.dot(h_tm1 * rec_dp_mask, self.recurrent_kernel) 1135 | if self.use_bias: 1136 | z = K.bias_add(z, self.bias) 1137 | 1138 | z0 = z[:, :self.units] 1139 | z1 = z[:, self.units: 2 * self.units] 1140 | z2 = z[:, 2 * self.units: 3 * self.units] 1141 | z3 = z[:, 3 * self.units:] 1142 | 1143 | i = self.recurrent_activation(z0) 1144 | f = self.recurrent_activation(z1) 1145 | c = f * c_tm1 + i * self.activation(z2) 1146 | o = self.recurrent_activation(z3) 1147 | else: 1148 | if self.implementation == 0: 1149 | x_i = inputs[:, :self.units] 1150 | x_f = inputs[:, self.units: 2 * self.units] 1151 | x_c = inputs[:, 2 * self.units: 3 * self.units] 1152 | x_o = inputs[:, 3 * self.units:] 1153 | elif self.implementation == 1: 1154 | x_i = K.dot(inputs * dp_mask, self.kernel_i) + self.bias_i 1155 | x_f = K.dot(inputs * dp_mask, self.kernel_f) + self.bias_f 1156 | x_c = K.dot(inputs * dp_mask, self.kernel_c) + self.bias_c 1157 | x_o = K.dot(inputs * dp_mask, self.kernel_o) + self.bias_o 1158 | else: 1159 | raise ValueError('Unknown `implementation` mode.') 1160 | 1161 | i = self.recurrent_activation(x_i + K.dot(h_tm1 * rec_dp_mask, 1162 | self.recurrent_kernel_i)) 1163 | f = self.recurrent_activation(x_f + K.dot(h_tm1 * rec_dp_mask, 1164 | self.recurrent_kernel_f)) 1165 | c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1 * rec_dp_mask, 1166 | self.recurrent_kernel_c)) 1167 | o = self.recurrent_activation(x_o + K.dot(h_tm1 * rec_dp_mask, 1168 | self.recurrent_kernel_o)) 1169 | h = o * self.activation(c) 1170 | #if 0 < self.dropout + self.recurrent_dropout: 1171 | # h._uses_learning_phase = True 1172 | return h, [h, c, dp_mask, rec_dp_mask] 1173 | 1174 | def get_config(self): 1175 | config = {'units': self.units, 1176 | 'activation': activations.serialize(self.activation), 1177 | 'recurrent_activation': activations.serialize(self.recurrent_activation), 1178 | 'use_bias': self.use_bias, 1179 | 'kernel_initializer': initializers.serialize(self.kernel_initializer), 1180 | 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), 1181 | 'bias_initializer': initializers.serialize(self.bias_initializer), 1182 | 'unit_forget_bias': self.unit_forget_bias, 1183 | 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 1184 | 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), 1185 | 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 1186 | 'activity_regularizer': regularizers.serialize(self.activity_regularizer), 1187 | 'kernel_constraint': constraints.serialize(self.kernel_constraint), 1188 | 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), 1189 | 'bias_constraint': constraints.serialize(self.bias_constraint), 1190 | 'dropout': self.dropout, 1191 | 'recurrent_dropout': self.recurrent_dropout} 1192 | base_config = super(dLSTM, self).get_config() 1193 | return dict(list(base_config.items()) + list(config.items())) 1194 | --------------------------------------------------------------------------------
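The custom loss in train.py (`get_lossfunc`) treats the last two model outputs as log-variances for the x- and y-coordinates and uses them to attenuate the squared errors of the two predicted bounding-box corners, with a penalty on the log-variances so the network cannot simply claim high uncertainty everywhere. The NumPy restatement below is for intuition only and is not used anywhere in the repository; the function name `lossfunc_np` is introduced here.

```python
import numpy as np

def lossfunc_np(true, pred):
    """NumPy restatement of get_lossfunc in train.py (illustration only).

    true: (batch, time, 4) bbox-corner offsets (x1, y1, x2, y2)
    pred: (batch, time, 6) -> 4 predicted offsets + 2 log-variances (s_x, s_y)
    """
    err = np.square(true - pred[..., :4])          # squared error per corner coordinate
    s_x, s_y = pred[..., 4], pred[..., 5]
    # corner errors are down-weighted by the predicted log-variances ...
    weighted = (err[..., 0] + err[..., 2]) * np.exp(-s_x) \
             + (err[..., 1] + err[..., 3]) * np.exp(-s_y)
    # ... and the log-variances themselves are penalised
    return weighted.mean() / 4.0 + s_x.mean() + s_y.mean()
```

The division by 4 averages over the four corner-coordinate error terms, and `exp(-s)` keeps the attenuation positive without constraining the network's raw variance outputs.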
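The `nDropout` layer in time_distributed_dropout.py samples a single per-feature dropout mask and reuses it at every timestep via `K.rnn`, unlike the standard `Dropout` layer, which drops entries independently at each timestep. Note also that, as written, the mask is applied at test time too (there is no `K.in_train_phase` switch). A quick check of the mask sharing, assuming the TensorFlow backend listed in the README:

```python
import numpy as np
from keras.layers import Input
from keras.models import Model
from time_distributed_dropout import nDropout

# Tiny model: 5 timesteps, 4 features, 50% timestep-shared dropout.
inp = Input(shape=(5, 4))
out = nDropout(0.5)(inp)
m = Model(inp, out)

y = m.predict(np.ones((1, 5, 4)))
print(y[0])                           # kept features are scaled to 1/(1-p) = 2
print(np.allclose(y[0], y[0][0]))     # True: every timestep saw the same mask
```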
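Because the masks drawn by `nDropout` and by `dLSTM.get_initial_state` are applied on every forward pass, a trained model produces a different future trajectory on each call to `predict`, which is what enables Monte-Carlo uncertainty estimates. The sketch below is not part of the original code; it assumes `get_modeld` and `get_diff_array` have been factored out of train.py into an importable module (here hypothetically called `model_utils`, since importing train.py directly would re-run the training loop) and that `ver_len_128_8.h5` was saved after the final, 15-frame training stage.

```python
import numpy as np

# Hypothetical module: get_modeld / get_diff_array moved out of train.py
# so they can be imported without triggering the training loop.
from model_utils import get_modeld, get_diff_array

IN_FRAMES, OUT_FRAMES = 8, 15          # same settings as train.py

def mc_predict(bboxes, n_samples=50):
    """Monte-Carlo prediction for one track of >= IN_FRAMES boxes [x1, y1, x2, y2]."""
    model = get_modeld((IN_FRAMES - 1, 4), OUT_FRAMES)
    model.load_weights('ver_len_128_8.h5')

    # Offsets relative to the first observed box, as in training (get_diff_array).
    x = get_diff_array(np.array(bboxes[:IN_FRAMES])).reshape(1, IN_FRAMES - 1, 4)

    # Each predict() resamples the dropout masks, so it is one draw from the
    # predictive distribution; keep only the 4 offset outputs per future frame.
    draws = np.stack([model.predict(x)[0, :, :4] for _ in range(n_samples)])
    return draws.mean(axis=0), draws.std(axis=0)   # (OUT_FRAMES, 4) mean and spread
```

Adding the first observed box back to the mean offsets recovers absolute box coordinates; the per-frame standard deviation serves as the uncertainty estimate.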