├── README.md ├── time_distributed_dropout.py ├── train.py └── new_recurrent.py /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | Code for the paper: [Long-Term On-Board Prediction of People in Traffic Scenes under Uncertainty](http://openaccess.thecvf.com/content_cvpr_2018/CameraReady/3887.pdf) 3 | 4 | # Requirements 5 | 6 | * Python 2.7 7 | * h5py 8 | * TensorFlow 1.1.0 9 | * Keras 2.0.3 10 | * tqdm 11 | 12 | # Data 13 | * The pedestrian tracks extracted from the Cityscapes dataset are available [here](https://drive.google.com/open?id=1hOkm0O4AMrF0bNzdbY_RgOkeopE30R6U). 14 | 15 | # Training 16 | * Run train.py. The training tracks file (`tracks_train.h5`) must be in the current directory. 17 | -------------------------------------------------------------------------------- /time_distributed_dropout.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy 3 | import json 4 | import sys 5 | import random 6 | import operator 7 | import tensorflow as tf 8 | 9 | 10 | from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, UpSampling2D, Reshape, merge, Lambda, RepeatVector, Dropout 11 | from keras.layers.wrappers import TimeDistributed 12 | from keras.layers.recurrent import LSTM, SimpleRNN 13 | from keras.models import Model, Sequential 14 | from keras.optimizers import SGD 15 | from keras import backend as K 16 | from keras.engine import Layer, InputSpec 17 | 18 | class nDropout(Layer): # dropout with a single mask shared across all timesteps 19 | def __init__(self, p, **kwargs): 20 | self.supports_masking = True 21 | self.p = p 22 | super(nDropout, self).__init__(**kwargs) 23 | 24 | def call(self, x, mask=None): 25 | 26 | inputs = x; 27 | 28 | input_shape = K.shape(x); 29 | 30 | initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) 31 | initial_state = K.sum(initial_state, axis=1) # (samples, input_dim) 32 | initial_state = K.ones_like(initial_state) # (samples, input_dim) 33 | 34 | mask = K.zeros_like(inputs) # (samples, timesteps, input_dim) 35 | mask = K.sum(mask, axis=(0,1)) # (input_dim,) 36 | #mask = K.ones_like(K.expand_dims(mask, axis=0)) # (1,input_dim) 37 | mask = K.ones_like(mask) 38 | 39 | mask = K.dropout(mask, level=self.p) # one dropout draw for the whole sequence 40 | 41 | 42 | initial_state = tf.multiply(initial_state,mask); 43 | 44 | def step(inputs,states): 45 | mask = states[0]; 46 | return tf.multiply(inputs,mask), [mask] 47 | 48 | ( _, outputs, _) = K.rnn(step, x, [initial_state]); # apply the same mask at every timestep 49 | 50 | return outputs 51 | 52 | def get_config(self): 53 | config = {'p': self.p} 54 | base_config = super(nDropout, self).get_config() 55 | return dict(list(base_config.items()) + list(config.items())) 56 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import json 4 | import sys 5 | import random 6 | import operator 7 | import tensorflow as tf 8 | 9 | 10 | from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, UpSampling2D, Reshape, merge, Lambda, RepeatVector, Dropout 11 | from keras.layers.wrappers import TimeDistributed 12 | from keras import regularizers 13 | from keras.layers.recurrent import LSTM, SimpleRNN 14 | from keras.models import Model, Sequential 15 | from keras.optimizers import SGD 16 | from keras import backend as K 17 | from keras.engine import Layer, InputSpec 18 | 19 | from new_recurrent import dLSTM 20 | from time_distributed_dropout import nDropout 21 | 22 | count = 
0; 23 | 24 | def get_lossfunc(true, pred): 25 | x1_data = true[:,:,0] 26 | y1_data = true[:,:,1] 27 | x2_data = true[:,:,2] 28 | y2_data = true[:,:,3] 29 | z_mux1 = pred[:,:,0] 30 | z_muy1 = pred[:,:,1] 31 | z_mux2 = pred[:,:,2] 32 | z_muy2 = pred[:,:,3] 33 | z_sx = pred[:,:,4] 34 | z_sy = pred[:,:,5] 35 | 36 | result1 = tf.multiply(tf.square(tf.subtract(x1_data, z_mux1)), K.exp(-z_sx)) + tf.multiply(tf.square(tf.subtract(y1_data, z_muy1)), K.exp(-z_sy)) 37 | 38 | result2 = tf.multiply(tf.square(tf.subtract(x2_data, z_mux2)), K.exp(-z_sx)) + tf.multiply(tf.square(tf.subtract(y2_data, z_muy2)), K.exp(-z_sy)) 39 | 40 | reg = tf.add(tf.reduce_mean((z_sx)), tf.reduce_mean((z_sy))) 41 | 42 | return tf.add( tf.divide(tf.add(tf.reduce_mean(result1), tf.reduce_mean(result2)), tf.constant(4.0, dtype=tf.float32, shape=(1, 1))), reg) 43 | 44 | 45 | def get_center( bbox ): 46 | return [ (float(bbox[0]) + float(bbox[2]))/(2*2048), (float(bbox[1]) + float(bbox[3]))/(2*1024) ]; 47 | 48 | def get_bbox_centers( bboxes ): 49 | bbox_centers = []; 50 | for bbox in bboxes: 51 | bbox_centers.append( get_center(bbox) ); 52 | return bbox_centers 53 | 54 | def slice_tracks( bboxes, length ): 55 | bboxes = [ bboxes[i:i+length] for i in xrange(len(bboxes) - length + 1) ] 56 | #timestamps = [ timestamps[i:i+length] for i in xrange(len(timestamps) - length + 1) ] 57 | return bboxes 58 | 59 | def get_diff_array( arr ): 60 | arr = arr[:,0:4] 61 | arr1 = arr[1:]; 62 | arr2 = arr[0]; 63 | return np.subtract(arr1,arr2); 64 | 65 | def training_example( arr, in_frames ): 66 | x = arr[0:in_frames-1]; 67 | y = arr[in_frames-1:]; 68 | x = np.reshape( x, (in_frames-1,4)) 69 | return ( x, y) 70 | 71 | 72 | def get_modeld(input_shape1,out_seq): 73 | input1 = Input(shape=input_shape1) 74 | 75 | l2_reg = regularizers.l2(0.0001); 76 | 77 | decoder_1 = TimeDistributed(Dense(64, activation='relu', kernel_regularizer = l2_reg))(input1) 78 | decoder_1 = nDropout(0.10)(decoder_1) 79 | decoder_1 = dLSTM(128, implementation = 1, kernel_regularizer = l2_reg, recurrent_regularizer = l2_reg, bias_regularizer = l2_reg)(decoder_1); 80 | decoder_1 = RepeatVector(out_seq)(decoder_1); 81 | decoder_1_ = dLSTM(128, kernel_regularizer = l2_reg, recurrent_regularizer = l2_reg, bias_regularizer = l2_reg, implementation = 1, return_sequences=True)(decoder_1); 82 | decoder_1_m = TimeDistributed(Dense(4))(decoder_1_) 83 | decoder_1_v = TimeDistributed(Dense(2, activation='relu'))(decoder_1_) 84 | decoder_1 = merge([decoder_1_m,decoder_1_v], mode='concat', concat_axis=2) 85 | print decoder_1._keras_shape 86 | 87 | model = Model(input= [input1], output=decoder_1) 88 | model.compile(optimizer = 'adam', loss = get_lossfunc) 89 | 90 | return model 91 | 92 | in_frames = 8; 93 | out_frames = 15; 94 | shuffle_range = 5120*4; 95 | batch_size = 128; 96 | min_seq_len = 12; 97 | 98 | source_f = h5py.File('./tracks_train.h5','r'); 99 | 100 | 101 | for seq_len in xrange(min_seq_len,in_frames+out_frames+1): 102 | data_X = []; 103 | data_Y = []; 104 | 105 | model = get_modeld( (in_frames-1,4), seq_len - in_frames ); 106 | if seq_len > min_seq_len: 107 | model.load_weights('ver_len_128_8.h5'); 108 | 109 | for track_key in source_f: 110 | curr_track = json.loads(source_f[track_key][()]) 111 | bboxes = curr_track['bboxes'] 112 | if len(bboxes) >= in_frames + out_frames: 113 | first_frame = curr_track['firstFrame']; 114 | last_frame = curr_track['lastFrame']; 115 | #bboxes = get_bbox_centers( bboxes ); 116 | bbox_slices = slice_tracks( bboxes, seq_len ) 117 | for bbox_slice 
in bbox_slices: 118 | diff_array = get_diff_array( np.array(bbox_slice) ) 119 | ( x, y) = training_example( diff_array, in_frames ); 120 | data_X.append(x); 121 | data_Y.append(y); 122 | 123 | data_X = np.array(data_X); 124 | data_Y = np.array(data_Y); 125 | print(data_X.shape) 126 | print(data_Y.shape) 127 | if seq_len >= 12 and seq_len < 19: 128 | model.fit([data_X],data_Y,batch_size=64,nb_epoch=35,verbose=1); 129 | else: 130 | model.fit([data_X],data_Y,batch_size=64,nb_epoch=25,verbose=1); 131 | model.save('ver_len_128_8.h5'); 132 | -------------------------------------------------------------------------------- /new_recurrent.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import numpy as np 4 | 5 | from keras import backend as K 6 | from keras import activations 7 | from keras import initializers 8 | from keras import regularizers 9 | from keras import constraints 10 | from keras.engine import Layer 11 | from keras.engine import InputSpec 12 | from keras.legacy import interfaces 13 | 14 | 15 | def _time_distributed_dense(x, w, b=None, dropout=None, 16 | input_dim=None, output_dim=None, 17 | timesteps=None, training=None): 18 | """Apply `y . w + b` for every temporal slice y of x. 19 | 20 | # Arguments 21 | x: input tensor. 22 | w: weight matrix. 23 | b: optional bias vector. 24 | dropout: wether to apply dropout (same dropout mask 25 | for every temporal slice of the input). 26 | input_dim: integer; optional dimensionality of the input. 27 | output_dim: integer; optional dimensionality of the output. 28 | timesteps: integer; optional number of timesteps. 29 | training: training phase tensor or boolean. 30 | 31 | # Returns 32 | Output tensor. 33 | """ 34 | if not input_dim: 35 | input_dim = K.shape(x)[2] 36 | if not timesteps: 37 | timesteps = K.shape(x)[1] 38 | if not output_dim: 39 | output_dim = K.shape(w)[1] 40 | 41 | if dropout is not None and 0. < dropout < 1.: 42 | # apply the same dropout pattern at every timestep 43 | ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim))) 44 | dropout_matrix = K.dropout(ones, dropout) 45 | expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps) 46 | x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training) 47 | 48 | # collapse time dimension and batch dimension together 49 | x = K.reshape(x, (-1, input_dim)) 50 | x = K.dot(x, w) 51 | if b is not None: 52 | x = K.bias_add(x, b) 53 | # reshape to 3D tensor 54 | if K.backend() == 'tensorflow': 55 | x = K.reshape(x, K.stack([-1, timesteps, output_dim])) 56 | x.set_shape([None, None, output_dim]) 57 | else: 58 | x = K.reshape(x, (-1, timesteps, output_dim)) 59 | return x 60 | 61 | 62 | class Recurrent(Layer): 63 | """Abstract base class for recurrent layers. 64 | 65 | Do not use in a model -- it's not a valid layer! 66 | Use its children classes `LSTM`, `GRU` and `SimpleRNN` instead. 67 | 68 | All recurrent layers (`LSTM`, `GRU`, `SimpleRNN`) also 69 | follow the specifications of this class and accept 70 | the keyword arguments listed below. 71 | 72 | # Example 73 | 74 | ```python 75 | # as the first layer in a Sequential model 76 | model = Sequential() 77 | model.add(LSTM(32, input_shape=(10, 64))) 78 | # now model.output_shape == (None, 32) 79 | # note: `None` is the batch dimension. 
80 | 81 | # for subsequent layers, no need to specify the input size: 82 | model.add(LSTM(16)) 83 | 84 | # to stack recurrent layers, you must use return_sequences=True 85 | # on any recurrent layer that feeds into another recurrent layer. 86 | # note that you only need to specify the input size on the first layer. 87 | model = Sequential() 88 | model.add(LSTM(64, input_dim=64, input_length=10, return_sequences=True)) 89 | model.add(LSTM(32, return_sequences=True)) 90 | model.add(LSTM(10)) 91 | ``` 92 | 93 | # Arguments 94 | weights: list of Numpy arrays to set as initial weights. 95 | The list should have 3 elements, of shapes: 96 | `[(input_dim, output_dim), (output_dim, output_dim), (output_dim,)]`. 97 | return_sequences: Boolean. Whether to return the last output 98 | in the output sequence, or the full sequence. 99 | return_state: Boolean. Whether to return the last state 100 | in addition to the output. 101 | go_backwards: Boolean (default False). 102 | If True, process the input sequence backwards and return the 103 | reversed sequence. 104 | stateful: Boolean (default False). If True, the last state 105 | for each sample at index i in a batch will be used as initial 106 | state for the sample of index i in the following batch. 107 | unroll: Boolean (default False). 108 | If True, the network will be unrolled, 109 | else a symbolic loop will be used. 110 | Unrolling can speed-up a RNN, 111 | although it tends to be more memory-intensive. 112 | Unrolling is only suitable for short sequences. 113 | implementation: one of {0, 1, or 2}. 114 | If set to 0, the RNN will use 115 | an implementation that uses fewer, larger matrix products, 116 | thus running faster on CPU but consuming more memory. 117 | If set to 1, the RNN will use more matrix products, 118 | but smaller ones, thus running slower 119 | (may actually be faster on GPU) while consuming less memory. 120 | If set to 2 (LSTM/GRU only), 121 | the RNN will combine the input gate, 122 | the forget gate and the output gate into a single matrix, 123 | enabling more time-efficient parallelization on the GPU. 124 | Note: RNN dropout must be shared for all gates, 125 | resulting in a slightly reduced regularization. 126 | input_dim: dimensionality of the input (integer). 127 | This argument (or alternatively, the keyword argument `input_shape`) 128 | is required when using this layer as the first layer in a model. 129 | input_length: Length of input sequences, to be specified 130 | when it is constant. 131 | This argument is required if you are going to connect 132 | `Flatten` then `Dense` layers upstream 133 | (without it, the shape of the dense outputs cannot be computed). 134 | Note that if the recurrent layer is not the first layer 135 | in your model, you would need to specify the input length 136 | at the level of the first layer 137 | (e.g. via the `input_shape` argument) 138 | 139 | # Input shapes 140 | 3D tensor with shape `(batch_size, timesteps, input_dim)`, 141 | (Optional) 2D tensors with shape `(batch_size, output_dim)`. 142 | 143 | # Output shape 144 | - if `return_state`: a list of tensors. The first tensor is 145 | the output. The remaining tensors are the last states, 146 | each with shape `(batch_size, units)`. 147 | - if `return_sequences`: 3D tensor with shape 148 | `(batch_size, timesteps, units)`. 149 | - else, 2D tensor with shape `(batch_size, units)`. 150 | 151 | # Masking 152 | This layer supports masking for input data with a variable number 153 | of timesteps. 
To introduce masks to your data, 154 | use an [Embedding](embeddings.md) layer with the `mask_zero` parameter 155 | set to `True`. 156 | 157 | # Note on using statefulness in RNNs 158 | You can set RNN layers to be 'stateful', which means that the states 159 | computed for the samples in one batch will be reused as initial states 160 | for the samples in the next batch. This assumes a one-to-one mapping 161 | between samples in different successive batches. 162 | 163 | To enable statefulness: 164 | - specify `stateful=True` in the layer constructor. 165 | - specify a fixed batch size for your model, by passing 166 | if sequential model: 167 | `batch_input_shape=(...)` to the first layer in your model. 168 | else for functional model with 1 or more Input layers: 169 | `batch_shape=(...)` to all the first layers in your model. 170 | This is the expected shape of your inputs 171 | *including the batch size*. 172 | It should be a tuple of integers, e.g. `(32, 10, 100)`. 173 | - specify `shuffle=False` when calling fit(). 174 | 175 | To reset the states of your model, call `.reset_states()` on either 176 | a specific layer, or on your entire model. 177 | 178 | # Note on specifying the initial state of RNNs 179 | You can specify the initial state of RNN layers symbolically by 180 | calling them with the keyword argument `initial_state`. The value of 181 | `initial_state` should be a tensor or list of tensors representing 182 | the initial state of the RNN layer. 183 | 184 | You can specify the initial state of RNN layers numerically by 185 | calling `reset_states` with the keyword argument `states`. The value of 186 | `states` should be a numpy array or list of numpy arrays representing 187 | the initial state of the RNN layer. 188 | """ 189 | 190 | def __init__(self, return_sequences=False, 191 | return_state=False, 192 | go_backwards=False, 193 | stateful=False, 194 | unroll=False, 195 | implementation=0, 196 | **kwargs): 197 | super(Recurrent, self).__init__(**kwargs) 198 | self.return_sequences = return_sequences 199 | self.return_state = return_state 200 | self.go_backwards = go_backwards 201 | if K.backend() == 'cntk' and stateful: 202 | raise ValueError('Stateful RNN is not currently supported with CNTK.') 203 | 204 | self.stateful = stateful 205 | self.unroll = unroll 206 | self.implementation = implementation 207 | self.supports_masking = True 208 | self.input_spec = [InputSpec(ndim=3)] 209 | self.state_spec = None 210 | self.dropout = 0 211 | self.recurrent_dropout = 0 212 | 213 | def compute_output_shape(self, input_shape): 214 | if isinstance(input_shape, list): 215 | input_shape = input_shape[0] 216 | 217 | if self.return_sequences: 218 | output_shape = (input_shape[0], input_shape[1], self.units) 219 | else: 220 | output_shape = (input_shape[0], self.units) 221 | 222 | if self.return_state: 223 | state_shape = [(input_shape[0], self.units) for _ in self.states] 224 | return [output_shape] + state_shape 225 | else: 226 | return output_shape 227 | 228 | def compute_mask(self, inputs, mask): 229 | if isinstance(mask, list): 230 | mask = mask[0] 231 | output_mask = mask if self.return_sequences else None 232 | if self.return_state: 233 | state_mask = [None for _ in self.states] 234 | return [output_mask] + state_mask 235 | else: 236 | return output_mask 237 | 238 | def step(self, inputs, states): 239 | raise NotImplementedError 240 | 241 | def get_constants(self, inputs, training=None): 242 | return [] 243 | 244 | def get_initial_state(self, inputs): 245 | # build an all-zero tensor of 
shape (samples, output_dim) 246 | initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) 247 | initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) 248 | initial_state = K.expand_dims(initial_state) # (samples, 1) 249 | initial_state = K.tile(initial_state, [1, self.units]) # (samples, output_dim) 250 | initial_state = [initial_state for _ in range(len(self.states))] 251 | if len(self.states) == 4: 252 | initial_state[2] = K.dropout(K.ones_like(K.sum(inputs, axis=(1))),0.35); 253 | initial_state[3] = K.dropout(K.ones_like(initial_state[1]),0.35); 254 | return initial_state 255 | 256 | def preprocess_input(self, inputs, training=None): 257 | return inputs 258 | 259 | def __call__(self, inputs, initial_state=None, **kwargs): 260 | # If `initial_state` is specified, 261 | # and if it a Keras tensor, 262 | # then add it to the inputs and temporarily 263 | # modify the input spec to include the state. 264 | if initial_state is None: 265 | return super(Recurrent, self).__call__(inputs, **kwargs) 266 | 267 | if not isinstance(initial_state, (list, tuple)): 268 | initial_state = [initial_state] 269 | 270 | is_keras_tensor = hasattr(initial_state[0], '_keras_history') 271 | for tensor in initial_state: 272 | if hasattr(tensor, '_keras_history') != is_keras_tensor: 273 | raise ValueError('The initial state of an RNN layer cannot be' 274 | ' specified with a mix of Keras tensors and' 275 | ' non-Keras tensors') 276 | 277 | if is_keras_tensor: 278 | # Compute the full input spec, including state 279 | input_spec = self.input_spec 280 | state_spec = self.state_spec 281 | if not isinstance(input_spec, list): 282 | input_spec = [input_spec] 283 | if not isinstance(state_spec, list): 284 | state_spec = [state_spec] 285 | self.input_spec = input_spec + state_spec 286 | 287 | # Compute the full inputs, including state 288 | inputs = [inputs] + list(initial_state) 289 | 290 | # Perform the call 291 | output = super(Recurrent, self).__call__(inputs, **kwargs) 292 | 293 | # Restore original input spec 294 | self.input_spec = input_spec 295 | return output 296 | else: 297 | kwargs['initial_state'] = initial_state 298 | return super(Recurrent, self).__call__(inputs, **kwargs) 299 | 300 | def call(self, inputs, mask=None, training=None, initial_state=None): 301 | # input shape: `(samples, time (padded with zeros), input_dim)` 302 | # note that the .build() method of subclasses MUST define 303 | # self.input_spec and self.state_spec with complete input shapes. 304 | if isinstance(inputs, list): 305 | initial_state = inputs[1:] 306 | inputs = inputs[0] 307 | elif initial_state is not None: 308 | pass 309 | elif self.stateful: 310 | initial_state = self.states 311 | else: 312 | initial_state = self.get_initial_state(inputs) 313 | 314 | if isinstance(mask, list): 315 | mask = mask[0] 316 | 317 | if len(initial_state) != len(self.states): 318 | raise ValueError('Layer has ' + str(len(self.states)) + 319 | ' states but was passed ' + 320 | str(len(initial_state)) + 321 | ' initial states.') 322 | input_shape = K.int_shape(inputs) 323 | if self.unroll and input_shape[1] is None: 324 | raise ValueError('Cannot unroll a RNN if the ' 325 | 'time dimension is undefined. \n' 326 | '- If using a Sequential model, ' 327 | 'specify the time dimension by passing ' 328 | 'an `input_shape` or `batch_input_shape` ' 329 | 'argument to your first layer. 
If your ' 330 | 'first layer is an Embedding, you can ' 331 | 'also use the `input_length` argument.\n' 332 | '- If using the functional API, specify ' 333 | 'the time dimension by passing a `shape` ' 334 | 'or `batch_shape` argument to your Input layer.') 335 | constants = self.get_constants(inputs, training=None) 336 | preprocessed_input = self.preprocess_input(inputs, training=None) 337 | last_output, outputs, states = K.rnn(self.step, 338 | preprocessed_input, 339 | initial_state, 340 | go_backwards=self.go_backwards, 341 | mask=mask, 342 | constants=constants, 343 | unroll=self.unroll, 344 | input_length=input_shape[1]) 345 | if self.stateful: 346 | updates = [] 347 | for i in range(len(states)): 348 | updates.append((self.states[i], states[i])) 349 | self.add_update(updates, inputs) 350 | 351 | # Properly set learning phase 352 | if 0 < self.dropout + self.recurrent_dropout: 353 | last_output._uses_learning_phase = True 354 | outputs._uses_learning_phase = True 355 | 356 | if self.return_sequences: 357 | output = outputs 358 | else: 359 | output = last_output 360 | 361 | if self.return_state: 362 | if not isinstance(states, (list, tuple)): 363 | states = [states] 364 | else: 365 | states = list(states) 366 | return [output] + states 367 | else: 368 | return output 369 | 370 | def reset_states(self, states=None): 371 | if not self.stateful: 372 | raise AttributeError('Layer must be stateful.') 373 | batch_size = self.input_spec[0].shape[0] 374 | if not batch_size: 375 | raise ValueError('If a RNN is stateful, it needs to know ' 376 | 'its batch size. Specify the batch size ' 377 | 'of your input tensors: \n' 378 | '- If using a Sequential model, ' 379 | 'specify the batch size by passing ' 380 | 'a `batch_input_shape` ' 381 | 'argument to your first layer.\n' 382 | '- If using the functional API, specify ' 383 | 'the time dimension by passing a ' 384 | '`batch_shape` argument to your Input layer.') 385 | # initialize state if None 386 | if self.states[0] is None: 387 | self.states = [K.zeros((batch_size, self.units)) 388 | for _ in self.states] 389 | elif states is None: 390 | for state in self.states: 391 | K.set_value(state, np.zeros((batch_size, self.units))) 392 | else: 393 | if not isinstance(states, (list, tuple)): 394 | states = [states] 395 | if len(states) != len(self.states): 396 | raise ValueError('Layer ' + self.name + ' expects ' + 397 | str(len(self.states)) + ' states, ' 398 | 'but it received ' + str(len(states)) + 399 | ' state values. Input received: ' + 400 | str(states)) 401 | for index, (value, state) in enumerate(zip(states, self.states)): 402 | if value.shape != (batch_size, self.units): 403 | raise ValueError('State ' + str(index) + 404 | ' is incompatible with layer ' + 405 | self.name + ': expected shape=' + 406 | str((batch_size, self.units)) + 407 | ', found shape=' + str(value.shape)) 408 | K.set_value(state, value) 409 | 410 | def get_config(self): 411 | config = {'return_sequences': self.return_sequences, 412 | 'return_state': self.return_state, 413 | 'go_backwards': self.go_backwards, 414 | 'stateful': self.stateful, 415 | 'unroll': self.unroll, 416 | 'implementation': self.implementation} 417 | base_config = super(Recurrent, self).get_config() 418 | return dict(list(base_config.items()) + list(config.items())) 419 | 420 | 421 | class SimpleRNN(Recurrent): 422 | """Fully-connected RNN where the output is to be fed back to input. 423 | 424 | # Arguments 425 | units: Positive integer, dimensionality of the output space. 
426 | activation: Activation function to use 427 | (see [activations](../activations.md)). 428 | If you pass None, no activation is applied 429 | (ie. "linear" activation: `a(x) = x`). 430 | use_bias: Boolean, whether the layer uses a bias vector. 431 | kernel_initializer: Initializer for the `kernel` weights matrix, 432 | used for the linear transformation of the inputs. 433 | (see [initializers](../initializers.md)). 434 | recurrent_initializer: Initializer for the `recurrent_kernel` 435 | weights matrix, 436 | used for the linear transformation of the recurrent state. 437 | (see [initializers](../initializers.md)). 438 | bias_initializer: Initializer for the bias vector 439 | (see [initializers](../initializers.md)). 440 | kernel_regularizer: Regularizer function applied to 441 | the `kernel` weights matrix 442 | (see [regularizer](../regularizers.md)). 443 | recurrent_regularizer: Regularizer function applied to 444 | the `recurrent_kernel` weights matrix 445 | (see [regularizer](../regularizers.md)). 446 | bias_regularizer: Regularizer function applied to the bias vector 447 | (see [regularizer](../regularizers.md)). 448 | activity_regularizer: Regularizer function applied to 449 | the output of the layer (its "activation"). 450 | (see [regularizer](../regularizers.md)). 451 | kernel_constraint: Constraint function applied to 452 | the `kernel` weights matrix 453 | (see [constraints](../constraints.md)). 454 | recurrent_constraint: Constraint function applied to 455 | the `recurrent_kernel` weights matrix 456 | (see [constraints](../constraints.md)). 457 | bias_constraint: Constraint function applied to the bias vector 458 | (see [constraints](../constraints.md)). 459 | dropout: Float between 0 and 1. 460 | Fraction of the units to drop for 461 | the linear transformation of the inputs. 462 | recurrent_dropout: Float between 0 and 1. 463 | Fraction of the units to drop for 464 | the linear transformation of the recurrent state. 
465 | 466 | # References 467 | - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) 468 | """ 469 | 470 | @interfaces.legacy_recurrent_support 471 | def __init__(self, units, 472 | activation='tanh', 473 | use_bias=True, 474 | kernel_initializer='glorot_uniform', 475 | recurrent_initializer='orthogonal', 476 | bias_initializer='zeros', 477 | kernel_regularizer=None, 478 | recurrent_regularizer=None, 479 | bias_regularizer=None, 480 | activity_regularizer=None, 481 | kernel_constraint=None, 482 | recurrent_constraint=None, 483 | bias_constraint=None, 484 | dropout=0., 485 | recurrent_dropout=0., 486 | **kwargs): 487 | super(SimpleRNN, self).__init__(**kwargs) 488 | self.units = units 489 | self.activation = activations.get(activation) 490 | self.use_bias = use_bias 491 | 492 | self.kernel_initializer = initializers.get(kernel_initializer) 493 | self.recurrent_initializer = initializers.get(recurrent_initializer) 494 | self.bias_initializer = initializers.get(bias_initializer) 495 | 496 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 497 | self.recurrent_regularizer = regularizers.get(recurrent_regularizer) 498 | self.bias_regularizer = regularizers.get(bias_regularizer) 499 | self.activity_regularizer = regularizers.get(activity_regularizer) 500 | 501 | self.kernel_constraint = constraints.get(kernel_constraint) 502 | self.recurrent_constraint = constraints.get(recurrent_constraint) 503 | self.bias_constraint = constraints.get(bias_constraint) 504 | 505 | self.dropout = min(1., max(0., dropout)) 506 | self.recurrent_dropout = min(1., max(0., recurrent_dropout)) 507 | self.state_spec = InputSpec(shape=(None, self.units)) 508 | 509 | def build(self, input_shape): 510 | if isinstance(input_shape, list): 511 | input_shape = input_shape[0] 512 | 513 | batch_size = input_shape[0] if self.stateful else None 514 | self.input_dim = input_shape[2] 515 | self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) 516 | 517 | self.states = [None] 518 | if self.stateful: 519 | self.reset_states() 520 | 521 | self.kernel = self.add_weight(shape=(self.input_dim, self.units), 522 | name='kernel', 523 | initializer=self.kernel_initializer, 524 | regularizer=self.kernel_regularizer, 525 | constraint=self.kernel_constraint) 526 | self.recurrent_kernel = self.add_weight( 527 | shape=(self.units, self.units), 528 | name='recurrent_kernel', 529 | initializer=self.recurrent_initializer, 530 | regularizer=self.recurrent_regularizer, 531 | constraint=self.recurrent_constraint) 532 | if self.use_bias: 533 | self.bias = self.add_weight(shape=(self.units,), 534 | name='bias', 535 | initializer=self.bias_initializer, 536 | regularizer=self.bias_regularizer, 537 | constraint=self.bias_constraint) 538 | else: 539 | self.bias = None 540 | self.built = True 541 | 542 | def preprocess_input(self, inputs, training=None): 543 | if self.implementation > 0: 544 | return inputs 545 | else: 546 | input_shape = K.int_shape(inputs) 547 | input_dim = input_shape[2] 548 | timesteps = input_shape[1] 549 | return _time_distributed_dense(inputs, 550 | self.kernel, 551 | self.bias, 552 | self.dropout, 553 | input_dim, 554 | self.units, 555 | timesteps, 556 | training=training) 557 | 558 | def step(self, inputs, states): 559 | if self.implementation == 0: 560 | h = inputs 561 | else: 562 | if 0 < self.dropout < 1: 563 | h = K.dot(inputs * states[1], self.kernel) 564 | else: 565 | h = K.dot(inputs, self.kernel) 566 | if self.bias is not None: 567 | h 
= K.bias_add(h, self.bias) 568 | 569 | prev_output = states[0] 570 | if 0 < self.recurrent_dropout < 1: 571 | prev_output *= states[2] 572 | output = h + K.dot(prev_output, self.recurrent_kernel) 573 | if self.activation is not None: 574 | output = self.activation(output) 575 | 576 | # Properly set learning phase on output tensor. 577 | if 0 < self.dropout + self.recurrent_dropout: 578 | output._uses_learning_phase = True 579 | return output, [output] 580 | 581 | def get_constants(self, inputs, training=None): 582 | constants = [] 583 | if self.implementation != 0 and 0 < self.dropout < 1: 584 | input_shape = K.int_shape(inputs) 585 | input_dim = input_shape[-1] 586 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 587 | ones = K.tile(ones, (1, int(input_dim))) 588 | 589 | def dropped_inputs(): 590 | return K.dropout(ones, self.dropout) 591 | 592 | dp_mask = K.in_train_phase(dropped_inputs, 593 | ones, 594 | training=training) 595 | constants.append(dp_mask) 596 | else: 597 | constants.append(K.cast_to_floatx(1.)) 598 | 599 | if 0 < self.recurrent_dropout < 1: 600 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 601 | ones = K.tile(ones, (1, self.units)) 602 | 603 | def dropped_inputs(): 604 | return K.dropout(ones, self.recurrent_dropout) 605 | rec_dp_mask = K.in_train_phase(dropped_inputs, 606 | ones, 607 | training=training) 608 | constants.append(rec_dp_mask) 609 | else: 610 | constants.append(K.cast_to_floatx(1.)) 611 | return constants 612 | 613 | def get_config(self): 614 | config = {'units': self.units, 615 | 'activation': activations.serialize(self.activation), 616 | 'use_bias': self.use_bias, 617 | 'kernel_initializer': initializers.serialize(self.kernel_initializer), 618 | 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), 619 | 'bias_initializer': initializers.serialize(self.bias_initializer), 620 | 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 621 | 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), 622 | 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 623 | 'activity_regularizer': regularizers.serialize(self.activity_regularizer), 624 | 'kernel_constraint': constraints.serialize(self.kernel_constraint), 625 | 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), 626 | 'bias_constraint': constraints.serialize(self.bias_constraint), 627 | 'dropout': self.dropout, 628 | 'recurrent_dropout': self.recurrent_dropout} 629 | base_config = super(SimpleRNN, self).get_config() 630 | return dict(list(base_config.items()) + list(config.items())) 631 | 632 | 633 | class GRU(Recurrent): 634 | """Gated Recurrent Unit - Cho et al. 2014. 635 | 636 | # Arguments 637 | units: Positive integer, dimensionality of the output space. 638 | activation: Activation function to use 639 | (see [activations](../activations.md)). 640 | If you pass None, no activation is applied 641 | (ie. "linear" activation: `a(x) = x`). 642 | recurrent_activation: Activation function to use 643 | for the recurrent step 644 | (see [activations](../activations.md)). 645 | use_bias: Boolean, whether the layer uses a bias vector. 646 | kernel_initializer: Initializer for the `kernel` weights matrix, 647 | used for the linear transformation of the inputs. 648 | (see [initializers](../initializers.md)). 649 | recurrent_initializer: Initializer for the `recurrent_kernel` 650 | weights matrix, 651 | used for the linear transformation of the recurrent state. 
652 | (see [initializers](../initializers.md)). 653 | bias_initializer: Initializer for the bias vector 654 | (see [initializers](../initializers.md)). 655 | kernel_regularizer: Regularizer function applied to 656 | the `kernel` weights matrix 657 | (see [regularizer](../regularizers.md)). 658 | recurrent_regularizer: Regularizer function applied to 659 | the `recurrent_kernel` weights matrix 660 | (see [regularizer](../regularizers.md)). 661 | bias_regularizer: Regularizer function applied to the bias vector 662 | (see [regularizer](../regularizers.md)). 663 | activity_regularizer: Regularizer function applied to 664 | the output of the layer (its "activation"). 665 | (see [regularizer](../regularizers.md)). 666 | kernel_constraint: Constraint function applied to 667 | the `kernel` weights matrix 668 | (see [constraints](../constraints.md)). 669 | recurrent_constraint: Constraint function applied to 670 | the `recurrent_kernel` weights matrix 671 | (see [constraints](../constraints.md)). 672 | bias_constraint: Constraint function applied to the bias vector 673 | (see [constraints](../constraints.md)). 674 | dropout: Float between 0 and 1. 675 | Fraction of the units to drop for 676 | the linear transformation of the inputs. 677 | recurrent_dropout: Float between 0 and 1. 678 | Fraction of the units to drop for 679 | the linear transformation of the recurrent state. 680 | 681 | # References 682 | - [On the Properties of Neural Machine Translation: Encoder-Decoder Approaches](https://arxiv.org/abs/1409.1259) 683 | - [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling](http://arxiv.org/abs/1412.3555v1) 684 | - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) 685 | """ 686 | 687 | @interfaces.legacy_recurrent_support 688 | def __init__(self, units, 689 | activation='tanh', 690 | recurrent_activation='hard_sigmoid', 691 | use_bias=True, 692 | kernel_initializer='glorot_uniform', 693 | recurrent_initializer='orthogonal', 694 | bias_initializer='zeros', 695 | kernel_regularizer=None, 696 | recurrent_regularizer=None, 697 | bias_regularizer=None, 698 | activity_regularizer=None, 699 | kernel_constraint=None, 700 | recurrent_constraint=None, 701 | bias_constraint=None, 702 | dropout=0., 703 | recurrent_dropout=0., 704 | **kwargs): 705 | super(GRU, self).__init__(**kwargs) 706 | self.units = units 707 | self.activation = activations.get(activation) 708 | self.recurrent_activation = activations.get(recurrent_activation) 709 | self.use_bias = use_bias 710 | 711 | self.kernel_initializer = initializers.get(kernel_initializer) 712 | self.recurrent_initializer = initializers.get(recurrent_initializer) 713 | self.bias_initializer = initializers.get(bias_initializer) 714 | 715 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 716 | self.recurrent_regularizer = regularizers.get(recurrent_regularizer) 717 | self.bias_regularizer = regularizers.get(bias_regularizer) 718 | self.activity_regularizer = regularizers.get(activity_regularizer) 719 | 720 | self.kernel_constraint = constraints.get(kernel_constraint) 721 | self.recurrent_constraint = constraints.get(recurrent_constraint) 722 | self.bias_constraint = constraints.get(bias_constraint) 723 | 724 | self.dropout = min(1., max(0., dropout)) 725 | self.recurrent_dropout = min(1., max(0., recurrent_dropout)) 726 | self.state_spec = InputSpec(shape=(None, self.units)) 727 | 728 | def build(self, input_shape): 729 | if isinstance(input_shape, list): 730 | 
input_shape = input_shape[0] 731 | 732 | batch_size = input_shape[0] if self.stateful else None 733 | self.input_dim = input_shape[2] 734 | self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) 735 | 736 | self.states = [None] 737 | if self.stateful: 738 | self.reset_states() 739 | 740 | self.kernel = self.add_weight(shape=(self.input_dim, self.units * 3), 741 | name='kernel', 742 | initializer=self.kernel_initializer, 743 | regularizer=self.kernel_regularizer, 744 | constraint=self.kernel_constraint) 745 | self.recurrent_kernel = self.add_weight( 746 | shape=(self.units, self.units * 3), 747 | name='recurrent_kernel', 748 | initializer=self.recurrent_initializer, 749 | regularizer=self.recurrent_regularizer, 750 | constraint=self.recurrent_constraint) 751 | 752 | if self.use_bias: 753 | self.bias = self.add_weight(shape=(self.units * 3,), 754 | name='bias', 755 | initializer=self.bias_initializer, 756 | regularizer=self.bias_regularizer, 757 | constraint=self.bias_constraint) 758 | else: 759 | self.bias = None 760 | 761 | self.kernel_z = self.kernel[:, :self.units] 762 | self.recurrent_kernel_z = self.recurrent_kernel[:, :self.units] 763 | self.kernel_r = self.kernel[:, self.units: self.units * 2] 764 | self.recurrent_kernel_r = self.recurrent_kernel[:, 765 | self.units: 766 | self.units * 2] 767 | self.kernel_h = self.kernel[:, self.units * 2:] 768 | self.recurrent_kernel_h = self.recurrent_kernel[:, self.units * 2:] 769 | 770 | if self.use_bias: 771 | self.bias_z = self.bias[:self.units] 772 | self.bias_r = self.bias[self.units: self.units * 2] 773 | self.bias_h = self.bias[self.units * 2:] 774 | else: 775 | self.bias_z = None 776 | self.bias_r = None 777 | self.bias_h = None 778 | self.built = True 779 | 780 | def preprocess_input(self, inputs, training=None): 781 | if self.implementation == 0: 782 | input_shape = K.int_shape(inputs) 783 | input_dim = input_shape[2] 784 | timesteps = input_shape[1] 785 | 786 | x_z = _time_distributed_dense(inputs, self.kernel_z, self.bias_z, 787 | self.dropout, input_dim, self.units, 788 | timesteps, training=training) 789 | x_r = _time_distributed_dense(inputs, self.kernel_r, self.bias_r, 790 | self.dropout, input_dim, self.units, 791 | timesteps, training=training) 792 | x_h = _time_distributed_dense(inputs, self.kernel_h, self.bias_h, 793 | self.dropout, input_dim, self.units, 794 | timesteps, training=training) 795 | return K.concatenate([x_z, x_r, x_h], axis=2) 796 | else: 797 | return inputs 798 | 799 | def get_constants(self, inputs, training=None): 800 | constants = [] 801 | if self.implementation != 0 and 0 < self.dropout < 1: 802 | input_shape = K.int_shape(inputs) 803 | input_dim = input_shape[-1] 804 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 805 | ones = K.tile(ones, (1, int(input_dim))) 806 | 807 | def dropped_inputs(): 808 | return K.dropout(ones, self.dropout) 809 | 810 | dp_mask = [K.in_train_phase(dropped_inputs, 811 | ones, 812 | training=training) for _ in range(3)] 813 | constants.append(dp_mask) 814 | else: 815 | constants.append([K.cast_to_floatx(1.) 
for _ in range(3)]) 816 | 817 | if 0 < self.recurrent_dropout < 1: 818 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 819 | ones = K.tile(ones, (1, self.units)) 820 | 821 | def dropped_inputs(): 822 | return K.dropout(ones, self.recurrent_dropout) 823 | rec_dp_mask = [K.in_train_phase(dropped_inputs, 824 | ones, 825 | training=training) for _ in range(3)] 826 | constants.append(rec_dp_mask) 827 | else: 828 | constants.append([K.cast_to_floatx(1.) for _ in range(3)]) 829 | return constants 830 | 831 | def step(self, inputs, states): 832 | h_tm1 = states[0] # previous memory 833 | dp_mask = states[1] # dropout matrices for recurrent units 834 | rec_dp_mask = states[2] 835 | 836 | if self.implementation == 2: 837 | matrix_x = K.dot(inputs * dp_mask[0], self.kernel) 838 | if self.use_bias: 839 | matrix_x = K.bias_add(matrix_x, self.bias) 840 | matrix_inner = K.dot(h_tm1 * rec_dp_mask[0], 841 | self.recurrent_kernel[:, :2 * self.units]) 842 | 843 | x_z = matrix_x[:, :self.units] 844 | x_r = matrix_x[:, self.units: 2 * self.units] 845 | recurrent_z = matrix_inner[:, :self.units] 846 | recurrent_r = matrix_inner[:, self.units: 2 * self.units] 847 | 848 | z = self.recurrent_activation(x_z + recurrent_z) 849 | r = self.recurrent_activation(x_r + recurrent_r) 850 | 851 | x_h = matrix_x[:, 2 * self.units:] 852 | recurrent_h = K.dot(r * h_tm1 * rec_dp_mask[0], 853 | self.recurrent_kernel[:, 2 * self.units:]) 854 | hh = self.activation(x_h + recurrent_h) 855 | else: 856 | if self.implementation == 0: 857 | x_z = inputs[:, :self.units] 858 | x_r = inputs[:, self.units: 2 * self.units] 859 | x_h = inputs[:, 2 * self.units:] 860 | elif self.implementation == 1: 861 | x_z = K.dot(inputs * dp_mask[0], self.kernel_z) 862 | x_r = K.dot(inputs * dp_mask[1], self.kernel_r) 863 | x_h = K.dot(inputs * dp_mask[2], self.kernel_h) 864 | if self.use_bias: 865 | x_z = K.bias_add(x_z, self.bias_z) 866 | x_r = K.bias_add(x_r, self.bias_r) 867 | x_h = K.bias_add(x_h, self.bias_h) 868 | else: 869 | raise ValueError('Unknown `implementation` mode.') 870 | z = self.recurrent_activation(x_z + K.dot(h_tm1 * rec_dp_mask[0], 871 | self.recurrent_kernel_z)) 872 | r = self.recurrent_activation(x_r + K.dot(h_tm1 * rec_dp_mask[1], 873 | self.recurrent_kernel_r)) 874 | 875 | hh = self.activation(x_h + K.dot(r * h_tm1 * rec_dp_mask[2], 876 | self.recurrent_kernel_h)) 877 | h = z * h_tm1 + (1 - z) * hh 878 | if 0 < self.dropout + self.recurrent_dropout: 879 | h._uses_learning_phase = True 880 | return h, [h] 881 | 882 | def get_config(self): 883 | config = {'units': self.units, 884 | 'activation': activations.serialize(self.activation), 885 | 'recurrent_activation': activations.serialize(self.recurrent_activation), 886 | 'use_bias': self.use_bias, 887 | 'kernel_initializer': initializers.serialize(self.kernel_initializer), 888 | 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), 889 | 'bias_initializer': initializers.serialize(self.bias_initializer), 890 | 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 891 | 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), 892 | 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 893 | 'activity_regularizer': regularizers.serialize(self.activity_regularizer), 894 | 'kernel_constraint': constraints.serialize(self.kernel_constraint), 895 | 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), 896 | 'bias_constraint': constraints.serialize(self.bias_constraint), 897 | 'dropout': 
self.dropout, 898 | 'recurrent_dropout': self.recurrent_dropout} 899 | base_config = super(GRU, self).get_config() 900 | return dict(list(base_config.items()) + list(config.items())) 901 | 902 | 903 | class dLSTM(Recurrent): 904 | """Long-Short Term Memory unit - Hochreiter 1997. 905 | 906 | For a step-by-step description of the algorithm, see 907 | [this tutorial](http://deeplearning.net/tutorial/lstm.html). 908 | 909 | # Arguments 910 | units: Positive integer, dimensionality of the output space. 911 | activation: Activation function to use 912 | (see [activations](../activations.md)). 913 | If you pass None, no activation is applied 914 | (ie. "linear" activation: `a(x) = x`). 915 | recurrent_activation: Activation function to use 916 | for the recurrent step 917 | (see [activations](../activations.md)). 918 | use_bias: Boolean, whether the layer uses a bias vector. 919 | kernel_initializer: Initializer for the `kernel` weights matrix, 920 | used for the linear transformation of the inputs. 921 | (see [initializers](../initializers.md)). 922 | recurrent_initializer: Initializer for the `recurrent_kernel` 923 | weights matrix, 924 | used for the linear transformation of the recurrent state. 925 | (see [initializers](../initializers.md)). 926 | bias_initializer: Initializer for the bias vector 927 | (see [initializers](../initializers.md)). 928 | unit_forget_bias: Boolean. 929 | If True, add 1 to the bias of the forget gate at initialization. 930 | Setting it to true will also force `bias_initializer="zeros"`. 931 | This is recommended in [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) 932 | kernel_regularizer: Regularizer function applied to 933 | the `kernel` weights matrix 934 | (see [regularizer](../regularizers.md)). 935 | recurrent_regularizer: Regularizer function applied to 936 | the `recurrent_kernel` weights matrix 937 | (see [regularizer](../regularizers.md)). 938 | bias_regularizer: Regularizer function applied to the bias vector 939 | (see [regularizer](../regularizers.md)). 940 | activity_regularizer: Regularizer function applied to 941 | the output of the layer (its "activation"). 942 | (see [regularizer](../regularizers.md)). 943 | kernel_constraint: Constraint function applied to 944 | the `kernel` weights matrix 945 | (see [constraints](../constraints.md)). 946 | recurrent_constraint: Constraint function applied to 947 | the `recurrent_kernel` weights matrix 948 | (see [constraints](../constraints.md)). 949 | bias_constraint: Constraint function applied to the bias vector 950 | (see [constraints](../constraints.md)). 951 | dropout: Float between 0 and 1. 952 | Fraction of the units to drop for 953 | the linear transformation of the inputs. 954 | recurrent_dropout: Float between 0 and 1. 955 | Fraction of the units to drop for 956 | the linear transformation of the recurrent state. 
957 | 958 | # References 959 | - [Long short-term memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf) (original 1997 paper) 960 | - [Learning to forget: Continual prediction with LSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015) 961 | - [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf) 962 | - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) 963 | """ 964 | @interfaces.legacy_recurrent_support 965 | def __init__(self, units, 966 | activation='tanh', 967 | recurrent_activation='hard_sigmoid', 968 | use_bias=True, 969 | kernel_initializer='glorot_uniform', 970 | recurrent_initializer='orthogonal', 971 | bias_initializer='zeros', 972 | unit_forget_bias=True, 973 | kernel_regularizer=None, 974 | recurrent_regularizer=None, 975 | bias_regularizer=None, 976 | activity_regularizer=None, 977 | kernel_constraint=None, 978 | recurrent_constraint=None, 979 | bias_constraint=None, 980 | dropout=0., 981 | recurrent_dropout=0., 982 | **kwargs): 983 | super(dLSTM, self).__init__(**kwargs) 984 | self.units = units 985 | self.activation = activations.get(activation) 986 | self.recurrent_activation = activations.get(recurrent_activation) 987 | self.use_bias = use_bias 988 | 989 | self.kernel_initializer = initializers.get(kernel_initializer) 990 | self.recurrent_initializer = initializers.get(recurrent_initializer) 991 | self.bias_initializer = initializers.get(bias_initializer) 992 | self.unit_forget_bias = unit_forget_bias 993 | 994 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 995 | self.recurrent_regularizer = regularizers.get(recurrent_regularizer) 996 | self.bias_regularizer = regularizers.get(bias_regularizer) 997 | self.activity_regularizer = regularizers.get(activity_regularizer) 998 | 999 | self.kernel_constraint = constraints.get(kernel_constraint) 1000 | self.recurrent_constraint = constraints.get(recurrent_constraint) 1001 | self.bias_constraint = constraints.get(bias_constraint) 1002 | 1003 | self.dropout = min(1., max(0., dropout)) 1004 | self.recurrent_dropout = min(1., max(0., recurrent_dropout)) 1005 | self.state_spec = [InputSpec(shape=(None, self.units)), 1006 | InputSpec(shape=(None, self.units))] 1007 | 1008 | def build(self, input_shape): 1009 | if isinstance(input_shape, list): 1010 | input_shape = input_shape[0] 1011 | 1012 | batch_size = input_shape[0] if self.stateful else None 1013 | self.input_dim = input_shape[2] 1014 | self.input_spec[0] = InputSpec(shape=(batch_size, None, self.input_dim)) 1015 | 1016 | self.states = [None, None, None, None] 1017 | if self.stateful: 1018 | self.reset_states() 1019 | 1020 | self.kernel = self.add_weight(shape=(self.input_dim, self.units * 4), 1021 | name='kernel', 1022 | initializer=self.kernel_initializer, 1023 | regularizer=self.kernel_regularizer, 1024 | constraint=self.kernel_constraint) 1025 | self.recurrent_kernel = self.add_weight( 1026 | shape=(self.units, self.units * 4), 1027 | name='recurrent_kernel', 1028 | initializer=self.recurrent_initializer, 1029 | regularizer=self.recurrent_regularizer, 1030 | constraint=self.recurrent_constraint) 1031 | 1032 | if self.use_bias: 1033 | if self.unit_forget_bias: 1034 | def bias_initializer(shape, *args, **kwargs): 1035 | return K.concatenate([ 1036 | self.bias_initializer((self.units,), *args, **kwargs), 1037 | initializers.Ones()((self.units,), *args, **kwargs), 1038 | self.bias_initializer((self.units * 2,), 
*args, **kwargs), 1039 | ]) 1040 | else: 1041 | bias_initializer = self.bias_initializer 1042 | self.bias = self.add_weight(shape=(self.units * 4,), 1043 | name='bias', 1044 | initializer=bias_initializer, 1045 | regularizer=self.bias_regularizer, 1046 | constraint=self.bias_constraint) 1047 | else: 1048 | self.bias = None 1049 | 1050 | self.kernel_i = self.kernel[:, :self.units] 1051 | self.kernel_f = self.kernel[:, self.units: self.units * 2] 1052 | self.kernel_c = self.kernel[:, self.units * 2: self.units * 3] 1053 | self.kernel_o = self.kernel[:, self.units * 3:] 1054 | 1055 | self.recurrent_kernel_i = self.recurrent_kernel[:, :self.units] 1056 | self.recurrent_kernel_f = self.recurrent_kernel[:, self.units: self.units * 2] 1057 | self.recurrent_kernel_c = self.recurrent_kernel[:, self.units * 2: self.units * 3] 1058 | self.recurrent_kernel_o = self.recurrent_kernel[:, self.units * 3:] 1059 | 1060 | if self.use_bias: 1061 | self.bias_i = self.bias[:self.units] 1062 | self.bias_f = self.bias[self.units: self.units * 2] 1063 | self.bias_c = self.bias[self.units * 2: self.units * 3] 1064 | self.bias_o = self.bias[self.units * 3:] 1065 | else: 1066 | self.bias_i = None 1067 | self.bias_f = None 1068 | self.bias_c = None 1069 | self.bias_o = None 1070 | self.built = True 1071 | 1072 | def preprocess_input(self, inputs, training=None): 1073 | if self.implementation == 0: 1074 | input_shape = K.int_shape(inputs) 1075 | input_dim = input_shape[2] 1076 | timesteps = input_shape[1] 1077 | 1078 | x_i = _time_distributed_dense(inputs, self.kernel_i, self.bias_i, 1079 | self.dropout, input_dim, self.units, 1080 | timesteps, training=training) 1081 | x_f = _time_distributed_dense(inputs, self.kernel_f, self.bias_f, 1082 | self.dropout, input_dim, self.units, 1083 | timesteps, training=training) 1084 | x_c = _time_distributed_dense(inputs, self.kernel_c, self.bias_c, 1085 | self.dropout, input_dim, self.units, 1086 | timesteps, training=training) 1087 | x_o = _time_distributed_dense(inputs, self.kernel_o, self.bias_o, 1088 | self.dropout, input_dim, self.units, 1089 | timesteps, training=training) 1090 | return K.concatenate([x_i, x_f, x_c, x_o], axis=2) 1091 | else: 1092 | return inputs 1093 | 1094 | def get_constants(self, inputs, training=None): 1095 | constants = [] 1096 | if self.implementation != 0 and 0 < self.dropout < 1: 1097 | input_shape = K.int_shape(inputs) 1098 | input_dim = input_shape[-1] 1099 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 1100 | ones = K.tile(ones, (1, int(input_dim))) 1101 | 1102 | def dropped_inputs(): 1103 | return K.dropout(ones, self.dropout) 1104 | 1105 | dp_mask = [K.in_train_phase(dropped_inputs, 1106 | ones, 1107 | training=training) for _ in range(4)] 1108 | constants.append(dp_mask) 1109 | else: 1110 | constants.append([K.cast_to_floatx(1.) for _ in range(4)]) 1111 | 1112 | if 0 < self.recurrent_dropout < 1: 1113 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 1114 | ones = K.tile(ones, (1, self.units)) 1115 | 1116 | def dropped_inputs(): 1117 | return K.dropout(ones, self.recurrent_dropout) 1118 | rec_dp_mask = [K.in_train_phase(dropped_inputs, 1119 | ones, 1120 | training=training) for _ in range(4)] 1121 | constants.append(rec_dp_mask) 1122 | else: 1123 | constants.append([K.cast_to_floatx(1.) 
for _ in range(4)]) 1124 | return constants 1125 | 1126 | def step(self, inputs, states): 1127 | h_tm1 = states[0] 1128 | c_tm1 = states[1] 1129 | dp_mask = states[2] 1130 | rec_dp_mask = states[3] 1131 | 1132 | if self.implementation == 2: 1133 | z = K.dot(inputs * dp_mask[0], self.kernel) 1134 | z += K.dot(h_tm1 * rec_dp_mask, self.recurrent_kernel) 1135 | if self.use_bias: 1136 | z = K.bias_add(z, self.bias) 1137 | 1138 | z0 = z[:, :self.units] 1139 | z1 = z[:, self.units: 2 * self.units] 1140 | z2 = z[:, 2 * self.units: 3 * self.units] 1141 | z3 = z[:, 3 * self.units:] 1142 | 1143 | i = self.recurrent_activation(z0) 1144 | f = self.recurrent_activation(z1) 1145 | c = f * c_tm1 + i * self.activation(z2) 1146 | o = self.recurrent_activation(z3) 1147 | else: 1148 | if self.implementation == 0: 1149 | x_i = inputs[:, :self.units] 1150 | x_f = inputs[:, self.units: 2 * self.units] 1151 | x_c = inputs[:, 2 * self.units: 3 * self.units] 1152 | x_o = inputs[:, 3 * self.units:] 1153 | elif self.implementation == 1: 1154 | x_i = K.dot(inputs * dp_mask, self.kernel_i) + self.bias_i 1155 | x_f = K.dot(inputs * dp_mask, self.kernel_f) + self.bias_f 1156 | x_c = K.dot(inputs * dp_mask, self.kernel_c) + self.bias_c 1157 | x_o = K.dot(inputs * dp_mask, self.kernel_o) + self.bias_o 1158 | else: 1159 | raise ValueError('Unknown `implementation` mode.') 1160 | 1161 | i = self.recurrent_activation(x_i + K.dot(h_tm1 * rec_dp_mask, 1162 | self.recurrent_kernel_i)) 1163 | f = self.recurrent_activation(x_f + K.dot(h_tm1 * rec_dp_mask, 1164 | self.recurrent_kernel_f)) 1165 | c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1 * rec_dp_mask, 1166 | self.recurrent_kernel_c)) 1167 | o = self.recurrent_activation(x_o + K.dot(h_tm1 * rec_dp_mask, 1168 | self.recurrent_kernel_o)) 1169 | h = o * self.activation(c) 1170 | #if 0 < self.dropout + self.recurrent_dropout: 1171 | # h._uses_learning_phase = True 1172 | return h, [h, c, dp_mask, rec_dp_mask] 1173 | 1174 | def get_config(self): 1175 | config = {'units': self.units, 1176 | 'activation': activations.serialize(self.activation), 1177 | 'recurrent_activation': activations.serialize(self.recurrent_activation), 1178 | 'use_bias': self.use_bias, 1179 | 'kernel_initializer': initializers.serialize(self.kernel_initializer), 1180 | 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), 1181 | 'bias_initializer': initializers.serialize(self.bias_initializer), 1182 | 'unit_forget_bias': self.unit_forget_bias, 1183 | 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 1184 | 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), 1185 | 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 1186 | 'activity_regularizer': regularizers.serialize(self.activity_regularizer), 1187 | 'kernel_constraint': constraints.serialize(self.kernel_constraint), 1188 | 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), 1189 | 'bias_constraint': constraints.serialize(self.bias_constraint), 1190 | 'dropout': self.dropout, 1191 | 'recurrent_dropout': self.recurrent_dropout} 1192 | base_config = super(dLSTM, self).get_config() 1193 | return dict(list(base_config.items()) + list(config.items())) 1194 | --------------------------------------------------------------------------------
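The custom loss in train.py (`get_lossfunc`) treats the last two model outputs as log-variances for the x- and y-coordinates and uses them to attenuate the squared errors of the two predicted bounding-box corners, with a penalty on the log-variances so the network cannot simply claim high uncertainty everywhere. The NumPy restatement below is for intuition only and is not used anywhere in the repository; the function name `lossfunc_np` is introduced here.

```python
import numpy as np

def lossfunc_np(true, pred):
    """NumPy restatement of get_lossfunc in train.py (illustration only).

    true: (batch, time, 4) bbox-corner offsets (x1, y1, x2, y2)
    pred: (batch, time, 6) -> 4 predicted offsets + 2 log-variances (s_x, s_y)
    """
    err = np.square(true - pred[..., :4])          # squared error per corner coordinate
    s_x, s_y = pred[..., 4], pred[..., 5]
    # corner errors are down-weighted by the predicted log-variances ...
    weighted = (err[..., 0] + err[..., 2]) * np.exp(-s_x) \
             + (err[..., 1] + err[..., 3]) * np.exp(-s_y)
    # ... and the log-variances themselves are penalised
    return weighted.mean() / 4.0 + s_x.mean() + s_y.mean()
```

The division by 4 averages over the four corner-coordinate error terms, and `exp(-s)` keeps the attenuation positive without constraining the network's raw variance outputs.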
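The `nDropout` layer in time_distributed_dropout.py samples a single per-feature dropout mask and reuses it at every timestep via `K.rnn`, unlike the standard `Dropout` layer, which drops entries independently at each timestep. Note also that, as written, the mask is applied at test time too (there is no `K.in_train_phase` switch). A quick check of the mask sharing, assuming the TensorFlow backend listed in the README:

```python
import numpy as np
from keras.layers import Input
from keras.models import Model
from time_distributed_dropout import nDropout

# Tiny model: 5 timesteps, 4 features, 50% timestep-shared dropout.
inp = Input(shape=(5, 4))
out = nDropout(0.5)(inp)
m = Model(inp, out)

y = m.predict(np.ones((1, 5, 4)))
print(y[0])                           # kept features are scaled to 1/(1-p) = 2
print(np.allclose(y[0], y[0][0]))     # True: every timestep saw the same mask
```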
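Because the masks drawn by `nDropout` and by `dLSTM.get_initial_state` are applied on every forward pass, a trained model produces a different future trajectory on each call to `predict`, which is what enables Monte-Carlo uncertainty estimates. The sketch below is not part of the original code; it assumes `get_modeld` and `get_diff_array` have been factored out of train.py into an importable module (here hypothetically called `model_utils`, since importing train.py directly would re-run the training loop) and that `ver_len_128_8.h5` was saved after the final, 15-frame training stage.

```python
import numpy as np

# Hypothetical module: get_modeld / get_diff_array moved out of train.py
# so they can be imported without triggering the training loop.
from model_utils import get_modeld, get_diff_array

IN_FRAMES, OUT_FRAMES = 8, 15          # same settings as train.py

def mc_predict(bboxes, n_samples=50):
    """Monte-Carlo prediction for one track of >= IN_FRAMES boxes [x1, y1, x2, y2]."""
    model = get_modeld((IN_FRAMES - 1, 4), OUT_FRAMES)
    model.load_weights('ver_len_128_8.h5')

    # Offsets relative to the first observed box, as in training (get_diff_array).
    x = get_diff_array(np.array(bboxes[:IN_FRAMES])).reshape(1, IN_FRAMES - 1, 4)

    # Each predict() resamples the dropout masks, so it is one draw from the
    # predictive distribution; keep only the 4 offset outputs per future frame.
    draws = np.stack([model.predict(x)[0, :, :4] for _ in range(n_samples)])
    return draws.mean(axis=0), draws.std(axis=0)   # (OUT_FRAMES, 4) mean and spread
```

Adding the first observed box back to the mean offsets recovers absolute box coordinates; the per-frame standard deviation serves as the uncertainty estimate.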