├── CnnLayer.py
├── CnnVd10.py
├── CnnVd6.py
├── README.md
├── RestNet.py
└── dnn.py
/CnnLayer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | 
4 | class CnnLayer(object):
5 | 
6 |     def __init__(self):
7 |         print 'Init cnn layer'
8 | 
9 | 
10 |     def __call__(self, inputs, is_training=False, reuse=False, scope=None):
11 |         with tf.variable_scope(scope or type(self).__name__, reuse=reuse):
12 | 
13 |             print 'Layer: ' + scope
14 |             print 'Input: '
15 |             print inputs.get_shape()
16 | 
17 |             with tf.variable_scope('prep_data_l1', reuse=reuse):
18 |                 inputs_img = tf.reshape(inputs, tf.pack( [ tf.shape(inputs)[0] , 11, 3, 40] ) )
19 |                 inputs_img = tf.transpose(inputs_img, [ 0 , 1, 3, 2 ] )
20 | 
21 |             print 'Input Img: '
22 |             print inputs_img.get_shape()
23 | 
24 |             hidden = self.convolution(inputs_img, 'conv_l1', 3, 256, 9, 9, reuse, is_training)
25 | 
26 |             with tf.variable_scope('pool_l1', reuse=reuse):
27 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 1, 1, 1], strides=[1, 1, 3, 1], padding='VALID')
28 | 
29 |             print 'pool_l1: '
30 |             print pool.get_shape()
31 | 
32 |             hidden = self.convolution(pool, 'conv_l2', 256, 256, 3, 4, reuse, is_training)
33 | 
34 |             with tf.variable_scope('out_op', reuse=reuse):
35 |                 shape = hidden.get_shape().as_list()
36 |                 outputs = tf.reshape(hidden, tf.pack( [tf.shape(hidden)[0], shape[1] * shape[2] * shape[3] ] ) )
37 | 
38 |             print 'Outputs: '
39 |             print outputs.get_shape()
40 | 
41 |             return outputs
42 | 
43 |     def convolution(self, inputs_img, name_layer, in_dim, out_dim, t_conv_size, f_conv_size, reuse, is_training):
44 |         with tf.variable_scope('parameters_'+name_layer, reuse=reuse):
45 |             n = t_conv_size*f_conv_size*out_dim
46 |             weights = tf.get_variable('weights_'+name_layer, [t_conv_size, f_conv_size, in_dim, out_dim], initializer = tf.random_normal_initializer(stddev=np.sqrt(2.0 / n)))
47 |             biases = tf.get_variable('biases_'+name_layer, [out_dim], initializer=tf.constant_initializer(0) )
48 | 
49 |         with tf.variable_scope('conv_'+name_layer, reuse=reuse):
50 |             conv = tf.nn.conv2d(inputs_img, weights, [1, 1, 1, 1], padding='VALID')
51 |             #print conv.get_shape()
52 |             conv = tf.contrib.layers.batch_norm(conv,
53 |                                                 is_training=is_training,
54 |                                                 scope='batch_norm',
55 |                                                 reuse = reuse)
56 |             hidden = tf.nn.relu(conv + biases)
57 | 
58 |             print 'hidden_'+ name_layer
59 |             print hidden.get_shape()
60 | 
61 |             return hidden
62 | 
--------------------------------------------------------------------------------
/CnnVd10.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | 
4 | class CnnVd10(object):
5 | 
6 |     def __init__(self):
7 |         print 'Init cnn layer'
8 | 
9 |     def __call__(self, inputs, is_training=False, reuse=False, scope=None):
10 |         with tf.variable_scope(scope or type(self).__name__, reuse=reuse):
11 |             with tf.variable_scope('prep_data_l1', reuse=reuse):
12 |                 print inputs.get_shape()
13 |                 # For delta-delta features, the input map is examples x time x freq x 3
14 |                 #inputs_img = tf.reshape(inputs, tf.pack( [ tf.shape(inputs)[0] , 11, 3, 40] ) )
15 |                 #inputs_img = tf.transpose(inputs_img, [ 0 , 1, 3, 2 ] )
16 |                 #inputs_img = inputs_img[:,:,:,0]
17 |                 #inputs_img = tf.reshape(inputs_img, tf.pack( [ tf.shape(inputs_img)[0] , 11, 40, 1] ) )
18 |                 # For no-delta features
19 |                 # In Vd10 we use an 8-frame context window (8*2 + 1 = 17) and 64 fbank filter bands
20 |                 inputs_img = tf.reshape(inputs, tf.pack( [ tf.shape(inputs)[0] , 17, 64, 1] ) )
21 |             print inputs_img.get_shape()
22 |             hidden = self.convolution(inputs_img, 'conv_l1', 1, 64, 3, 3, reuse, is_training)
23 |             hidden = self.convolution(hidden, 'conv_l2', 64, 64, 3, 3, reuse, is_training)
24 |             with tf.variable_scope('pool_l2', reuse=reuse):
25 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='VALID')
26 | 
27 | 
28 |             hidden = self.convolution(pool, 'conv_l3', 64, 128, 3, 3, reuse, is_training)
29 |             hidden = self.convolution(hidden, 'conv_l4', 128, 128, 3, 3, reuse, is_training)
30 |             with tf.variable_scope('pool_l4', reuse=reuse):
31 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='VALID')
32 | 
33 | 
34 |             hidden = self.convolution(pool, 'conv_l5', 128, 128, 3, 3, reuse, is_training)
35 |             hidden = self.convolution(hidden, 'conv_l6', 128, 128, 3, 3, reuse, is_training)
36 |             with tf.variable_scope('pool_l6', reuse=reuse):
37 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
38 | 
39 | 
40 |             hidden = self.convolution(pool, 'conv_l7', 128, 256, 3, 3, reuse, is_training)
41 |             hidden = self.convolution(hidden, 'conv_l8', 256, 256, 3, 3, reuse, is_training)
42 |             with tf.variable_scope('pool_l8', reuse=reuse):
43 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
44 | 
45 |             hidden = self.convolution(pool, 'conv_l9', 256, 256, 3, 3, reuse, is_training)
46 |             hidden = self.convolution(hidden, 'conv_l10', 256, 256, 3, 3, reuse, is_training)
47 |             with tf.variable_scope('pool_l10', reuse=reuse):
48 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
49 | 
50 |             with tf.variable_scope('out_op', reuse=reuse):
51 |                 shape = pool.get_shape().as_list()
52 |                 outputs = tf.reshape(pool, tf.pack( [tf.shape(pool)[0], shape[1] * shape[2] * shape[3] ] ) )
53 | 
54 |             print 'Layer: ' + scope
55 |             print 'Input: '
56 |             print inputs.get_shape()
57 |             print 'Outputs: '
58 |             print outputs.get_shape()
59 |             return outputs
60 | 
61 | 
62 |     def convolution(self, inputs_img, name_layer, in_dim, out_dim, t_conv_size, f_conv_size, reuse, is_training, apply_dropout=False):
63 |         with tf.variable_scope('parameters_'+name_layer, reuse=reuse):
64 |             n = t_conv_size*f_conv_size*out_dim
65 |             weights = tf.get_variable('weights_'+name_layer, [t_conv_size, f_conv_size, in_dim, out_dim], initializer = tf.random_normal_initializer(stddev=np.sqrt(2.0 / n)))
66 |             biases = tf.get_variable('biases_'+name_layer, [out_dim], initializer=tf.constant_initializer(0) )
67 | 
68 |         with tf.variable_scope('conv'+name_layer, reuse=reuse):
69 |             # In vd10 conv is with padding in both axes
70 |             conv = tf.nn.conv2d(inputs_img, weights, [1, 1, 1, 1], padding='SAME')
71 |             #print conv.get_shape()
72 |             conv = tf.contrib.layers.batch_norm(conv,
73 |                                                 is_training=is_training,
74 |                                                 scope='batch_norm'+name_layer,
75 |                                                 reuse = reuse)
76 |             hidden = tf.nn.relu(conv + biases)
77 | 
78 |             # Dropout is optional; enable it by passing apply_dropout=True
79 |             if apply_dropout and is_training:
80 |                 hidden = tf.nn.dropout(hidden, 0.75)
81 | 
82 | 
83 |             print 'hidden_'+ name_layer
84 |             print hidden.get_shape()
85 |             return hidden
86 | 
--------------------------------------------------------------------------------
/CnnVd6.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | 
4 | class CnnVd6(object):
5 |     def __init__(self):
6 |         print 'Init cnn layer'
7 | 
8 | 
9 |     def __call__(self, inputs, is_training=False, reuse=False, scope=None):
10 |         with tf.variable_scope(scope or type(self).__name__, reuse=reuse):
11 |             with tf.variable_scope('prep_data_l1', reuse=reuse):
12 |                 print inputs.get_shape()
13 |                 # For delta-delta features, the input map is examples x time x freq x 3
14 |                 #inputs_img = tf.reshape(inputs, tf.pack( [ tf.shape(inputs)[0] , 11, 3, 40] ) )
15 |                 #inputs_img = tf.transpose(inputs_img, [ 0 , 1, 3, 2 ] )
16 |                 #inputs_img = inputs_img[:,:,:,0]
17 |                 #inputs_img = tf.reshape(inputs_img, tf.pack( [ tf.shape(inputs_img)[0] , 11, 40, 1] ) )
18 |                 # For no-delta features
19 |                 inputs_img = tf.reshape(inputs, tf.pack( [ tf.shape(inputs)[0] , 11, 40, 1] ) )
20 |             print inputs_img.get_shape()
21 | 
22 |             hidden = self.convolution(inputs_img, 'conv_l1', 1, 64, 1, 3, reuse, is_training)
23 |             hidden = self.convolution(hidden, 'conv_l2', 64, 64, 3, 3, reuse, is_training)
24 |             with tf.variable_scope('pool_l2', reuse=reuse):
25 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='VALID')
26 | 
27 |             hidden = self.convolution(pool, 'conv_l3', 64, 128, 3, 3, reuse, is_training)
28 |             hidden = self.convolution(hidden, 'conv_l4', 128, 128, 3, 3, reuse, is_training)
29 |             with tf.variable_scope('pool_l4', reuse=reuse):
30 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='VALID')
31 | 
32 |             hidden = self.convolution(pool, 'conv_l5', 128, 256, 3, 3, reuse, is_training)
33 |             hidden = self.convolution(hidden, 'conv_l6', 256, 256, 3, 3, reuse, is_training)
34 | 
35 |             with tf.variable_scope('out_op', reuse=reuse):
36 |                 shape = hidden.get_shape().as_list()
37 |                 outputs = tf.reshape(hidden, tf.pack( [tf.shape(hidden)[0], shape[1] * shape[2] * shape[3] ] ) )
38 | 
39 |             print 'Layer: ' + scope
40 |             print 'Input: '
41 |             print inputs.get_shape()
42 |             print 'Outputs: '
43 |             print outputs.get_shape()
44 |             return outputs
45 | 
46 | 
47 |     def convolution(self, inputs_img, name_layer, in_dim, out_dim, t_conv_size, f_conv_size, reuse, is_training):
48 |         with tf.variable_scope('parameters_'+name_layer, reuse=reuse):
49 |             n = t_conv_size*f_conv_size*out_dim
50 |             weights = tf.get_variable('weights_'+name_layer, [t_conv_size, f_conv_size, in_dim, out_dim], initializer = tf.random_normal_initializer(stddev=np.sqrt(2.0 / n)))
51 |             biases = tf.get_variable('biases_'+name_layer, [out_dim], initializer=tf.constant_initializer(0) )
52 | 
53 |         with tf.variable_scope('conv'+name_layer, reuse=reuse):
54 |             conv = tf.nn.conv2d(inputs_img, weights, [1, 1, 1, 1], padding='VALID')
55 |             #print conv.get_shape()
56 |             conv = tf.contrib.layers.batch_norm(conv,
57 |                                                 is_training=is_training,
58 |                                                 scope='batch_norm',
59 |                                                 reuse = reuse)
60 |             hidden = tf.nn.relu(conv + biases)
61 |             print 'hidden_'+ name_layer
62 |             print hidden.get_shape()
63 |             return hidden
64 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ResNet-Kaldi-Tensorflow-ASR
2 | 
3 | ResNet and other CNN implementations in TensorFlow, as presented in the paper
4 | "Deep Residual Networks with Auditory Inspired Features for Robust Speech Recognition".
5 | 
6 | These models are meant to be used with Kaldi and [tfkaldi](https://github.com/vrenkens/tfkaldi).
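
A minimal usage sketch for trying one of the CNN front-ends on its own, outside the tfkaldi trainer. The feature layout (an 11-frame context of 40 fbank coefficients with deltas and delta-deltas for `CnnLayer`/`CnnVd6`, a 17-frame context of 64 fbank coefficients for `CnnVd10`/`RestNet`) and the placeholder shape below are assumptions read off the reshape calls in the code, and a TensorFlow version from the `tf.pack` era (<= 1.0) is assumed:

```python
import tensorflow as tf
from CnnLayer import CnnLayer

# One spliced input frame: 11 frames x 3 (static + delta + delta-delta) x 40 fbank = 1320 values.
inputs = tf.placeholder(tf.float32, [None, 11 * 3 * 40])

cnn = CnnLayer()
# Builds the two convolutional layers plus pooling and returns the flattened feature maps.
outputs = cnn(inputs, is_training=True, reuse=False, scope='cnn_front_end')
```

Inside tfkaldi, the same call is made by `dnn.py`, which plugs the chosen CNN class in as the first layer of the DNN classifier.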
7 | 
--------------------------------------------------------------------------------
/RestNet.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | 
4 | class RestNet(object):
5 | 
6 |     def __init__(self):
7 |         print 'Init cnn layer'
8 | 
9 |     def __call__(self, inputs, is_training=False, reuse=False, scope=None):
10 |         with tf.variable_scope(scope or type(self).__name__, reuse=reuse):
11 |             with tf.variable_scope('prep_data_l1', reuse=reuse):
12 |                 print inputs.get_shape()
13 |                 inputs_img = tf.reshape(inputs, tf.pack( [ tf.shape(inputs)[0] , 17, 64, 1] ) )
14 | 
15 | 
16 | 
17 | 
18 |             x = self.convolution(inputs_img, 'first_conv_l1', 1, 64, 7, 7, reuse, is_training)
19 |             with tf.variable_scope('pool_l1', reuse=reuse):
20 |                 x = tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
21 | 
22 | 
23 |             x = self.residual('l1', x, 64, 64, [1, 1, 1, 1], reuse, is_training)
24 | 
25 |             x = self.residual('l2', x, 64, 128, [1, 1, 2, 1], reuse, is_training)
26 | 
27 |             x = self.residual('l3', x, 128, 128, [1, 1, 2, 1], reuse, is_training)
28 | 
29 |             x = self.residual('l4', x, 128, 256, [1, 2, 2, 1], reuse, is_training)
30 | 
31 |             x = self.residual('l5', x, 256, 256, [1, 2, 2, 1], reuse, is_training)
32 | 
33 |             #x = self.residual('l6', x, 256, 512, [1, 2, 2, 1], reuse, is_training)
34 | 
35 |             with tf.variable_scope('out_op', reuse=reuse):
36 |                 x = tf.nn.avg_pool(x, [1,3,2,1], [1,3,2,1], 'SAME')
37 |                 shape = x.get_shape().as_list()
38 |                 x = tf.reshape(x, tf.pack( [tf.shape(x)[0], shape[1] * shape[2] * shape[3] ] ) )
39 | 
40 |             outputs = self.fully_connected('fcl', x, reuse, is_training)
41 | 
42 |             print 'Layer: ' + scope
43 |             print 'Input: '
44 |             print inputs.get_shape()
45 |             print inputs_img.get_shape()
46 |             print 'Outputs: '
47 |             print outputs.get_shape()
48 |             return outputs
49 | 
50 | 
51 |     def convolution(self, inputs_img, name_layer, in_dim, out_dim, t_conv_size, f_conv_size, reuse, is_training):
52 |         with tf.variable_scope('parameters_'+name_layer, reuse=reuse):
53 |             n = t_conv_size*f_conv_size*out_dim
54 |             weights = tf.get_variable('weights_'+name_layer, [t_conv_size, f_conv_size, in_dim, out_dim], initializer = tf.random_normal_initializer(stddev=np.sqrt(2.0 / n)))
55 |             biases = tf.get_variable('biases_'+name_layer, [out_dim], initializer=tf.constant_initializer(0) )
56 | 
57 |         with tf.variable_scope('conv'+name_layer, reuse=reuse):
58 |             conv = tf.nn.conv2d(inputs_img, weights, [1, 1, 1, 1], padding='SAME')
59 |             #print conv.get_shape()
60 |             conv = tf.contrib.layers.batch_norm(conv,
61 |                                                 is_training=is_training,
62 |                                                 scope='batch_norm',
63 |                                                 reuse = reuse)
64 |             hidden = tf.nn.relu(conv + biases)
65 |             print 'hidden_'+ name_layer
66 |             print hidden.get_shape()
67 |             return hidden
68 | 
69 | 
70 |     def fully_connected(self, name_layer, x, reuse, is_training):
71 | 
72 |         print 'Layer: ' + name_layer
73 |         print 'Input: '
74 |         print x.get_shape()
75 | 
76 |         output = 1000
77 | 
78 |         with tf.variable_scope(name_layer, reuse=reuse):
79 |             with tf.variable_scope(name_layer +'_parameters', reuse=reuse):
80 | 
81 |                 stddev = 1/(int(x.get_shape()[1])**0.5)
82 | 
83 |                 weights = tf.get_variable(
84 |                     'weights', [x.get_shape()[1], output],
85 |                     initializer=tf.random_normal_initializer(stddev=stddev))
86 | 
87 |                 biases = tf.get_variable(
88 |                     'biases', [output],
89 |                     initializer=tf.constant_initializer(0))
90 | 
91 |             x = tf.matmul(x, weights) + biases
92 | 
93 |             x = tf.contrib.layers.batch_norm(x,
94 |                                              is_training=is_training,
95 |
scope='batch_norm_'+name_layer, 96 | reuse = reuse) 97 | 98 | x = tf.nn.relu(x) 99 | 100 | print 'Outputs: ' 101 | print x.get_shape() 102 | 103 | return x 104 | 105 | 106 | def residual(self, name_layer, x, in_filter, out_filter, stride, reuse, is_training): 107 | """Residual unit with 2 sub layers.""" 108 | 109 | print 'Layer: ' + name_layer 110 | print 'Input: ' 111 | print x.get_shape() 112 | 113 | orig_x = x 114 | with tf.variable_scope( name_layer + 'sub1', reuse=reuse): 115 | with tf.variable_scope('parameters_sub1_'+name_layer, reuse=reuse): 116 | n = 3*3*out_filter 117 | weights_sub1 = tf.get_variable('weights_sub1_'+name_layer, [3, 3, in_filter, out_filter], initializer = tf.random_normal_initializer(stddev=np.sqrt(2.0 / n))) 118 | biases_sub1 = tf.get_variable('biases_sub1'+name_layer, [out_filter], initializer=tf.constant_initializer(0.01) ) 119 | 120 | with tf.variable_scope('conv_sub1'+name_layer, reuse=reuse): 121 | x = tf.nn.conv2d(x, weights_sub1, stride, padding='SAME') 122 | 123 | x = tf.contrib.layers.batch_norm(x, 124 | is_training=is_training, 125 | scope='batch_norm_sub1_'+name_layer, 126 | reuse = reuse) 127 | x = tf.nn.relu(x + biases_sub1) 128 | 129 | 130 | with tf.variable_scope( name_layer + 'sub2', reuse=reuse): 131 | with tf.variable_scope('parameters_sub2_'+name_layer, reuse=reuse): 132 | n = 3*3*out_filter 133 | weights_sub2 = tf.get_variable('weights_sub2_'+name_layer, [3, 3, out_filter, out_filter], initializer = tf.random_normal_initializer(stddev=np.sqrt(2.0 / n))) 134 | biases_sub2 = tf.get_variable('biases_sub2_'+name_layer, [out_filter], initializer=tf.constant_initializer(0.01) ) 135 | with tf.variable_scope('conv_sub2'+name_layer, reuse=reuse): 136 | x = tf.nn.conv2d(x, weights_sub2, [1, 1, 1, 1], padding='SAME') 137 | 138 | with tf.variable_scope('sub_add'): 139 | orig_x = tf.nn.avg_pool(orig_x, stride, stride, 'SAME') 140 | 141 | if in_filter != out_filter: 142 | if in_filter==1: 143 | orig_x = tf.pad(orig_x, [[0, 0], [0, 0], [0, 0], [(out_filter - in_filter) // 2 , ((out_filter - in_filter) // 2) + 1 ]]) 144 | else: 145 | orig_x = tf.pad(orig_x, [[0, 0], [0, 0], [0, 0], 146 | [(out_filter - in_filter) // 2, 147 | (out_filter - in_filter) // 2]]) 148 | 149 | x += orig_x 150 | 151 | x = tf.contrib.layers.batch_norm(x, 152 | is_training=is_training, 153 | scope='batch_norm_sub1_'+name_layer, 154 | reuse = reuse) 155 | x = tf.nn.relu(x + biases_sub2) 156 | 157 | print 'Outputs: ' 158 | print x.get_shape() 159 | 160 | return x 161 | -------------------------------------------------------------------------------- /dnn.py: -------------------------------------------------------------------------------- 1 | '''@file dnn.py 2 | The DNN neural network classifier''' 3 | 4 | import seq_convertors 5 | import tensorflow as tf 6 | from classifier import Classifier 7 | from layer import FFLayer 8 | #from CnnVd6 import CnnVd6 9 | #from CnnVd10no import CnnVd10no 10 | #from RestNet import RestNet 11 | from CnnLayer import CnnLayer 12 | from activation import TfActivation 13 | 14 | class DNN(Classifier): 15 | '''This class is a graph for feedforward fully connected neural nets.''' 16 | 17 | def __init__(self, output_dim, num_layers, num_units, activation, 18 | layerwise_init=True): 19 | ''' 20 | DNN constructor 21 | 22 | Args: 23 | output_dim: the DNN output dimension 24 | num_layers: number of hidden layers 25 | num_units: number of hidden units 26 | activation: the activation function 27 | layerwise_init: if True the layers will be added one by one, 28 | 
otherwise all layers will be added to the network in the 29 | beginning 30 | ''' 31 | 32 | #super constructor 33 | super(DNN, self).__init__(output_dim) 34 | 35 | #save all the DNN properties 36 | self.num_layers = num_layers 37 | self.num_units = num_units 38 | self.activation = activation 39 | self.layerwise_init = layerwise_init 40 | 41 | def __call__(self, inputs, seq_length, is_training=False, reuse=False, 42 | scope=None): 43 | ''' 44 | Add the DNN variables and operations to the graph 45 | 46 | Args: 47 | inputs: the inputs to the neural network, this is a list containing 48 | a [batch_size, input_dim] tensor for each time step 49 | seq_length: The sequence lengths of the input utterances, if None 50 | the maximal sequence length will be taken 51 | is_training: whether or not the network is in training mode 52 | reuse: wheter or not the variables in the network should be reused 53 | scope: the name scope 54 | 55 | Returns: 56 | A triple containing: 57 | - output logits 58 | - the output logits sequence lengths as a vector 59 | - a saver object 60 | - a dictionary of control operations: 61 | -add: add a layer to the network 62 | -init: initialise the final layer 63 | ''' 64 | 65 | with tf.variable_scope(scope or type(self).__name__, reuse=reuse): 66 | 67 | #input layer 68 | layer = FFLayer(self.num_units, self.activation) 69 | 70 | #output layer 71 | outlayer = FFLayer(self.output_dim, 72 | TfActivation(None, lambda(x): x), 0) 73 | 74 | #do the forward computation 75 | 76 | #convert the sequential data to non sequential data 77 | nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length) 78 | 79 | activations = [None]*self.num_layers 80 | #activations[0] = layer(nonseq_inputs, is_training, reuse, 'layer0') 81 | 82 | #cnn_layer = RestNet() 83 | #cnn_layer = CnnVd6() 84 | cnn_layer = CnnLayer() 85 | activations[0] = cnn_layer(nonseq_inputs, is_training, reuse, 'layer0') 86 | for l in range(1, self.num_layers): 87 | activations[l] = layer(activations[l-1], is_training, reuse, 88 | 'layer' + str(l)) 89 | 90 | if self.layerwise_init: 91 | 92 | #variable that determines how many layers are initialised 93 | #in the neural net 94 | initialisedlayers = tf.get_variable( 95 | 'initialisedlayers', [], 96 | initializer=tf.constant_initializer(0), 97 | trainable=False, 98 | dtype=tf.int32) 99 | 100 | #operation to increment the number of layers 101 | add_layer_op = initialisedlayers.assign(initialisedlayers+1).op 102 | 103 | #compute the logits by selecting the activations at the layer 104 | #that has last been added to the network, this is used for layer 105 | #by layer initialisation 106 | logits = tf.case( 107 | [(tf.equal(initialisedlayers, tf.constant(l)), 108 | Callable(activations[l])) 109 | for l in range(len(activations))], 110 | default=Callable(activations[-1]), 111 | exclusive=True, name='layerSelector') 112 | 113 | logits.set_shape([None, self.num_units]) 114 | else: 115 | logits = activations[-1] 116 | 117 | logits = outlayer(logits, is_training, reuse, 118 | 'layer' + str(self.num_layers)) 119 | 120 | 121 | if self.layerwise_init: 122 | #operation to initialise the final layer 123 | init_last_layer_op = tf.initialize_variables( 124 | tf.get_collection( 125 | tf.GraphKeys.VARIABLES, 126 | scope=(tf.get_variable_scope().name + '/layer' 127 | + str(self.num_layers)))) 128 | 129 | control_ops = {'add':add_layer_op, 'init':init_last_layer_op} 130 | else: 131 | control_ops = None 132 | 133 | #convert the logits to sequence logits to match expected output 134 | seq_logits = 
seq_convertors.nonseq2seq(logits, seq_length,
135 |                                                      len(inputs))
136 | 
137 |             #create a saver
138 |             saver = tf.train.Saver()
139 | 
140 |         return seq_logits, seq_length, saver, control_ops
141 | 
142 | class Callable(object):
143 |     '''A class for an object that is callable'''
144 | 
145 |     def __init__(self, value):
146 |         '''
147 |         Callable constructor
148 | 
149 |         Args:
150 |             value: the value that will be returned when the object is called
151 |         '''
152 | 
153 |         self.value = value
154 | 
155 |     def __call__(self):
156 |         '''
157 |         get the stored value
158 | 
159 |         Returns:
160 |             the stored value
161 |         '''
162 | 
163 |         return self.value
164 | 
--------------------------------------------------------------------------------
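
For reference, a sketch of how the graph construction in dnn.py might be driven outside the tfkaldi trainer (which normally builds this graph). The output dimension, layer sizes, batch size, number of time steps, and the placeholder-fed input list below are illustrative assumptions; only the DNN and TfActivation call patterns are taken from the code above.

```python
import tensorflow as tf
from dnn import DNN
from activation import TfActivation

# Hypothetical sizes: 8 utterances padded to 100 frames of 1320-dim spliced features
# (matching the 11 x 3 x 40 layout expected by CnnLayer), 1943 output targets.
dnn = DNN(output_dim=1943, num_layers=4, num_units=1024,
          activation=TfActivation(None, tf.nn.relu), layerwise_init=False)

# The docstring above describes inputs as one [batch_size, input_dim] tensor per time step.
inputs = [tf.placeholder(tf.float32, [8, 1320]) for _ in range(100)]
seq_length = tf.placeholder(tf.int32, [8])

logits, logit_lengths, saver, control_ops = dnn(inputs, seq_length, is_training=True)
```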