├── CnnLayer.py
├── CnnVd10.py
├── CnnVd6.py
├── README.md
├── RestNet.py
└── dnn.py
/CnnLayer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | 
4 | class CnnLayer(object):
5 | 
6 |     def __init__(self):
7 |         print 'Init cnn layer'
8 | 
9 | 
10 |     def __call__(self, inputs, is_training=False, reuse=False, scope=None):
11 |         with tf.variable_scope(scope or type(self).__name__, reuse=reuse):
12 | 
13 |             print 'Layer: ' + scope
14 |             print 'Input: '
15 |             print inputs.get_shape()
16 | 
17 |             with tf.variable_scope('prep_data_l1', reuse=reuse):
18 |                 inputs_img = tf.reshape(inputs, tf.pack( [ tf.shape(inputs)[0] , 11, 3, 40] ) )
19 |                 inputs_img = tf.transpose(inputs_img, [ 0 , 1, 3, 2 ] )
20 | 
21 |             print 'Input Img: '
22 |             print inputs_img.get_shape()
23 | 
24 |             hidden = self.convolution(inputs_img, 'conv_l1', 3, 256, 9, 9, reuse, is_training)
25 | 
26 |             with tf.variable_scope('pool_l1', reuse=reuse):
27 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 1, 1, 1], strides=[1, 1, 3, 1], padding='VALID')
28 | 
29 |             print 'pool_l1: '
30 |             print pool.get_shape()
31 | 
32 |             hidden = self.convolution(pool, 'conv_l2', 256, 256, 3, 4, reuse, is_training)
33 | 
34 |             with tf.variable_scope('out_op', reuse=reuse):
35 |                 shape = hidden.get_shape().as_list()
36 |                 outputs = tf.reshape(hidden, tf.pack( [tf.shape(hidden)[0], shape[1] * shape[2] * shape[3] ] ) )
37 | 
38 |             print 'Outputs: '
39 |             print outputs.get_shape()
40 | 
41 |             return outputs
42 | 
43 |     def convolution(self, inputs_img, name_layer, in_dim, out_dim, t_conv_size, f_conv_size, reuse, is_training):
44 |         with tf.variable_scope('parameters_'+name_layer, reuse=reuse):
45 |             n = t_conv_size*f_conv_size*out_dim
46 |             weights = tf.get_variable('weights_'+name_layer, [t_conv_size, f_conv_size, in_dim, out_dim], initializer = tf.random_normal_initializer(stddev=np.sqrt(2.0 / n)))
47 |             biases = tf.get_variable('biases_'+name_layer, [out_dim], initializer=tf.constant_initializer(0) )
48 | 
49 |         with tf.variable_scope('conv_'+name_layer, reuse=reuse):
50 |             conv = tf.nn.conv2d(inputs_img, weights, [1, 1, 1, 1], padding='VALID')
51 |             #print conv.get_shape()
52 |             conv = tf.contrib.layers.batch_norm(conv,
53 |                                                 is_training=is_training,
54 |                                                 scope='batch_norm',
55 |                                                 reuse = reuse)
56 |             hidden = tf.nn.relu(conv + biases)
57 | 
58 |             print 'hidden_'+ name_layer
59 |             print hidden.get_shape()
60 | 
61 |             return hidden
62 | 
--------------------------------------------------------------------------------
/CnnVd10.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | 
4 | class CnnVd10(object):
5 | 
6 |     def __init__(self):
7 |         print 'Init cnn layer'
8 | 
9 |     def __call__(self, inputs, is_training=False, reuse=False, scope=None):
10 |         with tf.variable_scope(scope or type(self).__name__, reuse=reuse):
11 |             with tf.variable_scope('prep_data_l1', reuse=reuse):
12 |                 print inputs.get_shape()
13 |                 # For delta-delta features, the input map is examples x time x freq x 3
14 |                 #inputs_img = tf.reshape(inputs, tf.pack( [ tf.shape(inputs)[0] , 11, 3, 40] ) )
15 |                 #inputs_img = tf.transpose(inputs_img, [ 0 , 1, 3, 2 ] )
16 |                 #inputs_img = inputs_img[:,:,:,0]
17 |                 #inputs_img = tf.reshape(inputs_img, tf.pack( [ tf.shape(inputs_img)[0] , 11, 40, 1] ) )
18 |                 # For no-delta features
19 |                 # In Vd10 we use an 8-frame context window (8*2 + 1 = 17) and 64 fbank filter bands
20 |                 inputs_img = tf.reshape(inputs, tf.pack( [ tf.shape(inputs)[0] , 17, 64, 1] ) )
21 |             print inputs_img.get_shape()
22 |             hidden = self.convolution(inputs_img, 'conv_l1', 1, 64, 3, 3, reuse, is_training)
23 |             hidden = self.convolution(hidden, 'conv_l2', 64, 64, 3, 3, reuse, is_training)
24 |             with tf.variable_scope('pool_l2', reuse=reuse):
25 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='VALID')
26 | 
27 | 
28 |             hidden = self.convolution(pool, 'conv_l3', 64, 128, 3, 3, reuse, is_training)
29 |             hidden = self.convolution(hidden, 'conv_l4', 128, 128, 3, 3, reuse, is_training)
30 |             with tf.variable_scope('pool_l4', reuse=reuse):
31 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='VALID')
32 | 
33 | 
34 |             hidden = self.convolution(pool, 'conv_l5', 128, 128, 3, 3, reuse, is_training)
35 |             hidden = self.convolution(hidden, 'conv_l6', 128, 128, 3, 3, reuse, is_training)
36 |             with tf.variable_scope('pool_l6', reuse=reuse):
37 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
38 | 
39 | 
40 |             hidden = self.convolution(pool, 'conv_l7', 128, 256, 3, 3, reuse, is_training)
41 |             hidden = self.convolution(hidden, 'conv_l8', 256, 256, 3, 3, reuse, is_training)
42 |             with tf.variable_scope('pool_l8', reuse=reuse):
43 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
44 | 
45 |             hidden = self.convolution(pool, 'conv_l9', 256, 256, 3, 3, reuse, is_training)
46 |             hidden = self.convolution(hidden, 'conv_l10', 256, 256, 3, 3, reuse, is_training)
47 |             with tf.variable_scope('pool_l10', reuse=reuse):
48 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
49 | 
50 |             with tf.variable_scope('out_op', reuse=reuse):
51 |                 shape = pool.get_shape().as_list()
52 |                 outputs = tf.reshape(pool, tf.pack( [tf.shape(pool)[0], shape[1] * shape[2] * shape[3] ] ) )
53 | 
54 |             print 'Layer: ' + scope
55 |             print 'Input: '
56 |             print inputs.get_shape()
57 |             print 'Outputs: '
58 |             print outputs.get_shape()
59 |             return outputs
60 | 
61 | 
62 |     def convolution(self, inputs_img, name_layer, in_dim, out_dim, t_conv_size, f_conv_size, reuse, is_training, apply_dropout=False):
63 |         with tf.variable_scope('parameters_'+name_layer, reuse=reuse):
64 |             n = t_conv_size*f_conv_size*out_dim
65 |             weights = tf.get_variable('weights_'+name_layer, [t_conv_size, f_conv_size, in_dim, out_dim], initializer = tf.random_normal_initializer(stddev=np.sqrt(2.0 / n)))
66 |             biases = tf.get_variable('biases_'+name_layer, [out_dim], initializer=tf.constant_initializer(0) )
67 | 
68 |         with tf.variable_scope('conv'+name_layer, reuse=reuse):
69 |             # In vd10 conv is with padding in both axes
70 |             conv = tf.nn.conv2d(inputs_img, weights, [1, 1, 1, 1], padding='SAME')
71 |             #print conv.get_shape()
72 |             conv = tf.contrib.layers.batch_norm(conv,
73 |                                                 is_training=is_training,
74 |                                                 scope='batch_norm'+name_layer,
75 |                                                 reuse = reuse)
76 |             hidden = tf.nn.relu(conv + biases)
77 | 
78 |             # Dropout is optional; enable it by passing apply_dropout=True
79 |             if apply_dropout and is_training:
80 |                 hidden = tf.nn.dropout(hidden, 0.75)
81 | 
82 | 
83 |             print 'hidden_'+ name_layer
84 |             print hidden.get_shape()
85 |             return hidden
86 | 
--------------------------------------------------------------------------------
/CnnVd6.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | 
4 | class CnnVd6(object):
5 |     def __init__(self):
6 |         print 'Init cnn layer'
7 | 
8 | 
9 |     def __call__(self, inputs, is_training=False, reuse=False, scope=None):
10 |         with tf.variable_scope(scope or type(self).__name__, reuse=reuse):
11 |             with tf.variable_scope('prep_data_l1', reuse=reuse):
12 |                 print inputs.get_shape()
13 |                 # For delta-delta features, the input map is examples x time x freq x 3
14 |                 #inputs_img = tf.reshape(inputs, tf.pack( [ tf.shape(inputs)[0] , 11, 3, 40] ) )
15 |                 #inputs_img = tf.transpose(inputs_img, [ 0 , 1, 3, 2 ] )
16 |                 #inputs_img = inputs_img[:,:,:,0]
17 |                 #inputs_img = tf.reshape(inputs_img, tf.pack( [ tf.shape(inputs_img)[0] , 11, 40, 1] ) )
18 |                 # For no-delta features
19 |                 inputs_img = tf.reshape(inputs, tf.pack( [ tf.shape(inputs)[0] , 11, 40, 1] ) )
20 |             print inputs_img.get_shape()
21 | 
22 |             hidden = self.convolution(inputs_img, 'conv_l1', 1, 64, 1, 3, reuse, is_training)
23 |             hidden = self.convolution(hidden, 'conv_l2', 64, 64, 3, 3, reuse, is_training)
24 |             with tf.variable_scope('pool_l2', reuse=reuse):
25 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='VALID')
26 | 
27 |             hidden = self.convolution(pool, 'conv_l3', 64, 128, 3, 3, reuse, is_training)
28 |             hidden = self.convolution(hidden, 'conv_l4', 128, 128, 3, 3, reuse, is_training)
29 |             with tf.variable_scope('pool_l4', reuse=reuse):
30 |                 pool = tf.nn.max_pool(hidden, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='VALID')
31 | 
32 |             hidden = self.convolution(pool, 'conv_l5', 128, 256, 3, 3, reuse, is_training)
33 |             hidden = self.convolution(hidden, 'conv_l6', 256, 256, 3, 3, reuse, is_training)
34 | 
35 |             with tf.variable_scope('out_op', reuse=reuse):
36 |                 shape = hidden.get_shape().as_list()
37 |                 outputs = tf.reshape(hidden, tf.pack( [tf.shape(hidden)[0], shape[1] * shape[2] * shape[3] ] ) )
38 | 
39 |             print 'Layer: ' + scope
40 |             print 'Input: '
41 |             print inputs.get_shape()
42 |             print 'Outputs: '
43 |             print outputs.get_shape()
44 |             return outputs
45 | 
46 | 
47 |     def convolution(self, inputs_img, name_layer, in_dim, out_dim, t_conv_size, f_conv_size, reuse, is_training):
48 |         with tf.variable_scope('parameters_'+name_layer, reuse=reuse):
49 |             n = t_conv_size*f_conv_size*out_dim
50 |             weights = tf.get_variable('weights_'+name_layer, [t_conv_size, f_conv_size, in_dim, out_dim], initializer = tf.random_normal_initializer(stddev=np.sqrt(2.0 / n)))
51 |             biases = tf.get_variable('biases_'+name_layer, [out_dim], initializer=tf.constant_initializer(0) )
52 | 
53 |         with tf.variable_scope('conv'+name_layer, reuse=reuse):
54 |             conv = tf.nn.conv2d(inputs_img, weights, [1, 1, 1, 1], padding='VALID')
55 |             #print conv.get_shape()
56 |             conv = tf.contrib.layers.batch_norm(conv,
57 |                                                 is_training=is_training,
58 |                                                 scope='batch_norm',
59 |                                                 reuse = reuse)
60 |             hidden = tf.nn.relu(conv + biases)
61 |             print 'hidden_'+ name_layer
62 |             print hidden.get_shape()
63 |             return hidden
64 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ResNet-Kaldi-Tensorflow-ASR
2 | 
3 | ResNet and other CNN implementations in TensorFlow, as presented in the paper
4 | "Deep Residual Networks with Auditory Inspired Features for Robust Speech Recognition".
5 | 
6 | These models are meant to be used with Kaldi and [tfkaldi](https://github.com/vrenkens/tfkaldi).
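
A minimal usage sketch for trying one of the CNN front-ends on its own, outside the tfkaldi trainer. The feature layout (an 11-frame context of 40 fbank coefficients with deltas and delta-deltas for `CnnLayer`/`CnnVd6`, a 17-frame context of 64 fbank coefficients for `CnnVd10`/`RestNet`) and the placeholder shape below are assumptions read off the reshape calls in the code, and a TensorFlow version from the `tf.pack` era (<= 1.0) is assumed:

```python
import tensorflow as tf
from CnnLayer import CnnLayer

# One spliced input frame: 11 frames x 3 (static + delta + delta-delta) x 40 fbank = 1320 values.
inputs = tf.placeholder(tf.float32, [None, 11 * 3 * 40])

cnn = CnnLayer()
# Builds the two convolutional layers plus pooling and returns the flattened feature maps.
outputs = cnn(inputs, is_training=True, reuse=False, scope='cnn_front_end')
```

Inside tfkaldi, the same call is made by `dnn.py`, which plugs the chosen CNN class in as the first layer of the DNN classifier.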
7 | 
--------------------------------------------------------------------------------
/RestNet.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | 
4 | class RestNet(object):
5 | 
6 |     def __init__(self):
7 |         print 'Init cnn layer'
8 | 
9 |     def __call__(self, inputs, is_training=False, reuse=False, scope=None):
10 |         with tf.variable_scope(scope or type(self).__name__, reuse=reuse):
11 |             with tf.variable_scope('prep_data_l1', reuse=reuse):
12 |                 print inputs.get_shape()
13 |                 inputs_img = tf.reshape(inputs, tf.pack( [ tf.shape(inputs)[0] , 17, 64, 1] ) )
14 | 
15 | 
16 | 
17 | 
18 |             x = self.convolution(inputs_img, 'first_conv_l1', 1, 64, 7, 7, reuse, is_training)
19 |             with tf.variable_scope('pool_l1', reuse=reuse):
20 |                 x = tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
21 | 
22 | 
23 |             x = self.residual('l1', x, 64, 64, [1, 1, 1, 1], reuse, is_training)
24 | 
25 |             x = self.residual('l2', x, 64, 128, [1, 1, 2, 1], reuse, is_training)
26 | 
27 |             x = self.residual('l3', x, 128, 128, [1, 1, 2, 1], reuse, is_training)
28 | 
29 |             x = self.residual('l4', x, 128, 256, [1, 2, 2, 1], reuse, is_training)
30 | 
31 |             x = self.residual('l5', x, 256, 256, [1, 2, 2, 1], reuse, is_training)
32 | 
33 |             #x = self.residual('l6', x, 256, 512, [1, 2, 2, 1], reuse, is_training)
34 | 
35 |             with tf.variable_scope('out_op', reuse=reuse):
36 |                 x = tf.nn.avg_pool(x, [1,3,2,1], [1,3,2,1], 'SAME')
37 |                 shape = x.get_shape().as_list()
38 |                 x = tf.reshape(x, tf.pack( [tf.shape(x)[0], shape[1] * shape[2] * shape[3] ] ) )
39 | 
40 |             outputs = self.fully_connected('fcl', x, reuse, is_training)
41 | 
42 |             print 'Layer: ' + scope
43 |             print 'Input: '
44 |             print inputs.get_shape()
45 |             print inputs_img.get_shape()
46 |             print 'Outputs: '
47 |             print outputs.get_shape()
48 |             return outputs
49 | 
50 | 
51 |     def convolution(self, inputs_img, name_layer, in_dim, out_dim, t_conv_size, f_conv_size, reuse, is_training):
52 |         with tf.variable_scope('parameters_'+name_layer, reuse=reuse):
53 |             n = t_conv_size*f_conv_size*out_dim
54 |             weights = tf.get_variable('weights_'+name_layer, [t_conv_size, f_conv_size, in_dim, out_dim], initializer = tf.random_normal_initializer(stddev=np.sqrt(2.0 / n)))
55 |             biases = tf.get_variable('biases_'+name_layer, [out_dim], initializer=tf.constant_initializer(0) )
56 | 
57 |         with tf.variable_scope('conv'+name_layer, reuse=reuse):
58 |             conv = tf.nn.conv2d(inputs_img, weights, [1, 1, 1, 1], padding='SAME')
59 |             #print conv.get_shape()
60 |             conv = tf.contrib.layers.batch_norm(conv,
61 |                                                 is_training=is_training,
62 |                                                 scope='batch_norm',
63 |                                                 reuse = reuse)
64 |             hidden = tf.nn.relu(conv + biases)
65 |             print 'hidden_'+ name_layer
66 |             print hidden.get_shape()
67 |             return hidden
68 | 
69 | 
70 |     def fully_connected(self, name_layer, x, reuse, is_training):
71 | 
72 |         print 'Layer: ' + name_layer
73 |         print 'Input: '
74 |         print x.get_shape()
75 | 
76 |         output = 1000
77 | 
78 |         with tf.variable_scope(name_layer, reuse=reuse):
79 |             with tf.variable_scope(name_layer +'_parameters', reuse=reuse):
80 | 
81 |                 stddev = 1/(int(x.get_shape()[1])**0.5)
82 | 
83 |                 weights = tf.get_variable(
84 |                     'weights', [x.get_shape()[1], output],
85 |                     initializer=tf.random_normal_initializer(stddev=stddev))
86 | 
87 |                 biases = tf.get_variable(
88 |                     'biases', [output],
89 |                     initializer=tf.constant_initializer(0))
90 | 
91 |             x = tf.matmul(x, weights) + biases
92 | 
93 |             x = tf.contrib.layers.batch_norm(x,
94 |                                              is_training=is_training,
95 |
scope='batch_norm_'+name_layer, 96 | reuse = reuse) 97 | 98 | x = tf.nn.relu(x) 99 | 100 | print 'Outputs: ' 101 | print x.get_shape() 102 | 103 | return x 104 | 105 | 106 | def residual(self, name_layer, x, in_filter, out_filter, stride, reuse, is_training): 107 | """Residual unit with 2 sub layers.""" 108 | 109 | print 'Layer: ' + name_layer 110 | print 'Input: ' 111 | print x.get_shape() 112 | 113 | orig_x = x 114 | with tf.variable_scope( name_layer + 'sub1', reuse=reuse): 115 | with tf.variable_scope('parameters_sub1_'+name_layer, reuse=reuse): 116 | n = 3*3*out_filter 117 | weights_sub1 = tf.get_variable('weights_sub1_'+name_layer, [3, 3, in_filter, out_filter], initializer = tf.random_normal_initializer(stddev=np.sqrt(2.0 / n))) 118 | biases_sub1 = tf.get_variable('biases_sub1'+name_layer, [out_filter], initializer=tf.constant_initializer(0.01) ) 119 | 120 | with tf.variable_scope('conv_sub1'+name_layer, reuse=reuse): 121 | x = tf.nn.conv2d(x, weights_sub1, stride, padding='SAME') 122 | 123 | x = tf.contrib.layers.batch_norm(x, 124 | is_training=is_training, 125 | scope='batch_norm_sub1_'+name_layer, 126 | reuse = reuse) 127 | x = tf.nn.relu(x + biases_sub1) 128 | 129 | 130 | with tf.variable_scope( name_layer + 'sub2', reuse=reuse): 131 | with tf.variable_scope('parameters_sub2_'+name_layer, reuse=reuse): 132 | n = 3*3*out_filter 133 | weights_sub2 = tf.get_variable('weights_sub2_'+name_layer, [3, 3, out_filter, out_filter], initializer = tf.random_normal_initializer(stddev=np.sqrt(2.0 / n))) 134 | biases_sub2 = tf.get_variable('biases_sub2_'+name_layer, [out_filter], initializer=tf.constant_initializer(0.01) ) 135 | with tf.variable_scope('conv_sub2'+name_layer, reuse=reuse): 136 | x = tf.nn.conv2d(x, weights_sub2, [1, 1, 1, 1], padding='SAME') 137 | 138 | with tf.variable_scope('sub_add'): 139 | orig_x = tf.nn.avg_pool(orig_x, stride, stride, 'SAME') 140 | 141 | if in_filter != out_filter: 142 | if in_filter==1: 143 | orig_x = tf.pad(orig_x, [[0, 0], [0, 0], [0, 0], [(out_filter - in_filter) // 2 , ((out_filter - in_filter) // 2) + 1 ]]) 144 | else: 145 | orig_x = tf.pad(orig_x, [[0, 0], [0, 0], [0, 0], 146 | [(out_filter - in_filter) // 2, 147 | (out_filter - in_filter) // 2]]) 148 | 149 | x += orig_x 150 | 151 | x = tf.contrib.layers.batch_norm(x, 152 | is_training=is_training, 153 | scope='batch_norm_sub1_'+name_layer, 154 | reuse = reuse) 155 | x = tf.nn.relu(x + biases_sub2) 156 | 157 | print 'Outputs: ' 158 | print x.get_shape() 159 | 160 | return x 161 | -------------------------------------------------------------------------------- /dnn.py: -------------------------------------------------------------------------------- 1 | '''@file dnn.py 2 | The DNN neural network classifier''' 3 | 4 | import seq_convertors 5 | import tensorflow as tf 6 | from classifier import Classifier 7 | from layer import FFLayer 8 | #from CnnVd6 import CnnVd6 9 | #from CnnVd10no import CnnVd10no 10 | #from RestNet import RestNet 11 | from CnnLayer import CnnLayer 12 | from activation import TfActivation 13 | 14 | class DNN(Classifier): 15 | '''This class is a graph for feedforward fully connected neural nets.''' 16 | 17 | def __init__(self, output_dim, num_layers, num_units, activation, 18 | layerwise_init=True): 19 | ''' 20 | DNN constructor 21 | 22 | Args: 23 | output_dim: the DNN output dimension 24 | num_layers: number of hidden layers 25 | num_units: number of hidden units 26 | activation: the activation function 27 | layerwise_init: if True the layers will be added one by one, 28 | 
otherwise all layers will be added to the network in the 29 | beginning 30 | ''' 31 | 32 | #super constructor 33 | super(DNN, self).__init__(output_dim) 34 | 35 | #save all the DNN properties 36 | self.num_layers = num_layers 37 | self.num_units = num_units 38 | self.activation = activation 39 | self.layerwise_init = layerwise_init 40 | 41 | def __call__(self, inputs, seq_length, is_training=False, reuse=False, 42 | scope=None): 43 | ''' 44 | Add the DNN variables and operations to the graph 45 | 46 | Args: 47 | inputs: the inputs to the neural network, this is a list containing 48 | a [batch_size, input_dim] tensor for each time step 49 | seq_length: The sequence lengths of the input utterances, if None 50 | the maximal sequence length will be taken 51 | is_training: whether or not the network is in training mode 52 | reuse: wheter or not the variables in the network should be reused 53 | scope: the name scope 54 | 55 | Returns: 56 | A triple containing: 57 | - output logits 58 | - the output logits sequence lengths as a vector 59 | - a saver object 60 | - a dictionary of control operations: 61 | -add: add a layer to the network 62 | -init: initialise the final layer 63 | ''' 64 | 65 | with tf.variable_scope(scope or type(self).__name__, reuse=reuse): 66 | 67 | #input layer 68 | layer = FFLayer(self.num_units, self.activation) 69 | 70 | #output layer 71 | outlayer = FFLayer(self.output_dim, 72 | TfActivation(None, lambda(x): x), 0) 73 | 74 | #do the forward computation 75 | 76 | #convert the sequential data to non sequential data 77 | nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length) 78 | 79 | activations = [None]*self.num_layers 80 | #activations[0] = layer(nonseq_inputs, is_training, reuse, 'layer0') 81 | 82 | #cnn_layer = RestNet() 83 | #cnn_layer = CnnVd6() 84 | cnn_layer = CnnLayer() 85 | activations[0] = cnn_layer(nonseq_inputs, is_training, reuse, 'layer0') 86 | for l in range(1, self.num_layers): 87 | activations[l] = layer(activations[l-1], is_training, reuse, 88 | 'layer' + str(l)) 89 | 90 | if self.layerwise_init: 91 | 92 | #variable that determines how many layers are initialised 93 | #in the neural net 94 | initialisedlayers = tf.get_variable( 95 | 'initialisedlayers', [], 96 | initializer=tf.constant_initializer(0), 97 | trainable=False, 98 | dtype=tf.int32) 99 | 100 | #operation to increment the number of layers 101 | add_layer_op = initialisedlayers.assign(initialisedlayers+1).op 102 | 103 | #compute the logits by selecting the activations at the layer 104 | #that has last been added to the network, this is used for layer 105 | #by layer initialisation 106 | logits = tf.case( 107 | [(tf.equal(initialisedlayers, tf.constant(l)), 108 | Callable(activations[l])) 109 | for l in range(len(activations))], 110 | default=Callable(activations[-1]), 111 | exclusive=True, name='layerSelector') 112 | 113 | logits.set_shape([None, self.num_units]) 114 | else: 115 | logits = activations[-1] 116 | 117 | logits = outlayer(logits, is_training, reuse, 118 | 'layer' + str(self.num_layers)) 119 | 120 | 121 | if self.layerwise_init: 122 | #operation to initialise the final layer 123 | init_last_layer_op = tf.initialize_variables( 124 | tf.get_collection( 125 | tf.GraphKeys.VARIABLES, 126 | scope=(tf.get_variable_scope().name + '/layer' 127 | + str(self.num_layers)))) 128 | 129 | control_ops = {'add':add_layer_op, 'init':init_last_layer_op} 130 | else: 131 | control_ops = None 132 | 133 | #convert the logits to sequence logits to match expected output 134 | seq_logits = 
seq_convertors.nonseq2seq(logits, seq_length,
135 |                                                      len(inputs))
136 | 
137 |             #create a saver
138 |             saver = tf.train.Saver()
139 | 
140 |         return seq_logits, seq_length, saver, control_ops
141 | 
142 | class Callable(object):
143 |     '''A class for an object that is callable'''
144 | 
145 |     def __init__(self, value):
146 |         '''
147 |         Callable constructor
148 | 
149 |         Args:
150 |             value: the value that will be returned when the object is called
151 |         '''
152 | 
153 |         self.value = value
154 | 
155 |     def __call__(self):
156 |         '''
157 |         get the stored value
158 | 
159 |         Returns:
160 |             the stored value
161 |         '''
162 | 
163 |         return self.value
164 | 
--------------------------------------------------------------------------------
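
For reference, a sketch of how the graph construction in dnn.py might be driven outside the tfkaldi trainer (which normally builds this graph). The output dimension, layer sizes, batch size, number of time steps, and the placeholder-fed input list below are illustrative assumptions; only the DNN and TfActivation call patterns are taken from the code above.

```python
import tensorflow as tf
from dnn import DNN
from activation import TfActivation

# Hypothetical sizes: 8 utterances padded to 100 frames of 1320-dim spliced features
# (matching the 11 x 3 x 40 layout expected by CnnLayer), 1943 output targets.
dnn = DNN(output_dim=1943, num_layers=4, num_units=1024,
          activation=TfActivation(None, tf.nn.relu), layerwise_init=False)

# The docstring above describes inputs as one [batch_size, input_dim] tensor per time step.
inputs = [tf.placeholder(tf.float32, [8, 1320]) for _ in range(100)]
seq_length = tf.placeholder(tf.int32, [8])

logits, logit_lengths, saver, control_ops = dnn(inputs, seq_length, is_training=True)
```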