├── .gitignore
├── CNN2EnhancedSpeech.PNG
├── CepstralCNN
    ├── CepstralQSRCNN.py
    ├── CepstralQSRCNN_TrainTest_GPUs.py
    ├── DataPrepare.py
    ├── Opting_Results
    │   └── README.md
    ├── QSR-WGAN-GP_Train_GPUs.py
    ├── TestData
    │   └── README.md
    ├── Test_Outputs
    │   └── README.md
    ├── TrainValiData
    │   └── README.md
    ├── WaveformQSRCNN.py
    ├── WaveformQSRCNN_TrainTest_GPUs.py
    ├── log
    │   └── README.md
    ├── model_weights
    │   └── README.md
    └── weightnorm.py
├── LICENSE
├── QSR-WGAN-GP
    ├── .gitignore
    ├── .idea
    │   ├── QSR-WGAN-GP.iml
    │   ├── markdown-navigator.xml
    │   ├── markdown-navigator
    │   │   └── profiles_settings.xml
    │   ├── misc.xml
    │   ├── modules.xml
    │   └── workspace.xml
    ├── QSR-GANGP_Train_GPUs.py
    ├── QSR-WGAN-GP_Train_GPUs.py
    └── tflib
    │   ├── __init__.py
    │   └── ops
    │       ├── __init__.py
    │       ├── batchnorm.py
    │       ├── cond_batchnorm.py
    │       ├── conv1d.py
    │       ├── conv2d.py
    │       ├── deconv2d.py
    │       ├── layernorm.py
    │       └── linear.py
├── README.md
└── WaveformCNN
    ├── DataPrepare.py
    ├── Opting_Results
        └── README.md
    ├── TestData
        └── README.md
    ├── TrainValiData
        └── README.md
    ├── WaveformQSRCNN.py
    ├── WaveformQSRCNN_TrainTest_GPUs.py
    ├── log
        └── README.md
    ├── model_weights
        ├── README.md
        └── g711_waveformqsrcnn_weights_best_bs32_lr0.0005.h5
    └── weightnorm.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/CNN2EnhancedSpeech.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CNN2EnhancedSpeech.PNG


--------------------------------------------------------------------------------
/CepstralCNN/CepstralQSRCNN.py:
--------------------------------------------------------------------------------
  1 | #######################################################################################################################
  2 | # Project QSRCNN: Quantized Speech Reconstruction using Convolutional Neural Network     V1.0
  3 | # =====================================================================================================================
  4 | # CepstralQSRCNN.py: QSRCNN for G711/ADPCM/AMR/EVS using Cepstral features
  5 | #
  6 | #
  7 | # =====================================================================================================================
  8 | # Technische Universität Braunschweig, IfN
  9 | # Author:    Huijun Liu M.Sc.
 10 | # Date:      17.06.2017
 11 | #######################################################################################################################
 12 | 
 13 | import os
 14 | import time
 15 | import math
 16 | import scipy.io as sio
 17 | import tensorflow as tf
 18 | 
 19 | from keras.models import Model
 20 | from keras import backend as K
 21 | from keras.engine.topology import Layer
 22 | from keras.layers import Input, Add, Multiply, Average, Activation
 23 | from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D, AveragePooling1D
 24 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard, LearningRateScheduler
 25 | 
 26 | from weightnorm import AdamWithWeightnorm
 27 | from tensorflow.python.framework import ops
 28 | 
 29 | # -------------------------------------------------------------------------------
 30 | # 0. define metric and activation function
 31 | # -------------------------------------------------------------------------------
 32 | 
 33 | 
 34 | def snr(y_true, y_pred):
 35 |     """
 36 |         SNR is Signal to Noise Ratio
 37 |     """
 38 |     return 10.0 * K.log((K.sum(K.square(y_true))) / (K.sum(K.square(y_pred - y_true)))) / K.log(10.0)
 39 | 
 40 | 
 41 | def selu(x):
 42 |     """Scaled Exponential Linear Unit. (Klambauer et al., 2017)
 43 |     # Arguments
 44 |         x: A tensor or variable to compute the activation function for.
 45 |     # References
 46 |         - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
 47 |     """
 48 |     with ops.name_scope('elu') as scope:
 49 |         alpha = 1.6732632423543772848170429916717
 50 |         scale = 1.0507009873554804934193349852946
 51 |         return scale * tf.where(x >= 0.0, x, alpha * tf.nn.elu(x))
 52 | 
 53 | """
 54 | def step_decay(epoch):
 55 |     initial_lrate = 0.001
 56 | 
 57 |     drop = 0.5
 58 |     epochs_drop = 3.0
 59 |     lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
 60 | 
 61 |     return lrate
 62 | """
 63 | 
 64 | """
 65 | ''' TensorFlow Backend Function '''
 66 | def phase_shift(I, r):
 67 |     ''' Function copied as is from https://github.com/Tetrachrome/subpixel/blob/master/subpixel.py'''
 68 | 
 69 |     bsize, a, b, c = I.get_shape().as_list()
 70 |     bsize = tf.shape(I)[0]  # Handling Dimension(None) type for undefined batch dim
 71 |     X = tf.reshape(I, (bsize, a, b, r, r))
 72 |     X = tf.transpose(X, (0, 1, 2, 4, 3))  # bsize, a, b, 1, 1
 73 |     X = tf.split(1, a, X)  # a, [bsize, b, r, r]
 74 |     X = tf.concat(2, [tf.squeeze(x) for x in X])  # bsize, b, a*r, r
 75 |     X = tf.split(1, b, X)  # b, [bsize, a*r, r]
 76 |     X = tf.concat(2, [tf.squeeze(x) for x in X])  # bsize, a*r, b*r
 77 |     return tf.reshape(X, (bsize, a * r, b * r, 1))
 78 | 
 79 | def depth_to_scale(input, scale, channels):
 80 |     if channels > 1:
 81 |         Xc = tf.split(3, 3, input)
 82 |         X = tf.concat(3, [phase_shift(x, scale) for x in Xc])
 83 |     else:
 84 |         X = phase_shift(input, scale)
 85 |     return X
 86 | 
 87 | 
 88 | '''
 89 | Implementation is incomplete. Use lambda layer for now.
 90 | '''
 91 | class SubPixelUpscaling(Layer):
 92 | 
 93 |     def __init__(self, r, channels, **kwargs):
 94 |         super(SubPixelUpscaling, self).__init__(**kwargs)
 95 | 
 96 |         self.r = r
 97 |         self.channels = channels
 98 | 
 99 |     def build(self, input_shape):
100 |         pass
101 | 
102 |     def call(self, x, mask=None):
103 |         y = depth_to_scale(x, self.r, self.channels)
104 |         return y
105 | 
106 |     def get_output_shape_for(self, input_shape):
107 |         if K.image_dim_ordering() == "th":
108 |             b, k, r, c = input_shape
109 |             return (b, self.channels, r * self.r, c * self.r)
110 |         else:
111 |             b, r, c, k = input_shape
112 |             return (b, r * self.r, c * self.r, self.channels)
113 | """
114 | 
115 | # -------------------------------------------------------------------------------
116 | # 1. define Cepstral-QSRCNN Model
117 | # -------------------------------------------------------------------------------
class CepstralQSRCNN(object):
    """Build, train and evaluate a 1-D convolutional model on cepstral frames.

    The constructor reads the optimisation, model and codec settings, makes
    sure the log and weights directories exist, and immediately builds and
    compiles the "qsrcnn" architecture (MSE loss, `snr` metric,
    `AdamWithWeightnorm` optimizer).

    All result files (weights, CSV log, training curves) share one naming
    scheme: `<codec>_Type<type><tag><batch_size>_lr<learning_rate><ext>`.
    """

    def __init__(self, opt_params={'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 100},
                 model_params={'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32},
                 codec_type_params={'codec': 'ADPCM', 'type': '3', 'weights_dir': "./model_weights", 'logdir': "./log"}):
        """Store the settings, create output directories and build the model.

        # Arguments
            opt_params: dict with keys 'lr', 'batch_size', 'nb_epochs'.
            model_params: dict with keys 'n1', 'n2', 'n3' (filter counts of
                the "qsrcnn" model) and 'frame_len' (input length).
            codec_type_params: dict with keys 'codec', 'type', 'weights_dir'
                and 'logdir'.
        """
        self.learning_rate = opt_params['lr']                   # Learning rate
        self.batch_size = opt_params['batch_size']              # Batch size
        self.nb_epochs = opt_params['nb_epochs']                # Number of epochs

        self.codec = codec_type_params['codec']                 # Codec type
        self.type = codec_type_params['type']                   # Methods type

        self.log_dir = codec_type_params['logdir']              # Log file directory
        self._ensure_dir(self.log_dir)

        self.weights_dir = codec_type_params['weights_dir']     # Weights file directory
        self._ensure_dir(self.weights_dir)

        self.frame_len = model_params['frame_len']              # Frame length
        # Keep a private copy: the default argument is a dict shared by every
        # call, and a caller-owned dict may be modified after construction.
        self.model_params = dict(model_params)
        self.model = self.create_model("qsrcnn")

    # -------------------------------------------------------------------------------
    # Internal helpers
    # -------------------------------------------------------------------------------
    @staticmethod
    def _ensure_dir(path):
        """Create directory `path` (including parents) if it does not exist yet."""
        if not os.path.exists(path):
            os.makedirs(path)

    def _tagged_path(self, directory, tag, extension):
        """Return the normalised path of a result file for the current run settings."""
        file_name = (self.codec + '_Type' + self.type + tag + str(self.batch_size) +
                     '_lr' + str(self.learning_rate) + extension)
        return os.path.normcase(directory + '/' + file_name)

    # -------------------------------------------------------------------------------
    # Load the Weights of the Model
    # -------------------------------------------------------------------------------
    def load_weights(self, file_path=""):
        """Load model weights from `file_path`.

        When `file_path` is empty, the "best" weights file written by the
        checkpoint callback in `fit` is used.
        """
        if file_path == "":
            file_path = self._tagged_path(self.weights_dir, '_CepstralQSRCNN_Weights_Best_bs', '.h5')

        file_path = os.path.normcase(file_path)
        self.model.load_weights(file_path)

    # -------------------------------------------------------------------------------
    # Save the Weights of the Model
    # -------------------------------------------------------------------------------
    def save_weights(self):
        """Save the current model weights as the "final" weights file."""
        file_path = self._tagged_path(self.weights_dir, '_CepstralQSRCNN_Weights_Final_bs', '.h5')
        self.model.save_weights(file_path)

    # Disabled sub-pixel upscaling block. It needs the SubPixelUpscaling layer,
    # which is itself disabled at module level. Kept as a comment (it used to
    # be a tab-indented string literal, which broke the indentation of the class).
    #
    # def _upscale_block(self, ip, id):
    #     init = ip
    #
    #     x = Conv1D(256, 3, padding='same', name='espcnn_upconv1_%d' % id)(init)
    #     x = Activation(selu)(x)
    #     x = SubPixelUpscaling(r=2, channels=64, name='espcnn_upconv1__upscale1_%d' % id)(x)
    #     x = Conv1D(256, 3, padding='same', name='espcnn_upconv1_filter1_%d' % id)(x)
    #     x = Activation(selu)(x)
    #
    #     return x

    # -------------------------------------------------------------------------------
    # 1. define model
    # -------------------------------------------------------------------------------
    def create_model(self, model_type="qsrcnn"):
        """Build and compile one of the supported architectures.

        # Arguments
            model_type: "qsrcnn", "wavenet", "autoencoder" or "esrcnn".
        # Returns
            The compiled Keras model (its summary is printed as a side effect).
        # Raises
            ValueError: if `model_type` is not one of the supported names.
        """
        builders = {
            "qsrcnn": self._build_qsrcnn,
            "wavenet": self._build_wavenet,
            "autoencoder": self._build_autoencoder,
            "esrcnn": self._build_esrcnn,
        }
        # Fail early with a clear message instead of reaching model.summary()
        # with an unbound `model` variable.
        if model_type not in builders:
            raise ValueError("Unknown model_type %r, expected one of %s"
                             % (model_type, sorted(builders)))

        model = builders[model_type]()

        # Disabled "subpixel" variant (depends on the disabled _upscale_block):
        #
        # elif model_type == "subpixel":
        #     frame_len = self.frame_len
        #
        #     input_frame = Input(shape=(frame_len, 1))
        #     x = Conv1D(64, 5, padding='same', name='level1')(input_frame)
        #     x = Activation(selu)(x)
        #     x = Conv1D(32, 3, padding='same', name='level2')(x)
        #     x = Activation(selu)(x)
        #
        #     x = self._upscale_block(x, 1)
        #
        #     out = Conv1D(1, 5, activation='linear', padding='same', name='output_1')(x)
        #
        #     model = Model(input_frame, out)

        model.summary()

        # adam = optimizers.Adam(lr=learning_rate)
        # model.compile(optimizer=adam, loss='mse', metrics=[SNRLoss])
        adam_wn = AdamWithWeightnorm(lr=self.learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        model.compile(optimizer=adam_wn, loss='mse', metrics=[snr])

        return model

    def _build_qsrcnn(self):
        """Build the encoder/decoder CNN with two additive skip connections."""
        frame_len = self.frame_len
        n1 = self.model_params['n1']
        n2 = self.model_params['n2']
        n3 = self.model_params['n3']

        input_sque = Input(shape=(frame_len, 1))
        c1 = Conv1D(n1, 3, padding='same')(input_sque)
        c1 = Activation(selu)(c1)
        c1 = Conv1D(n1, 3, padding='same')(c1)
        c1 = Activation(selu)(c1)
        x = MaxPooling1D(2)(c1)

        c2 = Conv1D(n2, 3, padding='same')(x)
        c2 = Activation(selu)(c2)
        c2 = Conv1D(n2, 3, padding='same')(c2)
        c2 = Activation(selu)(c2)
        x = MaxPooling1D(2)(c2)

        c3 = Conv1D(n3, 3, padding='same')(x)
        c3 = Activation(selu)(c3)
        x = UpSampling1D(2)(c3)

        c2_2 = Conv1D(n2, 3, padding='same')(x)
        c2_2 = Activation(selu)(c2_2)
        c2_2 = Conv1D(n2, 3, padding='same')(c2_2)
        c2_2 = Activation(selu)(c2_2)

        # Skip connection at half resolution
        m1 = Add()([c2, c2_2])
        m1 = UpSampling1D(2)(m1)

        c1_2 = Conv1D(n1, 3, padding='same')(m1)
        c1_2 = Activation(selu)(c1_2)
        c1_2 = Conv1D(n1, 3, padding='same')(c1_2)
        c1_2 = Activation(selu)(c1_2)

        # Skip connection at full resolution
        m2 = Add()([c1, c1_2])

        decoded = Conv1D(1, 5, padding='same', activation='linear')(m2)

        return Model(input_sque, decoded)

    def _build_wavenet(self):
        """Build the encoder-conditioned model with gated residual/skip blocks."""
        frame_len = self.frame_len

        ae_width = 16
        ae_filter_length = 3

        ae_num_stages = 2
        ae_num_layers = 6

        num_stages = 2
        num_layers = 6

        width = 16
        skip_width = 16
        filter_length = 3

        input_sque = Input(shape=(frame_len, 1), name='input_layer')

        # ---------------------------------------
        # The Non-Causal Temporal Encoder.
        # ---------------------------------------
        en = Conv1D(ae_width, ae_filter_length, padding='same', name='ae_startconv')(input_sque)

        for num_layer in range(ae_num_layers):
            # dilation: 2 ** (num_layer % ae_num_stages), i.e. it cycles through 1, 2
            d = Activation(selu)(en)
            d = Conv1D(ae_width, 3, padding='same', dilation_rate=2 ** (num_layer % ae_num_stages),
                       name='ae_dilatedconv_%d' % (num_layer + 1))(d)
            d = Activation(selu)(d)

            en2 = Conv1D(ae_width, 1, padding='same', dilation_rate=2 ** (num_layer % ae_num_stages),
                         name='ae_res_%d' % (num_layer + 1))(d)
            en = Add()([en2, en])

        en = Activation(selu)(en)
        en = Conv1D(16, 1, padding='causal', dilation_rate=1, name='ae_bottleneck')(en)
        en = Activation(selu)(en)
        en = AveragePooling1D(2, name='ae_pool')(en)
        # encoding = en

        # ---------------------------------------
        # The WaveNet Decoder.
        # ---------------------------------------
        # enup = UpSampling1D(2, name='up_sampling')(en)
        # l = shift_right(input_frame)

        l = Conv1D(width, filter_length, padding='causal', dilation_rate=1, name='startconv')(input_sque)
        l = Activation(selu)(l)
        # Set up skip connections.
        s = Conv1D(skip_width, 1, padding='causal', dilation_rate=1, name='skip_start')(l)
        s = Activation(selu)(s)

        # Residual blocks with skip connections.
        for i in range(num_layers):
            d = Conv1D(2 * width, filter_length, padding='causal', dilation_rate=2 ** (i % num_stages),
                       name='dilatedconv_%d' % (i + 1))(l)
            d = Activation(selu)(d)

            # Map the pooled encoding to the block width and bring it back
            # to the decoder's time resolution before adding it in.
            en3 = Conv1D(2 * width, 1, padding='causal', dilation_rate=1, name='cond_map_%d' % (i + 1))(en)  # 40
            en3 = Activation(selu)(en3)
            en3 = UpSampling1D(2, name='up_sampling_%d' % (i + 1))(en3)
            # d = condition(d,en3)
            d = Add()([d, en3])

            # Gated activation: sigmoid(d) * tanh(d)
            d_sigmoid = Activation('sigmoid')(d)
            d_tanh = Activation('tanh')(d)
            d = Multiply()([d_sigmoid, d_tanh])

            l2 = Conv1D(width, 1, padding='causal', dilation_rate=1, name='res_%d' % (i + 1))(d)
            l2 = Activation(selu)(l2)
            l = Add()([l2, l])

            s2 = Conv1D(skip_width, 1, padding='causal', dilation_rate=1, name='skip_%d' % (i + 1))(d)
            s = Add()([s2, s])

        s = Activation(selu)(s)

        s = Conv1D(skip_width, 3, padding='causal', activation='linear', name='output_layer1')(s)
        s = Activation(selu)(s)
        en4 = Conv1D(skip_width, 1, padding='causal', activation='linear', name='cond_map_out1')(en)
        en4 = Activation(selu)(en4)
        en4 = UpSampling1D(2, name='up_sampling')(en4)
        s = Add()([en4, s])
        s = Activation(selu)(s)

        outs = Conv1D(1, 3, padding='causal', activation='linear', name='output_layer')(s)

        return Model(input_sque, outs)

    def _build_autoencoder(self):
        """Build the plain convolutional autoencoder (no skip connections)."""
        frame_len = self.frame_len
        n1 = 64
        n2 = 32

        input_sque = Input(shape=(frame_len, 1))
        c1 = Conv1D(n1, 3, padding='same')(input_sque)
        c1 = Activation(selu)(c1)
        x = MaxPooling1D(2)(c1)

        c2 = Conv1D(n2, 3, padding='same')(x)
        c2 = Activation(selu)(c2)
        encoded = MaxPooling1D(2)(c2)

        d1 = UpSampling1D(2)(encoded)
        d1 = Conv1D(n2, 3, padding='same')(d1)
        d1 = Activation(selu)(d1)
        # NOTE(review): selu is applied a second time here; kept as in the
        # original so the network function is unchanged - confirm it is intended.
        y = Activation(selu)(d1)

        d2 = UpSampling1D(2)(y)
        d2 = Conv1D(n1, 3, padding='same')(d2)
        d2 = Activation(selu)(d2)

        decoded = Conv1D(1, 5, padding='same', activation='linear')(d2)

        return Model(input_sque, decoded)

    def _build_esrcnn(self):
        """Build the model that averages three parallel convolutions of different widths."""
        f1 = 5
        f2_1 = 1
        f2_2 = 2
        f2_3 = 3
        f3 = 5

        n1 = 128
        n2 = 64

        frame_len = self.frame_len

        input_img = Input(shape=(frame_len, 1))
        x = Conv1D(n1, f1, padding='same', name='level1')(input_img)
        x = Activation(selu)(x)

        # Layer names (including the 'lavel' spelling) are runtime strings and
        # are deliberately left untouched.
        x1 = Conv1D(n2, f2_1, padding='same', name='lavel1_1')(x)
        x1 = Activation(selu)(x1)
        x2 = Conv1D(n2, f2_2, padding='same', name='lavel1_2')(x)
        x2 = Activation(selu)(x2)
        x3 = Conv1D(n2, f2_3, padding='same', name='lavel1_3')(x)
        x3 = Activation(selu)(x3)

        x = Average()([x1, x2, x3])

        out = Conv1D(1, f3, padding='same', activation='linear', name='output_1')(x)
        # out = LeakyReLU(0.2)(out)

        return Model(input_img, out)

    # -------------------------------------------------------------------------------
    # 2. Fit the model
    # -------------------------------------------------------------------------------
    def step_decay(self, epoch):
        """Set the optimizer learning rate for `epoch` using a step schedule.

        The rate is `self.learning_rate * 0.5 ** floor((1 + epoch) / 4)`.
        It is written into the model's optimizer, read back, printed together
        with the previous value, and returned.
        """
        initial_lrate = self.learning_rate

        drop = 0.5
        epochs_drop = 4.0
        lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))

        old_lr = K.get_value(self.model.optimizer.lr)
        K.set_value(self.model.optimizer.lr, lrate)
        lrate = K.get_value(self.model.optimizer.lr)
        print('lr reduced from %f to %f' % (old_lr, lrate))
        return lrate

    def fit(self, x_train_noisy, x_train, x_train_noisy_vali, x_train_vali):
        """Train the model and return the Keras history object.

        # Arguments
            x_train_noisy: training inputs.
            x_train: training targets.
            x_train_noisy_vali: validation inputs.
            x_train_vali: validation targets.
        # Returns
            The object returned by `model.fit`.
        """
        print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
        print("> Training model ...")

        nb_epochs = self.nb_epochs
        batch_size = self.batch_size
        learning_rate = self.learning_rate

        # ---------------------------------------------------------
        # 1. define callback functions
        # ---------------------------------------------------------
        # Stop training when val_snr has not improved for 16 epochs
        stop_str = EarlyStopping(monitor='val_snr', patience=16, verbose=1, mode='max')

        # Reduce learning rate when val_snr stops improving: lr = lr * factor
        reduce_LR = ReduceLROnPlateau(monitor='val_snr', factor=0.6, patience=2, verbose=1, mode='max', epsilon=0.0001, cooldown=0, min_lr=0)

        # Keep only the weights of the epoch with the highest val_snr
        best_weights = self._tagged_path(self.weights_dir, '_CepstralQSRCNN_Weights_Best_bs', '.h5')
        model_save = ModelCheckpoint(best_weights, monitor='val_snr', save_best_only=True, mode='max', save_weights_only=True, period=1)

        logger_name = self._tagged_path(self.log_dir, '_CepstralQSRCNN_log_bs', '.csv')
        logger = CSVLogger(logger_name, separator=',', append=False)

        # TensorBoard and the step_decay LearningRateScheduler were built here
        # before but never passed to model.fit, so they are no longer created.

        start = time.time()

        # ---------------------------------------------------------
        # 2. fit the model
        # ---------------------------------------------------------
        print("> Training model " + "using Batch-size: " + str(batch_size) + ", Learning_rate: " + str(learning_rate) + "...")
        hist = self.model.fit(x_train_noisy, x_train, epochs=nb_epochs, batch_size=batch_size, shuffle=True,
                              validation_data=[x_train_noisy_vali, x_train_vali],
                              callbacks=[reduce_LR, stop_str, model_save, logger])

        print("> Training Completed, Time : ", time.time() - start)
        print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
        return hist

    # -------------------------------------------------------------------------------
    # 3. Save loss snr val_loss val_snr as .mat File
    # -------------------------------------------------------------------------------
    def save_training_curves(self, hist):
        """Write the four training/validation curves of `hist` to .mat files.

        # Arguments
            hist: history object returned by `fit`; its `history` dict must
                contain the keys 'loss', 'snr', 'val_loss' and 'val_snr'.
        """
        print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
        print("> Saving Training and Validation loss-metric curve ...")

        start = time.time()

        train_curve_root = "./Opting_Results"
        self._ensure_dir(train_curve_root)

        # (file-name tag, variable name inside the .mat file, key in hist.history)
        curves = [
            ('_CepstralDDQSRCNN_TrainLoss_bs', 'Train_Loss_Vec', 'loss'),         # 1. training loss
            ('_CepstralDDQSRCNN_TrainMetrice_bs', 'Train_SNR_Vec', 'snr'),        # 2. training metric
            ('_CepstralDDQSRCNN_ValiLoss_bs', 'Vali_Loss_Vec', 'val_loss'),       # 3. validation loss
            ('_CepstralDDQSRCNN_ValiMetrice_bs', 'Vali_SNR_Vec', 'val_snr'),      # 4. validation metric
        ]
        for tag, mat_name, history_key in curves:
            mat_path = self._tagged_path(train_curve_root, tag, '.mat')
            sio.savemat(mat_path, {mat_name: hist.history[history_key]})

        print("> Saving Completed, Time : ", time.time() - start)
        print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')

    # -------------------------------------------------------------------------------
    # 4. Evaluate the Trained Model
    # -------------------------------------------------------------------------------
    def evaluation_model(self, x_test_noisy, detail_type="1", weights_path=""):
        """Load weights, run prediction on `x_test_noisy` and save the result.

        # Arguments
            x_test_noisy: inputs passed to `model.predict`.
            detail_type: string inserted into the output file name.
            weights_path: weights file to load; empty selects the default
                "best" weights file (see `load_weights`).

        The predictions are stored under the key 'predictions' in
        `./Test_Outputs/<codec>_CNN_testplan_Type<detail_type>_ceps_vec.mat`.
        """
        print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
        print("> Evaluation of the Trained Model ...")
        # ---------------------------------------------------------
        # 1. Load Model Weights
        # ---------------------------------------------------------
        print('> 1. Loading the Weights of the Model ...')
        self.load_weights(weights_path)

        # ---------------------------------------------------------
        # 2. Evaluate the Model
        # ---------------------------------------------------------
        start = time.time()
        print('> 2. Evaluating the Model, Please wait for a Moment ...')
        predicted = self.model.predict(x_test_noisy)
        print('> 2. Evaluating Completed, Time : ' + str(time.time() - start))

        # ---------------------------------------------------------
        # 3. Saving the Evaluation Result
        # ---------------------------------------------------------
        print('> 3. Saving the Evaluation Result ...')
        start = time.time()
        pre_file_root = "./Test_Outputs"
        self._ensure_dir(pre_file_root)

        preOutput = pre_file_root + "/" + self.codec + '_CNN_testplan_Type' + detail_type + "_ceps_vec.mat"
        preOutput = os.path.normcase(preOutput)

        sio.savemat(preOutput, {'predictions': predicted})
        print('> 3. Evaluation Result Saving Completed, Time : ' + str(time.time() - start))
        print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')


--------------------------------------------------------------------------------
/CepstralCNN/CepstralQSRCNN_TrainTest_GPUs.py:
--------------------------------------------------------------------------------
  1 | #######################################################################################################################
  2 | # Project QSRCNN: Quantized Speech Reconstruction using Convolutional Neural Network     V1.0
  3 | # =====================================================================================================================
  4 | # CepstralQSRCNN_TrainTest_GPUs.py: Train and Test QSRCNN for G711/ADPCM/AMR/EVS using Cepstral features
  5 | #
  6 | #
  7 | # =====================================================================================================================
  8 | # Technische Universität Braunschweig, IfN
  9 | # Author:    Huijun Liu M.Sc.
 10 | # Date:      17.06.2017
 11 | #######################################################################################################################
 12 | 
 13 | import os
 14 | import sys
 15 | import time
 16 | 
 17 | import CepstralQSRCNN as model
 18 | import tensorflow as tf
 19 | import DataPrepare as dp
 20 | from keras.backend.tensorflow_backend import set_session
 21 | 
 22 | #####################################################################################
 23 | # 0. Settings For GPU
 24 | #####################################################################################
# Optional TensorFlow session setup: runs only when using_gpu is exactly 1.
using_gpu = 0
if using_gpu == 1:                                               # Only one GPU can be used
    # Restrict this process to the CUDA device with index "2".
    # NOTE(review): the original note read "x stand for GPU index: 3-x!!" - presumably the
    # physical GPU numbering is reversed on the author's machine; confirm before relying on it.
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.4     # Up to 40% of the GPU memory can be used
    set_session(tf.Session(config=config))                       # Make Keras use this configured session
 31 | 
 32 | #####################################################################################
 33 | # 1. Settings Parameters
 34 | #####################################################################################
 35 | 
 36 | train_or_test = "test"            # train  or  test  the deep model
 37 | codec = "amrwb"                    # g711/adpcm/amrwb/evsswb  codec can be used
 38 | type = "3"                      # 1_2  or  3             for Training
 39 | type_detail = "3"                 # 1  or  2  or  3 or 4   for Testing
 40 | frame_len = ""                    # 256(g711/adpcm)  or  512(amrwb)  or  1024(evsswb)
 41 | 
 42 | if codec == "g711" or codec == "adpcm":
 43 |     default_model_params = {'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32}        # Parameters for model itself
 44 |     frame_len = "256"  # 256(g711/adpcm)  or  512(amrwb)  or  1024(evsswb)        # Frame length
 45 | elif codec == "amrwb":
 46 |     default_model_params = {'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 64}
 47 |     frame_len = "512"  # 256(g711/adpcm)  or  512(amrwb)  or  1024(evsswb)
 48 | elif codec == "evsswb":
 49 |     default_model_params = {'n1': 64, 'n2': 128, 'n3': 64, 'frame_len': 128}
 50 |     frame_len = "1024"  # 256(g711/adpcm)  or  512(amrwb)  or  1024(evsswb)
 51 | else:
 52 |     raise Exception("Please set the variable codec !")
 53 | 
 54 | default_opt_params = {'lr': 5e-4, 'batch_size': 16, 'nb_epochs': 1000}            # Parameters for model training
 55 | codec_type_params = {'codec': codec, 'type': type,
 56 |                      'weights_dir': "./model_weights",
 57 |                      'logdir': "./log"}                                           # Other parameters
 58 | 
 59 | #####################################################################################
 60 | # 2. Training and Testing
 61 | #####################################################################################
# Placeholders; the branch selected by train_or_test below fills in the ones it needs.
train_inputs = ""          # Path of the input data for training
train_targets = ""         # Path of the target data for training

vali_inputs = ""           # Path of the input data for validation
vali_targets = ""          # Path of the target data for validation

test_inputs = ""           # File name of the input data for testing (passed to dp.load_test_data)

if train_or_test == "train":
    # -------------------------------------------------------------------------------
    # 1. Load Data
    # -------------------------------------------------------------------------------
    # NOTE(review): the four file names below hard-code "_Fram256_" for every codec, whereas
    # the test branch builds its file name from the codec-dependent frame_len — confirm that
    # the training/validation files for amrwb and evsswb really carry "Fram256" in their names.
    train_inputs = "./TrainValiData/Train_inputSet_" + codec + \
                   "_defautLang_OLdata_ValiTrain_type" + type + "_Fram256_ceps.mat"
    train_targets = "./TrainValiData/Train_targetSet_" + codec + \
                    "_defautLang_OLdata_ValiTrain_type" + type + "_Fram256_ceps.mat"

    vali_inputs = "./TrainValiData/Vali_inputSet_" + codec + \
                  "_defautLang_OLdata_ValiTrain_smallVali_type" + type + "_Fram256_ceps.mat"
    vali_targets = "./TrainValiData/Vali_targetSet_" + codec + \
                   "_defautLang_OLdata_ValiTrain_smallVali_type" + type + "_Fram256_ceps.mat"

    x_train_noisy, x_train, x_train_noisy_vali, x_train_vali = dp.load_train_data(train_inputs, train_targets,
                                                                                  vali_inputs, vali_targets)

    # -------------------------------------------------------------------------------
    # 2. Init Cepstral-QSRCNN Model
    # -------------------------------------------------------------------------------
    qsrcnn = model.CepstralQSRCNN(opt_params=default_opt_params,
                                  model_params=default_model_params,
                                  codec_type_params=codec_type_params)

    # -------------------------------------------------------------------------------
    # 3. Fit The Cepstral-QSRCNN Model
    # -------------------------------------------------------------------------------
    hist =qsrcnn.fit(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali)

    # -------------------------------------------------------------------------------
    # 4. Save Weights and Training Curves
    # -------------------------------------------------------------------------------
    qsrcnn.save_weights()
    qsrcnn.save_training_curves(hist=hist)

elif train_or_test == "test":
    # -------------------------------------------------------------------------------
    # 1. Load Data
    # -------------------------------------------------------------------------------
    # Only the file name is built here; dp.load_test_data resolves it inside ./TestData.
    test_inputs = "inputTestSet_" + codec + "_concat_Type" + type_detail + "_Frame_" + frame_len + "_ceps_v73.mat"
    x_test_noisy = dp.load_test_data(test_inputs)

    # -------------------------------------------------------------------------------
    # 2. Init Cepstral-QSRCNN Model
    # -------------------------------------------------------------------------------
    # Map the detailed test type onto the training type: 1 and 2 -> "1_2", 3 and 4 -> "3".
    # Any other type_detail leaves `type` at the value set in the settings section.
    if type_detail == "1" or type_detail == "2":
        type = "1_2"
    elif type_detail == "3" or type_detail == "4":
        type = "3"

    # Rebuilt here so that the (possibly re-mapped) type reaches the model.
    codec_type_params = {'codec': codec, 'type': type, 'weights_dir': "./model_weights", 'logdir': "./log"}
    qsrcnn = model.CepstralQSRCNN(opt_params=default_opt_params,
                                  model_params=default_model_params,
                                  codec_type_params=codec_type_params)

    # -------------------------------------------------------------------------------
    # 3. Evaluate The Cepstral-QSRCNN Model
    # -------------------------------------------------------------------------------
    qsrcnn.evaluation_model(x_test_noisy, type_detail)

else:
    # Neither "train" nor "test" was selected: abort with an explicit message.
    raise Exception("Do you want to  train or test the model ? Please set the variable train_or_test !")
    # sys.exit("Please set the codec name !")


--------------------------------------------------------------------------------
/CepstralCNN/DataPrepare.py:
--------------------------------------------------------------------------------
  1 | #######################################################################################################################
  2 | # Project QSRCNN: Quantized Speech Reconstruction using Convolutional Neural Network     V1.0
  3 | # =====================================================================================================================
  4 | # DataPrepare.py: Data prepare and load data
  5 | #
  6 | #
  7 | # =====================================================================================================================
  8 | # Technische Universität Braunschweig, IfN
  9 | # Author:    Huijun Liu M.Sc.
 10 | # Date:      20.05.2017
 11 | #######################################################################################################################
 12 | 
 13 | import os
 14 | import time
 15 | import h5py as h5
 16 | import numpy as np
 17 | import scipy.io as sio
 18 | 
 19 | from numpy import random
 20 | 
 21 | # -------------------------------------------------------------------------------
 22 | # 1. load data
 23 | # -------------------------------------------------------------------------------
 24 | 
 25 | 
 26 | def load_train_data(train_inputs, train_targets, vali_inputs, vali_targets):
 27 |     print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
 28 |     print('> Loading data ')
 29 | 
 30 |     start = time.time()
 31 |     # ---------------------------------------------------------
 32 |     # 1. Load Input Data for Training
 33 |     # ---------------------------------------------------------
 34 |     mat_input = train_inputs
 35 |     mat_input = os.path.normcase(mat_input)
 36 |     print('> 1. Loading Training Input: ' + mat_input + '...')
 37 | 
 38 |     x_train_noisy = sio.loadmat(mat_input)
 39 |     x_train_noisy = x_train_noisy['inputSetNorm']
 40 |     x_train_noisy = np.array(x_train_noisy)
 41 | 
 42 |     # ---------------------------------------------------------
 43 |     # 2. Load Target Data for training
 44 |     # ---------------------------------------------------------
 45 |     mat_target = train_targets
 46 |     mat_target = os.path.normcase(mat_target)
 47 |     print('> 2. Loading Training Target: ' + mat_target + '...')
 48 | 
 49 |     x_train = sio.loadmat(mat_target)
 50 |     x_train = x_train['targetSet']
 51 |     x_train = np.array(x_train)
 52 |     # x_train = target_min_max_scaler.fit_transform(x_train)
 53 | 
 54 |     # ---------------------------------------------------------
 55 |     # 3. Load Input Data for Validation
 56 |     # ---------------------------------------------------------
 57 |     mat_input_vali = vali_inputs
 58 |     mat_input_vali = os.path.normcase(mat_input_vali)
 59 |     print('> 3. Loading Validation Input: ' + mat_input_vali + '...')
 60 | 
 61 |     x_train_noisy_vali = sio.loadmat(mat_input_vali)
 62 |     x_train_noisy_vali = x_train_noisy_vali['inputSetNorm']
 63 |     x_train_noisy_vali = np.array(x_train_noisy_vali)
 64 | 
 65 |     # ---------------------------------------------------------
 66 |     # 4. Load Target Data for Validation
 67 |     # ---------------------------------------------------------
 68 |     mat_target_vali = vali_targets
 69 |     mat_target_vali = os.path.normcase(mat_target_vali)
 70 |     print('> 4. Loading Validation Target: ' + mat_target_vali + '...')
 71 | 
 72 |     x_train_vali = sio.loadmat(mat_target_vali)
 73 |     x_train_vali = x_train_vali['targetSet']
 74 |     x_train_vali = np.array(x_train_vali)
 75 | 
 76 |     # ---------------------------------------------------------
 77 |     # 5. Randomization of Training and/or validation Pairs
 78 |     # ---------------------------------------------------------
 79 |     print('> 5. Randomization of Training Pairs ...')
 80 |     frame_length = x_train_noisy.shape[1]
 81 | 
 82 |     random.seed(1234)
 83 |     train = np.column_stack((x_train_noisy, x_train))
 84 |     np.random.shuffle(train)
 85 |     x_train_noisy = train[:, :frame_length]
 86 |     x_train = train[:, frame_length:]
 87 | 
 88 |     # validation = np.column_stack((x_train_noisy_vali, x_train_vali))
 89 |     # np.random.shuffle(validation )
 90 |     # x_train_noisy_vali = validation [:, :frame_length]
 91 |     # x_train_vali = validation [:, frame_length:]
 92 | 
 93 |     # ---------------------------------------------------------
 94 |     # 6. Reshape of Training and validation Pairs
 95 |     # ---------------------------------------------------------
 96 |     x_train_noisy = np.reshape(x_train_noisy, (x_train_noisy.shape[0], x_train_noisy.shape[1], 1))
 97 |     x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
 98 | 
 99 |     x_train_noisy_vali = np.reshape(x_train_noisy_vali, (x_train_noisy_vali.shape[0], x_train_noisy_vali.shape[1], 1))
100 |     x_train_vali = np.reshape(x_train_vali, (x_train_vali.shape[0], x_train_vali.shape[1], 1))
101 | 
102 |     print("> Data Loaded, , Time : ", time.time() - start)
103 |     print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
104 | 
105 |     return x_train_noisy, x_train, x_train_noisy_vali, x_train_vali
106 | 
107 | 
def load_test_data(testfile_path="inputTestSet_g711concat_Type3_Frame_256_ceps_v73.mat"):
    """Load the normalised test inputs from an HDF5-based ``.mat`` file.

    The file is looked up inside ``./TestData`` (the directory is created if
    it does not exist), opened with h5py, and the dataset ``inputTestNorm``
    is read, transposed and given a trailing axis of length 1.

    :param testfile_path: file name relative to ``./TestData``
    :return: array of shape ``(rows, columns, 1)`` after the transpose
    :raises KeyError: if the file has no ``inputTestNorm`` dataset
    """
    print('> Loading Test data... ')

    test_file_root = "./TestData"
    if not (os.path.exists(test_file_root)):
        os.makedirs(test_file_root)

    mat_input = os.path.normcase(test_file_root + "/" + testfile_path)

    # Read inside a context manager so the file handle is always released;
    # np.array() copies the data out before the file is closed.
    with h5.File(mat_input, 'r') as mat_file:
        dataset = mat_file.get('inputTestNorm')
        if dataset is None:
            # Without this check the failure only shows up later as an
            # unrelated IndexError in the reshape below.
            raise KeyError("Dataset 'inputTestNorm' not found in " + mat_input)
        x_test_noisy = np.array(dataset)

    # presumably needed because h5py returns MATLAB v7.3 arrays with the axes
    # reversed relative to MATLAB — TODO confirm against the data files
    x_test_noisy = np.transpose(x_test_noisy)

    return np.reshape(x_test_noisy, (x_test_noisy.shape[0], x_test_noisy.shape[1], 1))


--------------------------------------------------------------------------------
/CepstralCNN/Opting_Results/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/Opting_Results/README.md


--------------------------------------------------------------------------------
/CepstralCNN/QSR-WGAN-GP_Train_GPUs.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import time
  3 | import math
  4 | import numpy as np
  5 | import scipy.io as sio
  6 | import tensorflow as tf
  7 | import keras.backend as K
  8 | import matplotlib.pyplot as plt
  9 | import scipy.io.wavfile as swave
 10 | import keras.optimizers as optimizers
 11 | 
 12 | from numpy import random
 13 | from keras import initializers
 14 | from keras.models import Model
 15 | from keras.layers import Input
 16 | from keras.layers.merge import Add
 17 | from keras.layers.core import Dense, Flatten, Activation
 18 | from keras.layers.normalization import BatchNormalization
 19 | from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D
 20 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard
 21 | 
 22 | # from weightnorm import AdamWithWeightnorm
 23 | from tensorflow.python.framework import ops
 24 | from keras.backend.tensorflow_backend import set_session
 25 | 
 26 | 
 27 | #####################################################################################
# 0. Settings For GPUs
#####################################################################################
# Executed at import time: exposes only CUDA device "0" to this process and registers a
# Keras session limited to a fraction of the GPU memory.
# Original note: "x stand for GPU index: 3-x" — presumably the physical numbering on the
# author's machine is reversed; TODO confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.3  # Only 30% Memory of GPUs can be used
set_session(tf.Session(config=config))
 34 | 
 35 | #####################################################################################
 36 | # 2. Define new Metric Activation function and Loss function
 37 | #####################################################################################
 38 | 
 39 | 
 40 | def snr(y_true, y_pred):
 41 |     """
 42 |         SNR is Signal to Noise Ratio
 43 | 
 44 |     """
 45 |     return 10.0 * K.log((K.sum(K.square(y_true))) / (K.sum(K.square(y_pred - y_true)))) / K.log(10.0)
 46 | 
 47 | 
 48 | def selu(x):
 49 |     with ops.name_scope('elu') as scope:
 50 |         alpha = 1.6732632423543772848170429916717
 51 |         scale = 1.0507009873554804934193349852946
 52 |         return scale*tf.where(x>=0.0, x, alpha*tf.nn.elu(x))
 53 | 
 54 | 
 55 | #####################################################################################
 56 | # 2. Define Generative model and Adversarial model
 57 | #####################################################################################
 58 | def create_generator(inputs_gen):
 59 |     n1 = 32
 60 |     n2 = 64
 61 |     n3 = 32
 62 | 
 63 |     c1 = Conv1D(n1, 3, padding='same', name='conv_1')(inputs_gen)
 64 |     c1 = Activation(selu, name='act_1')(c1)
 65 |     c1 = Conv1D(n1, 3, padding='same', name='conv_2')(c1)
 66 |     c1 = Activation(selu, name='act_2')(c1)
 67 |     x = MaxPooling1D(2, name='mpool_1')(c1)
 68 | 
 69 |     c2 = Conv1D(n2, 3, padding='same', name='conv_3')(x)
 70 |     c2 = Activation(selu, name='act_3')(c2)
 71 |     c2 = Conv1D(n2, 3, padding='same', name='conv_4')(c2)
 72 |     c2 = Activation(selu, name='act_4')(c2)
 73 |     x = MaxPooling1D(2, name='mpool_2')(c2)
 74 | 
 75 |     c3 = Conv1D(n3, 3, padding='same', name='conv_5')(x)
 76 |     c3 = Activation(selu, name='act_5')(c3)
 77 |     x = UpSampling1D(2, name='usample_1')(c3)
 78 | 
 79 |     c2_2 = Conv1D(n2, 3, padding='same', name='conv_6')(x)
 80 |     c2_2 = Activation(selu, name='act_6')(c2_2)
 81 |     c2_2 = Conv1D(n2, 3, padding='same', name='conv_7')(c2_2)
 82 |     c2_2 = Activation(selu, name='act_7')(c2_2)
 83 | 
 84 |     m1 = Add(name='add_1')([c2, c2_2])
 85 |     m1 = UpSampling1D(2, name='usample_2')(m1)
 86 | 
 87 |     c1_2 = Conv1D(n1, 3, padding='same', name='conv_8')(m1)
 88 |     c1_2 = Activation(selu, name='act_8')(c1_2)
 89 |     c1_2 = Conv1D(n1, 3, padding='same', name='conv_9')(c1_2)
 90 |     c1_2 = Activation(selu, name='act_9')(c1_2)
 91 | 
 92 |     m2 = Add(name='add_2')([c1, c1_2])
 93 | 
 94 |     decoded = Conv1D(1, 5, padding='same', activation='linear', name='conv_10')(m2)
 95 | 
 96 |     return decoded
 97 | 
 98 | 
 99 | def create_discriminator(inputs_disc):
100 |     x = Conv1D(32, 3, padding='same', name='dis_conv_1')(inputs_disc)
101 |     x = Activation(selu, name='dis_act_1')(x)
102 | 
103 |     x = Conv1D(64, 3, padding='same', name='dis_conv_2')(x)
104 |     x = BatchNormalization(name='dis_bnorm_1')(x)
105 |     x1 = Activation(selu, name='dis_act_2')(x)
106 | 
107 |     m1 = Add(name='dis_add_1')([inputs_disc, x1])
108 | 
109 |     x = Conv1D(32, 3, padding='same', name='dis_conv_3')(m1)
110 |     x = Activation(selu, name='dis_act_3')(x)
111 | 
112 |     x = Conv1D(64, 3, padding='same', name='dis_conv_4')(x)
113 |     x = BatchNormalization(name='dis_bnorm_2')(x)
114 |     x2 = Activation(selu, name='dis_act_4')(x)
115 |     m2 = Add(name='dis_add_2')([m1, x2])
116 | 
117 |     discri = Conv1D(1, 5, padding='same', name='dis_conv_5')(m2)
118 | 
119 |     return discri
120 | 
121 | #####################################################################################
122 | # 3. Define Training process of QSR_WGAN_GP
123 | #####################################################################################
# Column index at which load_data() splits the shuffled, column-stacked [input | target]
# rows back into inputs and targets.
SEQ_LEN = 80
# Mini-batch size used by QSRWGAN: it fixes the first dimension of the random interpolation
# factor in __init__ and is passed to batch_generator() in fit().
BATCH_SIZE = 128
126 | 
127 | 
def load_data():
    """Load the G.711 training and validation pairs from hard-coded ``.mat`` files.

    Inputs are read from the variable ``inputSetNorm`` and targets from
    ``targetSet``. The training pairs are shuffled row-wise (inputs and
    targets stay aligned) after seeding NumPy's global random generator with
    1331; the validation pairs keep their order. The shuffled rows are split
    at column ``SEQ_LEN`` and every array gets a trailing axis of length 1.

    Returns:
        (x_train_noisy, x_train, x_train_noisy_vali, x_train_vali)
    """
    print('> Loading data... ')

    def read_variable(path, announcement, variable):
        # Normalise the path, announce it, then pull one variable out of the file.
        path = os.path.normcase(path)
        print(announcement + path)
        return np.array(sio.loadmat(path)[variable])

    def with_channel_axis(matrix):
        # (rows, columns) -> (rows, columns, 1)
        return np.reshape(matrix, (matrix.shape[0], matrix.shape[1], 1))

    # Files are read in this order: training input, validation input,
    # training target, validation target.
    noisy = read_variable(
        'Train_G711_PreProc_defautLang/Train_inputSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat',
        '> Training Input: ', 'inputSetNorm')
    noisy_vali = read_variable(
        'Train_G711_PreProc_defautLang/Vali_inputSet_ALaw_defautLang_OLdata_ValiTrain_smallVali.mat',
        '> Validation Input: ', 'inputSetNorm')
    clean = read_variable(
        'Train_G711_PreProc_defautLang/Train_targetSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat',
        '> Training Target: ', 'targetSet')
    clean_vali = read_variable(
        'Train_G711_PreProc_defautLang/Vali_targetSet_ALaw_defautLang_OLdata_ValiTrain_smallVali.mat',
        '> Validation Target: ', 'targetSet')

    # Shuffle the training pairs: inputs and targets are glued side by side so
    # that a single row shuffle keeps each pair together.
    random.seed(1331)
    paired = np.column_stack((noisy, clean))
    np.random.shuffle(paired)
    noisy, clean = paired[:, :SEQ_LEN], paired[:, SEQ_LEN:]

    # Add the trailing axis to training and validation arrays.
    noisy = with_channel_axis(noisy)
    clean = with_channel_axis(clean)
    noisy_vali = with_channel_axis(noisy_vali)
    clean_vali = with_channel_axis(clean_vali)

    print('> Data Loaded. Model Compiling... ')
    return noisy, clean, noisy_vali, clean_vali
190 | 
191 | # 2. dataset generator
def batch_generator(x_train_noisy, x_train, batch_size=128):
    """Yield aligned (noisy, clean) mini-batches forever.

    Walks both arrays in steps of ``batch_size`` along the first axis and
    starts again from the beginning after the last full batch; a trailing
    remainder shorter than ``batch_size`` is never yielded.

    Args:
        x_train_noisy: array of inputs; its first axis is the sample axis.
        x_train: array of targets, sliced with the same indices.
        batch_size: number of samples per batch (must be >= 1).

    Yields:
        Tuples ``(x_train_noisy[i:i+batch_size], x_train[i:i+batch_size])``.

    Raises:
        ValueError: on the first ``next()`` if ``batch_size`` is smaller than
            1 or larger than the number of samples. Without this check a
            negative ``batch_size`` or too few samples makes the generator
            spin forever without producing a batch.
    """
    n_samples = x_train_noisy.shape[0]
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1, got %r" % (batch_size,))
    if n_samples < batch_size:
        raise ValueError("need at least batch_size=%d samples, got %d" % (batch_size, n_samples))

    last_start = n_samples - batch_size
    while True:
        for start in range(0, last_start + 1, batch_size):
            stop = start + batch_size
            yield x_train_noisy[start:stop], x_train[start:stop]
196 | 
# Default optimisation settings read by QSRWGAN.__init__:
#   'lr'       - learning rate of both Adam optimizers
#   'clip'     - stored as self.clip
#   'n_lambda' - weight of the gradient penalty in the discriminator loss
#   'n_critic' - number of discriminator updates per generator update in fit()
default_opt_params = {'lr': 5e-5, 'clip': 1e-2, 'n_lambda': 10, 'n_critic': 10}
198 | 
199 | 
200 | class QSRWGAN(object):
    def __init__(self, opt_params=default_opt_params, frame_len=80):
        """Build the complete training graph: generator, discriminator, losses and optimizers.

        :param opt_params: dict providing the keys 'n_critic', 'n_lambda', 'clip' and 'lr'.
                           The default is the shared module-level dict; it is only read here.
        :param frame_len:  length of the second axis of the (None, frame_len, 1) placeholders
                           and of the Keras inputs.
        """
        self.n_critic = opt_params['n_critic']
        self.n_lambda = opt_params['n_lambda']
        self.clip = opt_params['clip']            # stored only; not referenced again in __init__
        self.frame_len = frame_len

        # ------------------------------------------------------------------
        # 1. create session
        # ------------------------------------------------------------------
        # NOTE(review): this creates a fresh session without the ConfigProto used at module
        # level and hands it to Keras — presumably replacing the session registered at import
        # time (and its GPU memory limit); confirm that this is intended.
        self.sess = tf.Session()
        K.set_session(self.sess)  # pass the session to keras

        # ------------------------------------------------------------------
        # 2. create generator and discriminator
        # ------------------------------------------------------------------
        # The name scopes are what step 5 relies on to tell the two sets of variables apart.
        with tf.name_scope('generator'):
            gen_inputs = Input(shape=(self.frame_len, 1))
            gen_outputs = create_generator(gen_inputs)

        with tf.name_scope('discriminator'):
            dis_inputs = Input(shape=(self.frame_len, 1))
            dis_outputs = create_discriminator(dis_inputs)

        # ------------------------------------------------------------------
        # 3. instantiate networks of generator and discriminator
        # ------------------------------------------------------------------
        Generator = Model(inputs=gen_inputs, outputs=gen_outputs)
        Generator.summary()
        self.gen_model = Generator                # kept for load_weights() / save_weights()
        Discriminator = Model(inputs=dis_inputs, outputs=dis_outputs)
        Discriminator.summary()

        # ------------------------------------------------------------------
        # 4. save the inputs of generator and discriminator
        # ------------------------------------------------------------------
        # Placeholders that are fed through load_batch() / gen().
        quan_inputs = tf.placeholder(tf.float32, shape=(None, self.frame_len, 1), name='quan_inputs')
        real_inputs = tf.placeholder(tf.float32, shape=(None, self.frame_len, 1), name='real_inputs')
        self.inputs = quan_inputs, real_inputs

        # ------------------------------------------------------------------
        # 5. get the weights of generator and discriminator
        # ------------------------------------------------------------------
        # Selected by substring match on the variable name; collected before the optimizers
        # are created, so variables added later are not part of these lists.
        self.gen_weights = [weights for weights in tf.global_variables() if 'generator' in weights.name]
        self.dis_weights = [weights for weights in tf.global_variables() if 'discriminator' in weights.name]
        # self.gen_weights = Generator.get_weights()
        # self.dis_weights = Discriminator.get_weights()

        # ------------------------------------------------------------------
        # 6. create predictions of generator and discriminator
        # ------------------------------------------------------------------
        fake_inputs = Generator(quan_inputs)
        disc_real = Discriminator(real_inputs)
        disc_fake = Discriminator(fake_inputs)
        self.predictions = fake_inputs            # generator output, fetched by gen()

        # ------------------------------------------------------------------
        # 7. create losses and compute probabilities of discriminator
        # ------------------------------------------------------------------
        # 7.1. WGAN lipschitz-penalty
        # The first dimension of alpha is fixed to BATCH_SIZE, so the fed batches have to be
        # compatible with it.
        # NOTE(review): alpha is drawn from [-0.4, 0.4), not the [0, 1] interval commonly used
        # for WGAN-GP interpolation — confirm that this is intentional.
        alpha = tf.random_uniform(shape=[BATCH_SIZE, 1, 1], minval=-0.4, maxval=0.4)
        differences = fake_inputs - real_inputs
        interpolates = real_inputs + (alpha * differences)

        # Gradient of the discriminator output with respect to the interpolated samples.
        gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0]
        # slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2]))
        # gradient_penalty = self.n_lambda * tf.reduce_mean((slopes - 1.) ** 2)
        # Penalise the deviation of the gradient norm (squares summed over axis 1) from 1.
        gp = K.mean(K.square(K.sqrt(K.sum(K.square(gradients), axis=1)) - 1))
        gradient_penalty = self.n_lambda * gp

        # Discriminator loss: mean score on fake minus mean score on real, plus the penalty.
        disc_loss = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
        disc_loss += gradient_penalty

        self.dis_loss = disc_loss
        # Generator loss: negative mean discriminator score on the generated samples.
        self.gen_loss = -tf.reduce_mean(disc_fake)

        # Monitoring tensors: mean raw scores and mean sigmoid of the scores.
        self.disc_real = tf.reduce_mean(disc_real)
        self.disc_fake = tf.reduce_mean(disc_fake)
        self.prob_real = tf.reduce_mean(tf.sigmoid(disc_real))
        self.prob_fake = tf.reduce_mean(tf.sigmoid(disc_fake))

        # ------------------------------------------------------------------
        # 8. create optimizer for generator and discriminator
        # ------------------------------------------------------------------
        learning_rate = opt_params['lr']

        # Each optimizer updates only the variables of its own network (var_list).
        gen_train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.gen_loss, var_list=self.gen_weights)
        disc_train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.dis_loss, var_list=self.dis_weights)

        self.gen_opt_train = gen_train_op
        self.dis_opt_train = disc_train_op
291 | 
292 |     def load_weights(self):
293 |         self.gen_model.load_weights('ddsrcnn_weights_defaultLang_OL40_stopstr_bs128_lr5e-05.h5')
294 | 
295 |     def save_weights(self, file_path):
296 |         file_path = os.path.normcase(file_path)
297 |         self.gen_model.save_weights(file_path)
298 | 
299 |     def load_batch(self, x_train_noise, x_train, train=True):
300 |         gen_inputs, dis_inputs = self.inputs
301 |         return {gen_inputs: x_train_noise, dis_inputs: x_train, K.learning_phase(): train}
302 | 
303 |     def gen(self, x_train_noise):
304 |         gen_inputs, dis_inputs = self.inputs
305 |         feed_dict = {gen_inputs: x_train_noise, K.learning_phase(): False}
306 |         return self.sess.run(self.predictions, feed_dict=feed_dict)
307 | 
308 |     def gen_train(self, feed_dict):
309 |         _, gen_loss = self.sess.run([self.gen_opt_train, self.gen_loss], feed_dict=feed_dict)
310 |         return gen_loss
311 | 
312 |     def dis_train(self, feed_dict):
313 |         # take a step of adam
314 |         _, dis_loss = self.sess.run([self.dis_opt_train, self.dis_loss], feed_dict=feed_dict)
315 |         # return discriminator loss
316 |         return dis_loss
317 | 
318 |     def fit(self, x_train_noise, x_train, x_train_noise_vali, x_train_vali, epochs=10, logdir='/qsrwgan_run'):
319 |         # ------------------------------------------------------------------
320 |         # 1. initialize log directory
321 |         # ------------------------------------------------------------------
322 |         if tf.gfile.Exists(logdir):
323 |             tf.gfile.DeleteRecursively(logdir)
324 | 
325 |         tf.gfile.MakeDirs(logdir)
326 | 
327 |         # ------------------------------------------------------------------
328 |         # 2. initialize model
329 |         # ------------------------------------------------------------------
330 |         init = tf.global_variables_initializer()
331 |         self.sess.run(init)
332 |         self.load_weights()
333 | 
334 |         # ------------------------------------------------------------------
335 |         # 3. train the model
336 |         # ------------------------------------------------------------------
337 |         step, g_step, epoch = 0, 0, 0
338 |         curr_epoch = 0
339 | 
340 |         # create data for the gan training
341 |         # generator = batch_generator(x_train_noise, x_train)
342 |         mat_input = 'Train_G711_PreProc_defautLang/inputTestSet_g711concat_nonOL_Frame_80.mat'
343 |         mat_input = os.path.normcase(mat_input)
344 |         x_train_noisy = sio.loadmat(mat_input)
345 |         x_train_noisy = x_train_noisy['inputTestNorm']
346 |         x_train_noisy = np.array(x_train_noisy)
347 |         x_train_noisy = np.reshape(x_train_noisy, (x_train_noisy.shape[0], x_train_noisy.shape[1], 1))
348 | 
349 |         while curr_epoch < epochs:
350 |             # create data for the gan training
351 |             generator = batch_generator(x_train_noise, x_train, BATCH_SIZE)
352 |             # generator_vali = batch_generator(x_train_noise_vali, x_train_vali, 1024)
353 | 
354 |             curr_iter = 0
355 |             while curr_iter < x_train_noise.shape[0]//BATCH_SIZE:
356 |                 start_time = time.time()
357 |                 # n_critic = 100 if g_step < 25 or (g_step+1) % 500 == 0 else self.n_critic
358 | 
359 |                 for i in range(self.n_critic):
360 |                     curr_iter += 1
361 |                     dis_losses = []
362 | 
363 |                     # load the batch
364 |                     quant_batch, real_batch = generator.__next__()
365 |                     # quant_batch = np.random.randn(BATCH_SIZE, 80, 1)
366 |                     feed_dict = self.load_batch(quant_batch, real_batch)
367 | 
368 |                     # train the discriminator
369 |                     dis_loss = self.dis_train(feed_dict)
370 |                     dis_losses.append(dis_loss)
371 | 
372 |                 dis_loss = np.array(dis_losses).mean()
373 | 
374 |                 # train the generator
375 |                 curr_iter += 1
376 |                 quant_batch, real_batch = generator.__next__()
377 |                 # quant_batch = np.random.randn(BATCH_SIZE, 80, 1)
378 |                 feed_dict = self.load_batch(quant_batch, real_batch)
379 |                 gen_loss = self.gen_train(feed_dict)
380 | 
381 |                 g_step += 1
382 | 
383 |                 if g_step < 1000 or g_step % 1000 == 0:
384 |                     tot_time = time.time() - start_time
385 |                     print('Epoch: %3d, Gen Steps: %4d (%3.lf s), Discriminator loss: %.6f, Generator loss: %.6f' % (curr_epoch, g_step, tot_time, dis_loss, gen_loss))
386 | 
387 |                 if g_step % 50 == 0:
388 | 
389 | 
390 |                     prediction = self.gen(np.random.randn(BATCH_SIZE, 80, 1))
391 |                     # feed_dict = self.load_batch(x_train_noisy, real_batch_vali)
392 |                     # quanspeech, realspeech = self.sess.run(self.inputs, feed_dict)
393 |                     fname = 'recon-speech-%d_%d.wav' % (curr_iter, g_step)
394 |                     swave.write(fname, 8000, np.reshape(prediction, (prediction.size,)))
395 |                     # fname = 'real-speech-%d.wav' % g_step
396 |                     # swave.write(fname, 8000, np.reshape(realspeech, (realspeech.size,)))
397 | 
398 |                     # fig = plt.figure(facecolor='white')
399 |                     # ax = fig.add_subplot(111)
400 |                     # ax.plot(np.reshape(realspeech, (realspeech.size,)), label='RealSpeech')
401 |                     # plt.plot(np.reshape(quanspeech, (quanspeech.size,)), label='QuanSpeech')
402 |                     # plt.plot(np.reshape(prediction, (prediction.size,)), label='Prediction')
403 | 
404 |                     # plt.legend()
405 |                     # plt.show()
406 | 
407 |             curr_epoch += 1
408 | 
409 |         self.save_weights("qsrwgan_weights.h5")
410 | 
# Build the WGAN with the module-level default optimizer settings.
model = QSRWGAN(opt_params=default_opt_params)
# train model
# Keep the two validation arrays under distinct names: unpacking both into `_`
# left only the last one bound, so the very same array was handed to fit()
# for both validation arguments.
x_train_noisy, x_train, x_vali_noisy, x_vali = load_data()
model.fit(x_train_noisy, x_train, x_vali_noisy, x_vali, epochs=10000)
415 | 


--------------------------------------------------------------------------------
/CepstralCNN/TestData/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/TestData/README.md


--------------------------------------------------------------------------------
/CepstralCNN/Test_Outputs/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/Test_Outputs/README.md


--------------------------------------------------------------------------------
/CepstralCNN/TrainValiData/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/TrainValiData/README.md


--------------------------------------------------------------------------------
/CepstralCNN/WaveformQSRCNN.py:
--------------------------------------------------------------------------------
  1 | #########################################################################################
# WaveformQSRCNN.py: QSRCNN for G711/ADPCM/AMR/EVS using Waveform features
  3 | # Author:   Huijun Liu
  4 | # Time:     10.05.2017
  5 | # Location: TU Braunschweig IfN
  6 | #########################################################################################
  7 | 
  8 | import os
  9 | import time
 10 | import math
 11 | import scipy.io as sio
 12 | import tensorflow as tf
 13 | 
 14 | from keras.models import Model
 15 | from keras import backend as K
 16 | from keras.layers import Input, Add, Activation
 17 | from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D
 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard, LearningRateScheduler
 19 | 
 20 | from weightnorm import AdamWithWeightnorm
 21 | from tensorflow.python.framework import ops
 22 | 
 23 | # -------------------------------------------------------------------------------
 24 | # 0. define metric and activation function
 25 | # -------------------------------------------------------------------------------
 26 | 
 27 | 
 28 | def snr(y_true, y_pred):
 29 |     """
 30 |         SNR is Signal to Noise Ratio
 31 | 
 32 |     """
 33 |     return 10.0 * K.log((K.sum(K.square(y_true))) / (K.sum(K.square(y_pred - y_true)))) / K.log(10.0)
 34 | 
 35 | 
 36 | def selu(x):
 37 |     with ops.name_scope('elu') as scope:
 38 |         alpha = 1.6732632423543772848170429916717
 39 |         scale = 1.0507009873554804934193349852946
 40 |         return scale * tf.where(x >= 0.0, x, alpha * tf.nn.elu(x))
 41 | 
 42 | """
 43 | def step_decay(epoch):
 44 |     initial_lrate = 0.001
 45 | 
 46 |     drop = 0.25
 47 |     epochs_drop = 3.0
 48 |     lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
 49 | 
 50 |     return lrate
 51 | """
 52 | 
 53 | # -------------------------------------------------------------------------------
# 1. define Waveform-QSRCNN Model
 55 | # -------------------------------------------------------------------------------
 56 | 
 57 | 
 58 | class WaveformQSRCNN(object):
 59 |     def __init__(self, opt_params={'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 100},
 60 |                  model_params={'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32},
 61 |                  codec_type_params={'weights_dir': "./model_weights", 'logdir': "./log"}):
 62 |         self.learning_rate = opt_params['lr']
 63 |         self.batch_size = opt_params['batch_size']
 64 |         self.nb_epochs = opt_params['nb_epochs']
 65 | 
 66 |         self.log_dir = codec_type_params['logdir']
 67 |         if not (os.path.exists(self.log_dir)):
 68 |             os.makedirs(self.log_dir)
 69 | 
 70 |         self.weights_dir = codec_type_params['weights_dir']
 71 |         if not (os.path.exists(self.weights_dir)):
 72 |             os.makedirs(self.weights_dir)
 73 | 
 74 |         self.frame_len = model_params['frame_len']
 75 |         self.model = self.create_model(model_params)
 76 | 
 77 |     # -------------------------------------------------------------------------------
 78 |     # Load the Weights of the Model
 79 |     # -------------------------------------------------------------------------------
 80 |     def load_weights(self, file_path=""):
 81 |         if file_path == "":
 82 |             file_path = self.weights_dir + '/' + 'G711_WaveformQSRCNN_Weights_Best_bs' + \
 83 |                            str(self.batch_size) + '_lr' + str(self.learning_rate) + '.h5'
 84 | 
 85 |         file_path = os.path.normcase(file_path)
 86 |         self.model.load_weights(file_path)
 87 | 
 88 |     # -------------------------------------------------------------------------------
 89 |     # Save the Weights of the Model
 90 |     # -------------------------------------------------------------------------------
 91 |     def save_weights(self):
 92 |         file_path = self.weights_dir + '/' + 'G711_WaveformQSRCNN_Weights_Final_bs' + \
 93 |                        str(self.batch_size) + '_lr' + str(self.learning_rate) + '.h5'
 94 |         file_path = os.path.normcase(file_path)
 95 |         self.model.save_weights(file_path)
 96 | 
 97 |     # -------------------------------------------------------------------------------
 98 |     # 1. define model
 99 |     # -------------------------------------------------------------------------------
100 |     def create_model(self, model_params={'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 80}):
101 |         frame_len = self.frame_len
102 |         n1 = model_params['n1']
103 |         n2 = model_params['n2']
104 |         n3 = model_params['n3']
105 | 
106 |         input_sque = Input(shape=(frame_len, 1))
107 |         c1 = Conv1D(n1, 3, padding='same')(input_sque)
108 |         c1 = Activation(selu)(c1)
109 |         c1 = Conv1D(n1, 3, padding='same')(c1)
110 |         c1 = Activation(selu)(c1)
111 |         x = MaxPooling1D(2)(c1)
112 | 
113 |         c2 = Conv1D(n2, 3, padding='same')(x)
114 |         c2 = Activation(selu)(c2)
115 |         c2 = Conv1D(n2, 3, padding='same')(c2)
116 |         c2 = Activation(selu)(c2)
117 |         x = MaxPooling1D(2)(c2)
118 | 
119 |         c3 = Conv1D(n3, 3, padding='same')(x)
120 |         c3 = Activation(selu)(c3)
121 |         x = UpSampling1D(2)(c3)
122 | 
123 |         c2_2 = Conv1D(n2, 3, padding='same')(x)
124 |         c2_2 = Activation(selu)(c2_2)
125 |         c2_2 = Conv1D(n2, 3, padding='same')(c2_2)
126 |         c2_2 = Activation(selu)(c2_2)
127 | 
128 |         m1 = Add()([c2, c2_2])
129 |         m1 = UpSampling1D(2)(m1)
130 | 
131 |         c1_2 = Conv1D(n1, 3, padding='same')(m1)
132 |         c1_2 = Activation(selu)(c1_2)
133 |         c1_2 = Conv1D(n1, 3, padding='same')(c1_2)
134 |         c1_2 = Activation(selu)(c1_2)
135 | 
136 |         m2 = Add()([c1, c1_2])
137 | 
138 |         decoded = Conv1D(1, 5, padding='same', activation='linear')(m2)
139 | 
140 |         model = Model(input_sque, decoded)
141 |         model.summary()
142 | 
143 |         learning_rate = self.learning_rate
144 |         # adam = optimizers.Adam(lr=learning_rate)
145 |         # model.compile(optimizer=adam, loss='mse', metrics=[SNRLoss])
146 | 
147 |         adam_wn = AdamWithWeightnorm(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
148 |         model.compile(optimizer=adam_wn, loss='mse', metrics=[snr])
149 | 
150 |         return model
151 | 
152 |     # -------------------------------------------------------------------------------
153 |     # 2. Fit the model
154 |     # -------------------------------------------------------------------------------
155 |     def step_decay(self, epoch):
156 |         initial_lrate = self.learning_rate
157 | 
158 |         drop = 0.25
159 |         epochs_drop = 4.0
160 |         lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
161 | 
162 |         old_lr = K.get_value(self.model.optimizer.lr)
163 |         K.set_value(self.model.optimizer.lr, lrate)
164 |         lrate = K.get_value(self.model.optimizer.lr)
165 |         print("> Ir reduced from %f to %f" % (old_lr, lrate))
166 |         return lrate
167 | 
168 |     def fit(self, x_train_noisy, x_train, x_train_noisy_vali, x_train_vali):
169 |         print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
170 |         print("> Training model ...")
171 | 
172 |         nb_epochs = self.nb_epochs
173 |         batch_size = self.batch_size
174 |         learning_rate = self.learning_rate
175 | 
176 |         # ---------------------------------------------------------
177 |         # 1. define callback functions
178 |         # ---------------------------------------------------------
179 |         # Stop training after 10 epoches if the vali_loss not decreasing
180 |         stop_str = EarlyStopping(monitor='val_snr', patience=16, verbose=1, mode='max')
181 | 
182 |         # Reduce learning rate when stop improving lr = lr*factor
183 |         reduce_LR = ReduceLROnPlateau(monitor='val_snr', factor=0.5, patience=2, verbose=1, mode='max', epsilon=0.0001, cooldown=0, min_lr=0)
184 | 
185 |         best_weights = self.weights_dir + '/' + 'G711_WaveformQSRCNN_Weights_Best_bs' + \
186 |                        str(batch_size) + '_lr' + str(learning_rate) + '.h5'
187 |         best_weights = os.path.normcase(best_weights)
188 |         model_save = ModelCheckpoint(best_weights, monitor='val_snr', save_best_only=True, mode='max', save_weights_only=True, period=1)
189 | 
190 |         logger_name = self.log_dir + '/' + 'G711_WaveformQSRCNN_log_bs' + \
191 |                       str(batch_size) + '_lr' + str(learning_rate) + '.csv'
192 |         logger_name = os.path.normcase(logger_name)
193 |         logger = CSVLogger(logger_name, separator=',', append=False)
194 |         tensor_board = TensorBoard(log_dir=self.log_dir, histogram_freq=1)
195 | 
196 |         lrate = LearningRateScheduler(self.step_decay)
197 | 
198 |         start = time.time()
199 | 
200 |         # ---------------------------------------------------------
201 |         # 2. fit the model
202 |         # ---------------------------------------------------------
203 |         print("> Training model " + "using Batch-size: " + str(batch_size) + ", Learning_rate: " + str(learning_rate) + "...")
204 |         hist = self.model.fit(x_train_noisy, x_train, epochs=nb_epochs, batch_size=batch_size, shuffle=True,
205 |                               validation_data=[x_train_noisy_vali, x_train_vali],
206 |                               callbacks=[lrate, reduce_LR, stop_str, model_save, logger])
207 | 
208 |         print("> Saving Completed, Time : ", time.time() - start)
209 |         print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
210 |         return hist
211 | 
212 |     # -------------------------------------------------------------------------------
213 |     # 3. Save loss snr val_loss val_snr as .mat File
214 |     # -------------------------------------------------------------------------------
215 |     def save_training_curves(self, hist):
216 |         print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
217 |         print("> Saving Training and Validation loss-metric curve ...")
218 | 
219 |         start = time.time()
220 | 
221 |         trian_curve_root = "./Opting_Results"
222 |         if not(os.path.exists(trian_curve_root)):
223 |             os.makedirs(trian_curve_root)
224 |         # ---------------------------------------------------------
225 |         # 1. Saving Training Loss
226 |         # ---------------------------------------------------------
227 |         TrainLossVec = trian_curve_root + '/' + 'G711_WaveformQSRCNN_TrainLoss_bs' + \
228 |                        str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
229 |         TrainLossVec = os.path.normcase(TrainLossVec)
230 | 
231 |         sio.savemat(TrainLossVec, {'Train_Loss_Vec': hist.history['loss']})
232 | 
233 |         # ---------------------------------------------------------
234 |         # 2. Saving Training Metric
235 |         # ---------------------------------------------------------
236 |         TrainSNRVec = trian_curve_root + '/' + 'G711_WaveformQSRCNN_TrainMetrice_bs' + \
237 |                       str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
238 |         TrainSNRVec = os.path.normcase(TrainSNRVec)
239 |         sio.savemat(TrainSNRVec, {'Train_SNR_Vec': hist.history['snr']})  # snr
240 | 
241 |         # ---------------------------------------------------------
242 |         # 3. Saving Validation Loss
243 |         # ---------------------------------------------------------
244 |         ValiLossVec = trian_curve_root + '/' + 'G711_WaveformDDQSRCNN_ValiLoss_bs' + \
245 |                       str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
246 |         ValiLossVec = os.path.normcase(ValiLossVec)
247 |         sio.savemat(ValiLossVec, {'Vali_Loss_Vec': hist.history['val_loss']})
248 | 
249 |         # ---------------------------------------------------------
250 |         # 4. Saving Validation Metric
251 |         # ---------------------------------------------------------
252 |         ValiSNRVec = trian_curve_root + '/' + 'G711_WaveformQSRCNN_ValiMetrice_bs' + \
253 |                      str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
254 |         ValiSNRVec = os.path.normcase(ValiSNRVec)
255 |         sio.savemat(ValiSNRVec, {'Vali_SNR_Vec': hist.history['val_snr']})  # val_snr
256 | 
257 |         print("> Saving Completed, Time : ", time.time() - start)
258 |         print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
259 | 
260 |     # -------------------------------------------------------------------------------
261 |     # 4. Evaluate the Trained Model
262 |     # -------------------------------------------------------------------------------
263 |     def evaluation_model(self, x_test_noisy, weights_path=""):
264 |         print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
265 |         print("> Evaluation of the Trained Model ...")
266 |         # ---------------------------------------------------------
267 |         # 1. Load Model Weights
268 |         # ---------------------------------------------------------
269 |         print('> 1. Loading the Weights of the Model ...')
270 |         self.load_weights(weights_path)
271 | 
272 |         # ---------------------------------------------------------
273 |         # 2. Evaluate the Model
274 |         # ---------------------------------------------------------
275 |         start = time.time()
276 |         print('> 2. Evaluating the Model, Please wait for a Moment ...')
277 |         predicted = self.model.predict(x_test_noisy)
278 |         print('> 2. Evaluating Completed, Time : ' + str(time.time() - start))
279 | 
280 |         # ---------------------------------------------------------
281 |         # 3. Saving the Evaluation Result
282 |         # ---------------------------------------------------------
283 |         print('> 3. Saving the Evaluation Result ...')
284 |         start = time.time()
285 |         pre_file_root = "./Test_Outputs"
286 |         if not (os.path.exists(pre_file_root)):
287 |             os.makedirs(pre_file_root)
288 | 
289 |         preOutput = pre_file_root + "/" + "G711_CNN_testplan_vec.mat"
290 |         preOutput = os.path.normcase(preOutput)
291 | 
292 |         sio.savemat(preOutput, {'predictions': predicted})
293 |         print('> 3. Evaluation Result Saving Completed, Time : ' + str(time.time() - start))
294 |         print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
295 | 


--------------------------------------------------------------------------------
/CepstralCNN/WaveformQSRCNN_TrainTest_GPUs.py:
--------------------------------------------------------------------------------
 1 | #########################################################################################################
 2 | # WaveformQSRCNN_TrainTest_GPUs.py: Train and Test QSRCNN for G711/ADPCM/AMR/EVS using Waveform features
 3 | # Author:   Huijun Liu
 4 | # Time:     10.05.2017
 5 | # Location: TU Braunschweig IfN
 6 | #########################################################################################################
 7 | 
 8 | import os
 9 | import sys
10 | import time
11 | 
12 | import WaveformQSRCNN as model
13 | import tensorflow as tf
14 | import DataPrepare as dp
15 | from keras.backend.tensorflow_backend import set_session
16 | 
#####################################################################################
# 0. Settings For GPUs and Parameters
#####################################################################################
using_gpu = 0
if using_gpu == 1:
    # Original author's note on the index below: "x stand for GPU index: 3-x!!"
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5  # cap this process at 50% of the GPU memory
    set_session(tf.Session(config=config))

train_or_test = "train"  # train  or  test

default_opt_params = {'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 1000}
default_model_params = {'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 80}
codec_type_params = {'weights_dir': "./model_weights", 'logdir': "./log"}

# Placeholders; the branch selected below fills in the paths it needs.
train_inputs = ""
train_targets = ""

vali_inputs = ""
vali_targets = ""

test_inputs = ""

if train_or_test == "train":
    # -------------------------------------------------------------------------------
    # 1. Load Data
    # -------------------------------------------------------------------------------
    train_inputs = "./TrainValiData/Train_inputSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat"
    train_targets = "./TrainValiData/Train_targetSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat"

    vali_inputs = "./TrainValiData/Vali_inputSet_ALaw_defautLang_OLdata_ValiTrain_smallVali_Reverse_Extended.mat"
    vali_targets = "./TrainValiData/Vali_targetSet_ALaw_defautLang_OLdata_ValiTrain_smallVali_Reverse_Extended.mat"

    x_train_noisy, x_train, x_train_noisy_vali, x_train_vali = dp.load_train_data(train_inputs, train_targets,
                                                                                  vali_inputs, vali_targets)

    # -------------------------------------------------------------------------------
    # 2. Init Waveform-QSRCNN Model
    # -------------------------------------------------------------------------------
    qsrcnn = model.WaveformQSRCNN(opt_params=default_opt_params,
                                  model_params=default_model_params,
                                  codec_type_params=codec_type_params)

    # -------------------------------------------------------------------------------
    # 3. Fit The Waveform-QSRCNN Model
    # -------------------------------------------------------------------------------
    hist = qsrcnn.fit(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali)

    # -------------------------------------------------------------------------------
    # 4. Save Weights and Training Curves
    # -------------------------------------------------------------------------------
    qsrcnn.save_weights()
    qsrcnn.save_training_curves(hist=hist)

elif train_or_test == "test":
    # -------------------------------------------------------------------------------
    # 1. Load Data
    # -------------------------------------------------------------------------------
    test_inputs = "inputTestSet_g711concat_PDandOLAI_Frame_80v73.mat"
    x_test_noisy = dp.load_test_data(test_inputs)

    # -------------------------------------------------------------------------------
    # 2. Init Waveform-QSRCNN Model
    # -------------------------------------------------------------------------------
    # codec_type_params is the dict defined in section 0; it needs no re-definition.
    qsrcnn = model.WaveformQSRCNN(opt_params=default_opt_params,
                                  model_params=default_model_params,
                                  codec_type_params=codec_type_params)

    # -------------------------------------------------------------------------------
    # 3. Evaluate The Waveform-QSRCNN Model
    # -------------------------------------------------------------------------------
    qsrcnn.evaluation_model(x_test_noisy)

else:
    # ValueError is a subclass of Exception, so broad handlers keep working.
    raise ValueError("Do you want to  train or test the model ? Please set the variable train_or_test !")
    # sys.exit("Please set the codec name !")
96 | 


--------------------------------------------------------------------------------
/CepstralCNN/log/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/log/README.md


--------------------------------------------------------------------------------
/CepstralCNN/model_weights/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/model_weights/README.md


--------------------------------------------------------------------------------
/CepstralCNN/weightnorm.py:
--------------------------------------------------------------------------------
  1 | from keras import backend as K
  2 | from keras.optimizers import SGD,Adam
  3 | import tensorflow as tf
  4 | 
  5 | # adapted from keras.optimizers.SGD
class SGDWithWeightnorm(SGD):
    """SGD (momentum / Nesterov) that updates weight tensors in a weight-normalized form.

    Parameters whose shape has more than one dimension are updated through the
    (V, g) parameterization produced by get_weightnorm_params_and_grads; all
    other parameters follow the plain momentum-SGD rule.
    """

    def get_updates(self, params, constraints, loss):
        """Build and return the list of update ops for `params` given `loss`.

        `constraints` is looked up with `p in constraints` / `constraints[p]`,
        i.e. it is expected to map a parameter to a callable constraint.
        """
        grads = self.get_gradients(loss, params)
        self.updates = []

        lr = self.lr
        # Time-based decay; the iteration counter is only advanced when decay is on.
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))
            self.updates .append(K.update_add(self.iterations, 1))

        # momentum: one zero-initialised accumulator per parameter
        shapes = [K.get_variable_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):

            # if a weight tensor (len > 1) use weight normalized parameterization
            ps = K.get_variable_shape(p)
            if len(ps) > 1:

                # get weight normalization parameters
                V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(p, g)

                # momentum container for the 'g' parameter
                # (created here; it is not added to self.weights)
                V_scaler_shape = K.get_variable_shape(V_scaler)
                m_g = K.zeros(V_scaler_shape)

                # update g parameters
                v_g = self.momentum * m_g - lr * grad_g  # velocity
                self.updates.append(K.update(m_g, v_g))
                if self.nesterov:
                    new_g_param = g_param + self.momentum * v_g - lr * grad_g
                else:
                    new_g_param = g_param + v_g

                # update V parameters (reuses the per-parameter accumulator m)
                v_v = self.momentum * m - lr * grad_V  # velocity
                self.updates.append(K.update(m, v_v))
                if self.nesterov:
                    new_V_param = V + self.momentum * v_v - lr * grad_V
                else:
                    new_V_param = V + v_v

                # if there are constraints we apply them to V, not W
                if p in constraints:
                    c = constraints[p]
                    new_V_param = c(new_V_param)

                # wn param updates --> W updates
                add_weightnorm_param_updates(self.updates, new_V_param, new_g_param, p, V_scaler)

            else: # normal SGD with momentum
                v = self.momentum * m - lr * g  # velocity
                self.updates.append(K.update(m, v))

                if self.nesterov:
                    new_p = p + self.momentum * v - lr * g
                else:
                    new_p = p + v

                # apply constraints
                if p in constraints:
                    c = constraints[p]
                    new_p = c(new_p)

                self.updates.append(K.update(p, new_p))
        return self.updates
 73 | 
 74 | # adapted from keras.optimizers.Adam
class AdamWithWeightnorm(Adam):
    """Adam that updates weight tensors in a weight-normalized (V, g) form.

    Parameters whose shape has more than one dimension are updated through the
    parameterization produced by get_weightnorm_params_and_grads; all other
    parameters follow the regular Adam rule.
    """

    def get_updates(self, params, constraints, loss):
        """Build and return the list of update ops for `params` given `loss`.

        `constraints` is looked up with `p in constraints` / `constraints[p]`,
        i.e. it is expected to map a parameter to a callable constraint.
        """
        grads = self.get_gradients(loss, params)
        # The iteration counter is advanced on every update.
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        # Bias-corrected step size for step t.
        t = self.iterations + 1
        lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))

        # First (ms) and second (vs) moment accumulators, one pair per parameter.
        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):

            # if a weight tensor (len > 1) use weight normalized parameterization
            # this is the only part changed w.r.t. keras.optimizers.Adam
            ps = K.get_variable_shape(p)
            if len(ps)>1:

                # get weight normalization parameters
                V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(p, g)

                # Adam containers for the 'g' parameter
                # (created here; they are not added to self.weights)
                V_scaler_shape = K.get_variable_shape(V_scaler)
                m_g = K.zeros(V_scaler_shape)
                v_g = K.zeros(V_scaler_shape)

                # update g parameters
                m_g_t = (self.beta_1 * m_g) + (1. - self.beta_1) * grad_g
                v_g_t = (self.beta_2 * v_g) + (1. - self.beta_2) * K.square(grad_g)
                new_g_param = g_param - lr_t * m_g_t / (K.sqrt(v_g_t) + self.epsilon)
                self.updates.append(K.update(m_g, m_g_t))
                self.updates.append(K.update(v_g, v_g_t))

                # update V parameters (reuses the per-parameter accumulators m and v)
                m_t = (self.beta_1 * m) + (1. - self.beta_1) * grad_V
                v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(grad_V)
                new_V_param = V - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
                self.updates.append(K.update(m, m_t))
                self.updates.append(K.update(v, v_t))

                # if there are constraints we apply them to V, not W
                if p in constraints:
                    c = constraints[p]
                    new_V_param = c(new_V_param)

                # wn param updates --> W updates
                add_weightnorm_param_updates(self.updates, new_V_param, new_g_param, p, V_scaler)

            else: # do optimization normally
                m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
                v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

                self.updates.append(K.update(m, m_t))
                self.updates.append(K.update(v, v_t))

                new_p = p_t
                # apply constraints
                if p in constraints:
                    c = constraints[p]
                    new_p = c(new_p)
                self.updates.append(K.update(p, new_p))
        return self.updates
144 | 
145 | 
def get_weightnorm_params_and_grads(p, g):
    """Express weight `p` and its gradient `g` in weight-normalized form.

    Returns the tuple (V, V_norm, V_scaler, g_param, grad_g, grad_V), where
    V_scaler is a freshly created ones-vector with one entry per slice along
    the last axis of `p`, and all norms are taken over the remaining axes.
    """
    weight_shape = K.get_variable_shape(p)
    reduce_axes = list(range(len(weight_shape) - 1))
    # Shape that broadcasts a last-axis vector against the full weight tensor.
    broadcast_shape = [1] * len(reduce_axes) + [-1]

    # Weight scaler V_scaler = g/||V||; starts at ones so the effective
    # parameters are unchanged (last-axis layout assumes TensorFlow).
    V_scaler = K.ones((weight_shape[-1],))

    # V = ||V||/g * W
    V = p / tf.reshape(V_scaler, broadcast_shape)

    # Split V_scaler into ||V|| and the g parameter.
    V_norm = tf.sqrt(tf.reduce_sum(tf.square(V), reduce_axes))
    g_param = V_scaler * V_norm

    # Gradients with respect to g and V.
    grad_g = tf.reduce_sum(g * V, reduce_axes) / V_norm
    scaler_bcast = tf.reshape(V_scaler, broadcast_shape)
    projection = tf.reshape(grad_g / V_norm, broadcast_shape) * V
    grad_V = scaler_bcast * (g - projection)

    return V, V_norm, V_scaler, g_param, grad_g, grad_V
167 | 
168 | 
def add_weightnorm_param_updates(updates, new_V_param, new_g_param, W, V_scaler):
    """Append the updates that write a new (V, g) pair back into ``W``.

    Two update ops are appended to ``updates`` in place, in this order: the
    new value of ``W`` and the new value of ``V_scaler``. Nothing is returned.

    Args:
        updates: list that receives the update ops.
        new_V_param: updated V tensor.
        new_g_param: updated g vector (one entry per slice of the last axis).
        W: weight variable to overwrite.
        V_scaler: scaler variable to overwrite with ``g / ||V||``.
    """
    rank = len(K.get_variable_shape(new_V_param))
    reduce_axes = list(range(rank - 1))
    bcast_shape = [1] * (rank - 1) + [-1]

    # Norm of V over every axis except the last.
    v_norm = tf.sqrt(tf.reduce_sum(tf.square(new_V_param), reduce_axes))
    scaler = new_g_param / v_norm
    weights = tf.reshape(scaler, bcast_shape) * new_V_param

    updates.extend([K.update(W, weights), K.update(V_scaler, scaler)])
179 | 
180 | 
181 | # data based initialization for a given Keras model
def data_based_init(model, input):
    """Run a data-dependent re-initialisation of the model's weighted layers.

    For every layer that exposes both ``W`` and ``b``, the layer output is
    evaluated on ``input``; its mean ``m`` and standard deviation ``s`` over
    all axes but the last are computed, and the layer is reassigned as
    ``W <- W / s`` and ``b <- (b - m) / s``. Layers are processed one at a
    time, each with its own session run.

    Args:
        model: a built Keras model.
        input: a feed dict, a list of arrays (zipped with ``model.inputs``),
            or a single array (fed to ``model.inputs[0]``).

    NOTE(review): ``W`` / ``b`` look like Keras 1 attribute names (Keras 2
    layers expose ``kernel`` / ``bias``); confirm against the installed Keras
    version, otherwise no layer is selected and this function is a no-op.
    """
    # Build the feed dict. A caller-supplied dict is copied so that adding
    # the learning phase below never mutates the caller's object.
    if isinstance(input, dict):
        feed_dict = dict(input)
    elif isinstance(input, list):
        feed_dict = dict(zip(model.inputs, input))
    else:
        feed_dict = {model.inputs[0]: input}

    # Add the learning phase (training mode) if the model needs it.
    if model.uses_learning_phase and K.learning_phase() not in feed_dict:
        feed_dict[K.learning_phase()] = 1

    # Collect (name, output, weight, bias) for every layer that has both.
    layer_output_weight_bias = []
    for layer in model.layers:
        if hasattr(layer, 'W') and hasattr(layer, 'b'):
            assert layer.built
            # If the layer has more than one node, only the first is used.
            layer_output_weight_bias.append(
                (layer.name, layer.get_output_at(0), layer.W, layer.b))

    # Re-initialise layer by layer, each in its own session run.
    sess = K.get_session()
    for name, output, W, b in layer_output_weight_bias:
        print('Performing data dependent initialization for layer ' + name)
        reduce_axes = list(range(len(output.get_shape()) - 1))
        m, v = tf.nn.moments(output, reduce_axes)
        s = tf.sqrt(v + 1e-10)  # epsilon keeps the division finite
        w_scale = tf.reshape(s, [1] * (len(W.get_shape()) - 1) + [-1])
        updates = tf.group(W.assign(W / w_scale), b.assign((b - m) / s))
        sess.run(updates, feed_dict)


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2018, Huijun Liu, Ziyue Zhao, Tim Fingscheidt
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/QSR-WGAN-GP/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | push
3 | 
4 | *.pyc
5 | *.pkl
6 | *.jpg
7 | *.png
8 | 


--------------------------------------------------------------------------------
/QSR-WGAN-GP/.idea/QSR-WGAN-GP.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="jdk" jdkName="Python 3.5.3 (C:\Users\liu\AppData\Local\Continuum\Anaconda3\envs\py35\python.exe)" jdkType="Python SDK" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/QSR-WGAN-GP/.idea/markdown-navigator.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="MarkdownProjectSettings">
 4 |     <PreviewSettings splitEditorLayout="SPLIT" splitEditorPreview="PREVIEW" useGrayscaleRendering="false" zoomFactor="1.0" maxImageWidth="0" showGitHubPageIfSynced="false" allowBrowsingInPreview="false" synchronizePreviewPosition="true" highlightPreviewType="NONE" highlightFadeOut="5" highlightOnTyping="true" synchronizeSourcePosition="true" verticallyAlignSourceAndPreviewSyncPosition="true" showSearchHighlightsInPreview="false" showSelectionInPreview="true">
 5 |       <PanelProvider>
 6 |         <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.panel" providerName="Default - Swing" />
 7 |       </PanelProvider>
 8 |     </PreviewSettings>
 9 |     <ParserSettings gitHubSyntaxChange="false">
10 |       <PegdownExtensions>
11 |         <option name="ABBREVIATIONS" value="false" />
12 |         <option name="ANCHORLINKS" value="true" />
13 |         <option name="ASIDE" value="false" />
14 |         <option name="ATXHEADERSPACE" value="true" />
15 |         <option name="AUTOLINKS" value="true" />
16 |         <option name="DEFINITIONS" value="false" />
17 |         <option name="DEFINITION_BREAK_DOUBLE_BLANK_LINE" value="false" />
18 |         <option name="FENCED_CODE_BLOCKS" value="true" />
19 |         <option name="FOOTNOTES" value="false" />
20 |         <option name="HARDWRAPS" value="false" />
21 |         <option name="INSERTED" value="false" />
22 |         <option name="QUOTES" value="false" />
23 |         <option name="RELAXEDHRULES" value="true" />
24 |         <option name="SMARTS" value="false" />
25 |         <option name="STRIKETHROUGH" value="true" />
26 |         <option name="SUBSCRIPT" value="false" />
27 |         <option name="SUPERSCRIPT" value="false" />
28 |         <option name="SUPPRESS_HTML_BLOCKS" value="false" />
29 |         <option name="SUPPRESS_INLINE_HTML" value="false" />
30 |         <option name="TABLES" value="true" />
31 |         <option name="TASKLISTITEMS" value="true" />
32 |         <option name="TOC" value="false" />
33 |         <option name="WIKILINKS" value="true" />
34 |       </PegdownExtensions>
35 |       <ParserOptions>
36 |         <option name="COMMONMARK_LISTS" value="true" />
37 |         <option name="DUMMY" value="false" />
38 |         <option name="EMOJI_SHORTCUTS" value="true" />
39 |         <option name="FLEXMARK_FRONT_MATTER" value="false" />
40 |         <option name="GFM_LOOSE_BLANK_LINE_AFTER_ITEM_PARA" value="false" />
41 |         <option name="GFM_TABLE_RENDERING" value="true" />
42 |         <option name="GITBOOK_URL_ENCODING" value="false" />
43 |         <option name="GITHUB_EMOJI_URL" value="false" />
44 |         <option name="GITHUB_LISTS" value="false" />
45 |         <option name="GITHUB_WIKI_LINKS" value="true" />
46 |         <option name="JEKYLL_FRONT_MATTER" value="false" />
47 |         <option name="SIM_TOC_BLANK_LINE_SPACER" value="true" />
48 |       </ParserOptions>
49 |     </ParserSettings>
50 |     <HtmlSettings headerTopEnabled="false" headerBottomEnabled="false" bodyTopEnabled="false" bodyBottomEnabled="false" embedUrlContent="false" addPageHeader="true">
51 |       <GeneratorProvider>
52 |         <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.generator" providerName="Default Swing HTML Generator" />
53 |       </GeneratorProvider>
54 |       <headerTop />
55 |       <headerBottom />
56 |       <bodyTop />
57 |       <bodyBottom />
58 |     </HtmlSettings>
59 |     <CssSettings previewScheme="UI_SCHEME" cssUri="" isCssUriEnabled="false" isCssTextEnabled="false" isDynamicPageWidth="true">
60 |       <StylesheetProvider>
61 |         <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.css" providerName="Default Swing Stylesheet" />
62 |       </StylesheetProvider>
63 |       <ScriptProviders />
64 |       <cssText />
65 |     </CssSettings>
66 |     <HtmlExportSettings updateOnSave="false" parentDir="$ProjectFileDir$" targetDir="$ProjectFileDir$" cssDir="" scriptDir="" plainHtml="false" imageDir="" copyLinkedImages="false" imageUniquifyType="0" targetExt="" useTargetExt="false" noCssNoScripts="false" linkToExportedHtml="true" exportOnSettingsChange="true" regenerateOnProjectOpen="false" />
67 |     <LinkMapSettings>
68 |       <textMaps />
69 |     </LinkMapSettings>
70 |   </component>
71 | </project>


--------------------------------------------------------------------------------
/QSR-WGAN-GP/.idea/markdown-navigator/profiles_settings.xml:
--------------------------------------------------------------------------------
1 | <component name="MarkdownNavigator.ProfileManager">
2 |   <settings default="" pdf-export="" />
3 | </component>


--------------------------------------------------------------------------------
/QSR-WGAN-GP/.idea/misc.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="ProjectLevelVcsManager" settingsEditedManually="false">
 4 |     <OptionsSetting value="true" id="Add" />
 5 |     <OptionsSetting value="true" id="Remove" />
 6 |     <OptionsSetting value="true" id="Checkout" />
 7 |     <OptionsSetting value="true" id="Update" />
 8 |     <OptionsSetting value="true" id="Status" />
 9 |     <OptionsSetting value="true" id="Edit" />
10 |     <ConfirmationsSetting value="0" id="Add" />
11 |     <ConfirmationsSetting value="0" id="Remove" />
12 |   </component>
13 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.3 (C:\Users\liu\AppData\Local\Continuum\Anaconda3\envs\py35\python.exe)" project-jdk-type="Python SDK" />
14 | </project>


--------------------------------------------------------------------------------
/QSR-WGAN-GP/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/QSR-WGAN-GP.iml" filepath="$PROJECT_DIR$/.idea/QSR-WGAN-GP.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/QSR-WGAN-GP/QSR-GANGP_Train_GPUs.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | # -*- coding: UTF-8 -*-
  3 | 
  4 | import os, sys
  5 | sys.path.append(os.getcwd())
  6 | 
  7 | import time
  8 | import numpy as np
  9 | import scipy.io as sio
 10 | import scipy.io.wavfile as swave
 11 | from numpy import random
 12 | 
 13 | import tensorflow as tf
 14 | import tflib as lib
 15 | import tflib as lib
 16 | import tflib.ops.linear
 17 | import tflib.ops.conv1d
 18 | import matplotlib.pyplot as plt
 19 | from tensorflow.python.framework import ops
 20 | from keras.backend.tensorflow_backend import set_session
 21 | #os.environ["CUDA_VISIBLE_DEVICES"] = "3" # x stand for GPU index: 3-x!!
 22 | #config = tf.ConfigProto()
 23 | #config.gpu_options.per_process_gpu_memory_fraction = 0.6  # Only 30% Memory of GPUs can be used
 24 | #set_session(tf.Session(config=config))
 25 | 
 26 | 
 27 | #####################################################################################
 28 | # 0. Hyperparameter Init
 29 | #####################################################################################
# NOTE(review): the original comments in this section referred to a text /
# "Billion Word" setup that this script never loads; they are reworded below
# to describe only what the code in this file does with each constant.
BATCH_SIZE = 32  # Batch size; also the fixed first dimension of the placeholders
ITERS = 200000  # Number of iterations of the outer training loop
SEQ_LEN = 80  # Column at which the stacked [input | target] rows are split
DIM = 128  # Channel count passed to the Conv1D layers of Generator / Discriminator
CRITIC_ITERS = 5  # Passes through the inner "Train critic" loop per outer iteration
LAMBDA = 10  # Gradient penalty lambda hyperparameter.
MAX_N_EXAMPLES = 100000  # Not referenced anywhere else in this script
 43 | 
 44 | 
 45 | #####################################################################################
 46 | # 1. load data
 47 | #####################################################################################
# Load the training input and target matrices from .mat files, shuffle the
# rows jointly, and add a trailing axis of size 1 to each.
print('> Loading data... ')
# Load Input Data
mat_input = 'Train_G711_PreProc_defautLang/Train_inputSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat'
mat_input = os.path.normcase(mat_input)
print('> Training Input: ' + mat_input)

x_train_noisy = sio.loadmat(mat_input)
x_train_noisy = x_train_noisy['inputSetNorm']
x_train_noisy = np.array(x_train_noisy)
# x_train_noisy = input_min_max_scaler.fit_transform(x_train_noisy)

# Load Input Data for Validation (disabled)
# mat_input_vali = 'Train_G711_PreProc_defautLang/Vali_inputSet_ALaw_defautLang_OLdata_ValiTrain_smallVali_Reverse_Extended.mat'
# mat_input_vali = os.path.normcase(mat_input_vali)
# print('> Validation Input: ' + mat_input_vali)

# x_train_noisy_vali = sio.loadmat(mat_input_vali)
# x_train_noisy_vali = x_train_noisy_vali['inputSetNorm']
# x_train_noisy_vali = np.array(x_train_noisy_vali)

# Load Target Data
mat_target = 'Train_G711_PreProc_defautLang/Train_targetSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat'
mat_target = os.path.normcase(mat_target)
print('> Training Target: ' + mat_target)

x_train = sio.loadmat(mat_target)
x_train = x_train['targetSet']
x_train = np.array(x_train)
# x_train = target_min_max_scaler.fit_transform(x_train)

# Load Target Data for Validation (disabled)
# mat_target_vali = 'Train_G711_PreProc_defautLang/Vali_targetSet_ALaw_defautLang_OLdata_ValiTrain_smallVali_Reverse_Extended.mat'
# mat_target_vali = os.path.normcase(mat_target_vali)
# print('> Validation Target: ' + mat_target_vali)

# x_train_vali = sio.loadmat(mat_target_vali)
# x_train_vali = x_train_vali['targetSet']
# x_train_vali = np.array(x_train_vali)

# Shuffle the training pairs. `random` here is numpy.random, so the seed
# below makes the np.random.shuffle call reproducible.
random.seed(1331)

# Stack input and target side by side so one row shuffle keeps pairs aligned,
# then split them again at column SEQ_LEN.
train = np.column_stack((x_train_noisy, x_train))
np.random.shuffle(train)
x_train_noisy = train[:, :SEQ_LEN]
x_train = train[:, SEQ_LEN:]

# Reshape the training pairs to (rows, columns, 1)
x_train_noisy = np.reshape(x_train_noisy, (x_train_noisy.shape[0], x_train_noisy.shape[1], 1))
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# Validation shuffle / reshape (disabled together with the validation load above)
# validation = np.column_stack((x_train_noisy_vali, x_train_vali))
# np.random.shuffle(validation )
# x_train_noisy_vali = validation [:, :SEQ_LEN]
# x_train_vali = validation [:, SEQ_LEN:]

# x_train_noisy_vali = np.reshape(x_train_noisy_vali, (x_train_noisy_vali.shape[0], x_train_noisy_vali.shape[1], 1))
# x_train_vali = np.reshape(x_train_vali, (x_train_vali.shape[0], x_train_vali.shape[1], 1))

print('> Data Loaded. Model Compiling... ')
108 | 
def selu(x):
    """Return ``scale * (x if x >= 0 else alpha * elu(x))`` element-wise.

    All ops are created inside a name scope called 'elu'.
    """
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    with ops.name_scope('elu'):
        non_negative = x >= 0.0
        negative_branch = alpha * tf.nn.elu(x)
        selected = tf.where(non_negative, x, negative_branch)
        return scale * selected
114 | 
def ResBlock(name, inputs):
    """Residual block: two (ReLU -> Conv1D) stages added back onto ``inputs``.

    The two convolutions are named ``name + '.1'`` and ``name + '.2'``, map
    DIM channels to DIM channels without biases, and receive the literal 5 as
    their fourth positional argument (presumably the filter width -- confirm
    in tflib.ops.conv1d). The branch output is scaled by 0.3 before the add.
    """
    branch = inputs
    for suffix in ('.1', '.2'):
        branch = tf.nn.relu(branch)
        branch = lib.ops.conv1d.Conv1D(name + suffix, DIM, DIM, 5, branch, biases=False)
    return inputs + (0.3 * branch)
122 | 
123 | 
124 | ####################################################
125 | # 1. Define Generator Model
126 | ####################################################
def Generator(inputs):
    """Build the generator graph and return its tanh-squashed output.

    Pipeline: swap the last two axes, an input Conv1D (SEQ_LEN -> DIM), five
    residual blocks, an output Conv1D (DIM -> SEQ_LEN), swap the last two
    axes back, then tanh.
    """
    net = tf.transpose(inputs, [0, 2, 1])
    net = lib.ops.conv1d.Conv1D('Generator.Input', SEQ_LEN, DIM, 1, net, biases=False)

    for block_name in ('Generator.1', 'Generator.2', 'Generator.3',
                       'Generator.4', 'Generator.5'):
        net = ResBlock(block_name, net)

    net = lib.ops.conv1d.Conv1D('Generator.Output', DIM, SEQ_LEN, 1, net, biases=False)
    net = tf.transpose(net, [0, 2, 1])
    return tf.tanh(net)
139 | 
140 | ####################################################
141 | # 2. Define Discriminator Model
142 | ####################################################
def Discriminator(inputs):
    """Build the discriminator (critic) graph and return its linear output.

    Pipeline: swap the last two axes, an input Conv1D (SEQ_LEN -> DIM), five
    residual blocks, a reshape to rows of width BATCH_SIZE * DIM, then a
    bias-free linear layer with a single output unit.

    NOTE(review): the flattened row width is BATCH_SIZE * DIM, which ties the
    feature size to the batch size; confirm this is intended rather than a
    per-example feature size.
    """
    flat_width = BATCH_SIZE * DIM

    net = tf.transpose(inputs, [0, 2, 1])
    net = lib.ops.conv1d.Conv1D('Discriminator.Input', SEQ_LEN, DIM, 1, net, biases=False)

    for block_name in ('Discriminator.1', 'Discriminator.2', 'Discriminator.3',
                       'Discriminator.4', 'Discriminator.5'):
        net = ResBlock(block_name, net)

    net = tf.reshape(net, [-1, flat_width])
    return lib.ops.linear.Linear('Discriminator.Output', flat_width, 1, net, biases=False)
154 | 
155 | ####################################################
156 | # 3. Define inputs of all Models
157 | ####################################################
# Placeholders for one batch of target ("real") and quantized input frames;
# both have the fixed shape [BATCH_SIZE, SEQ_LEN, 1].
real_inputs = tf.placeholder(tf.float32, shape=[BATCH_SIZE, SEQ_LEN, 1])
quan_inputs = tf.placeholder(tf.float32, shape=[BATCH_SIZE, SEQ_LEN, 1])

# Generator output for the quantized batch.
fake_inputs = Generator(quan_inputs)

# Discriminator applied to the real batch and to the generated batch.
# NOTE(review): Discriminator() is called three times in this section;
# presumably tflib reuses parameters by name across calls -- confirm in tflib.
disc_real = Discriminator(real_inputs)
disc_fake = Discriminator(fake_inputs)

####################################################
# 4. WGAN Lipschitz (gradient) penalty
####################################################
# One random mixing coefficient per example, broadcast over the other axes.
alpha = tf.random_uniform(shape=[BATCH_SIZE, 1, 1], minval=0., maxval=1.)
differences = fake_inputs - real_inputs
interpolates = real_inputs + (alpha * differences)

# 1. loss
# Gradient of the discriminator output with respect to the interpolated
# points; the penalty pushes its per-example L2 norm towards 1.
gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2]))
gradient_penalty = LAMBDA * tf.reduce_mean((slopes - 1.) ** 2)

# Discriminator loss: mean(fake) - mean(real) + gradient penalty.
disc_loss = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
disc_loss += gradient_penalty
# Generator loss depends only on quan_inputs (through fake_inputs).
gen_loss = -tf.reduce_mean(disc_fake)

# 2. optimizers
# Each Adam optimizer only updates the parameters selected by name prefix.
gen_params = lib.params_with_name('Generator')
disc_params = lib.params_with_name('Discriminator')
gen_train_op = tf.train.AdamOptimizer(learning_rate=5e-6, beta1=0.5, beta2=0.9).minimize(gen_loss, var_list=gen_params)
disc_train_op = tf.train.AdamOptimizer(learning_rate=5e-6, beta1=0.5, beta2=0.9).minimize(disc_loss, var_list=disc_params)
187 | 
188 | # 4. dataset generator
def batch_generator():
    """Yield ``(quantized_batch, real_batch)`` pairs of BATCH_SIZE rows, forever.

    Batches are consecutive, non-overlapping row slices of the module-level
    ``x_train_noisy`` / ``x_train`` arrays; a trailing partial batch is
    dropped and iteration restarts from row 0 after each pass.

    Raises:
        ValueError: on the first ``next()`` if there are fewer rows than
            BATCH_SIZE. Without this check the loop below would spin forever
            without ever yielding.
    """
    n_examples = x_train_noisy.shape[0]
    if n_examples < BATCH_SIZE:
        raise ValueError(
            'batch_generator needs at least BATCH_SIZE=%d examples, got %d'
            % (BATCH_SIZE, n_examples))

    while True:
        for start in range(0, n_examples - BATCH_SIZE + 1, BATCH_SIZE):
            stop = start + BATCH_SIZE
            yield x_train_noisy[start:stop], x_train[start:stop]
193 | 
# Make sure the output directory exists. exist_ok=True replaces the
# check-then-create pair with a single call that tolerates an existing
# directory.
os.makedirs('out/', exist_ok=True)
196 | 
197 | #################################################################
198 | # 5. Training Loop
199 | #################################################################
# Training loop: per outer iteration, run CRITIC_ITERS inner steps followed by
# one extra generator step; every 10th iteration print the latest losses and
# plot the last batch (real / quantized / generated).
with tf.Session() as session:
    session.run(tf.global_variables_initializer())  # variables init

    # Endless stream of (quantized, real) batches.
    generator = batch_generator()

    for iteration in range(ITERS):
        # Train critic
        for _step in range(CRITIC_ITERS):
            quant_batch, real_batch = next(generator)
            # NOTE(review): the generator is also stepped inside this critic
            # loop, so it receives CRITIC_ITERS + 1 updates per outer
            # iteration. Left unchanged; confirm whether this is intended.
            _, _gen_loss = session.run([gen_train_op, gen_loss], feed_dict={quan_inputs: quant_batch})
            _, _disc_loss = session.run([disc_train_op, disc_loss], feed_dict={real_inputs: real_batch, quan_inputs: quant_batch})

        # Train generator
        quant_batch, real_batch = next(generator)
        _, _gen_loss = session.run([gen_train_op, gen_loss], feed_dict={quan_inputs: quant_batch})

        if iteration % 10 == 0:
            print('epoch %s, disc_loss: %s, gen_loss: %s' % (iteration, _disc_loss, _gen_loss))

            realspeech, quanspeech, prediction = session.run([real_inputs, quan_inputs, fake_inputs], feed_dict={real_inputs: real_batch, quan_inputs: quant_batch})

            # Flatten each batch into a single trace and overlay the three.
            fig = plt.figure(facecolor='white')
            ax = fig.add_subplot(111)
            ax.plot(np.reshape(realspeech, (realspeech.size,)), label='RealSpeech')
            ax.plot(np.reshape(quanspeech, (quanspeech.size,)), label='QuanSpeech')
            ax.plot(np.reshape(prediction, (prediction.size,)), label='Prediction')

            ax.legend()
            plt.show()
            # Release the figure; otherwise one figure per report accumulates
            # for the lifetime of the run when show() does not destroy it.
            plt.close(fig)
232 | 
233 | 


--------------------------------------------------------------------------------
/QSR-WGAN-GP/QSR-WGAN-GP_Train_GPUs.py:
--------------------------------------------------------------------------------
  1 | #######################################################################################################################
  2 | # Project QSRCNN: Quantized Speech Reconstruction using Convolutional Neural Network     V1.0
  3 | # =====================================================================================================================
  4 | # QSR-WGAN-GP_Train_GPUs.py: Trying to use WGAN-GP to do Quantized Speech Reconstruction
  5 | #
  6 | #
  7 | # =====================================================================================================================
  8 | # Abteilung Signalverarbeitung, IfN, Technische Universitaet Braunschweig, Deutschland
  9 | # Author:    Huijun Liu M.Sc.
 10 | # Date:      16.07.2017
 11 | #######################################################################################################################
 12 | import os
 13 | import time
 14 | import math
 15 | import numpy as np
 16 | import scipy.io as sio
 17 | import tensorflow as tf
 18 | import keras.backend as K
 19 | import matplotlib.pyplot as plt
 20 | import scipy.io.wavfile as swave
 21 | import keras.optimizers as optimizers
 22 | 
 23 | from numpy import random
 24 | from keras import initializers
 25 | from keras.models import Model
 26 | from keras.layers import Input
 27 | from keras.layers.merge import Add
 28 | from keras.layers.core import Dense, Flatten, Activation
 29 | from keras.layers.normalization import BatchNormalization
 30 | from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D
 31 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard
 32 | 
 33 | # from weightnorm import AdamWithWeightnorm
 34 | from tensorflow.python.framework import ops
 35 | from keras.backend.tensorflow_backend import set_session
 36 | 
 37 | 
#####################################################################################
# 0. Settings for GPUs
#####################################################################################
# Restrict this process to one CUDA device. Original author's note: "x stands
# for GPU index: 3-x" -- presumably the physical device order is reversed on
# the author's machine; verify before changing the value.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.3  # Cap this process at 30% of GPU memory
# Register the configured session as the Keras session.
# NOTE(review): QSRWGAN.__init__ below creates a plain tf.Session() and passes
# it to K.set_session, which appears to replace this configured session --
# confirm whether the memory cap is still in effect during training.
set_session(tf.Session(config=config))
 45 | 
 46 | #####################################################################################
 47 | # 1. Define new Metric, Activation function and Loss function
 48 | #####################################################################################
 49 | 
 50 | 
 51 | def snr(y_true, y_pred):
 52 |     """
 53 |         SNR is Signal to Noise Ratio
 54 | 
 55 |     """
 56 |     return 10.0 * K.log((K.sum(K.square(y_true))) / (K.sum(K.square(y_pred - y_true)))) / K.log(10.0)
 57 | 
 58 | 
 59 | def selu(x):
 60 |     with ops.name_scope('elu') as scope:
 61 |         alpha = 1.6732632423543772848170429916717
 62 |         scale = 1.0507009873554804934193349852946
 63 |         return scale*tf.where(x>=0.0, x, alpha*tf.nn.elu(x))
 64 | 
 65 | 
 66 | #####################################################################################
 67 | # 2. Define Generative model and Adversarial model
 68 | #####################################################################################
def create_generator(inputs_gen):
    """
    Placeholder for the generator network.

    NOTE(review): the body of this function has been elided (the original
    docstring was a row of 'x' characters). ``decoded`` is never assigned in
    this function and ``inputs_gen`` is unused, so the call fails with a
    NameError unless a module-level ``decoded`` exists -- none is visible in
    this part of the file. The generator layers must be filled in before
    QSRWGAN can be constructed.
    """
    return decoded
 74 | 
 75 | 
 76 | def create_discriminator(inputs_disc):
 77 |     x = Conv1D(32, 3, padding='same', name='dis_conv_1')(inputs_disc)
 78 |     x = Activation(selu, name='dis_act_1')(x)
 79 | 
 80 |     x = Conv1D(64, 3, padding='same', name='dis_conv_2')(x)
 81 |     x = BatchNormalization(name='dis_bnorm_1')(x)
 82 |     x1 = Activation(selu, name='dis_act_2')(x)
 83 | 
 84 |     m1 = Add(name='dis_add_1')([inputs_disc, x1])
 85 | 
 86 |     x = Conv1D(32, 3, padding='same', name='dis_conv_3')(m1)
 87 |     x = Activation(selu, name='dis_act_3')(x)
 88 | 
 89 |     x = Conv1D(64, 3, padding='same', name='dis_conv_4')(x)
 90 |     x = BatchNormalization(name='dis_bnorm_2')(x)
 91 |     x2 = Activation(selu, name='dis_act_4')(x)
 92 |     m2 = Add(name='dis_add_2')([m1, x2])
 93 | 
 94 |     discri = Conv1D(1, 5, padding='same', name='dis_conv_5')(m2)
 95 | 
 96 |     return discri
 97 | 
 98 | #####################################################################################
 99 | # 3. Define Training process of QSR_WGAN_GP
100 | #####################################################################################
# Column at which load_data() splits the stacked [input | target] rows.
SEQ_LEN = 80
# NOTE(review): not referenced in the visible part of this file;
# batch_generator() carries its own default of 128 -- confirm they are meant
# to stay in sync.
BATCH_SIZE = 128
103 | 
104 | 
def load_data():
    """Load training and validation matrices from .mat files.

    The training input/target rows are shuffled jointly (numpy RNG seeded with
    1331); the validation rows keep their file order. Every returned array
    gets a trailing axis of size 1.

    Returns:
        Tuple ``(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali)``.
    """
    def _read_mat(path, key, label):
        # Normalise the path, announce it, and return the named matrix.
        path = os.path.normcase(path)
        print(label + path)
        return np.array(sio.loadmat(path)[key])

    def _add_channel_axis(matrix):
        # (rows, cols) -> (rows, cols, 1)
        return np.reshape(matrix, (matrix.shape[0], matrix.shape[1], 1))

    print('> Loading data... ')
    noisy_train = _read_mat(
        'Train_G711_PreProc_defautLang/Train_inputSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat',
        'inputSetNorm', '> Training Input: ')
    noisy_vali = _read_mat(
        'Train_G711_PreProc_defautLang/Vali_inputSet_ALaw_defautLang_OLdata_ValiTrain_smallVali.mat',
        'inputSetNorm', '> Validation Input: ')
    clean_train = _read_mat(
        'Train_G711_PreProc_defautLang/Train_targetSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat',
        'targetSet', '> Training Target: ')
    clean_vali = _read_mat(
        'Train_G711_PreProc_defautLang/Vali_targetSet_ALaw_defautLang_OLdata_ValiTrain_smallVali.mat',
        'targetSet', '> Validation Target: ')

    # Shuffle the training pairs together: stack them side by side, shuffle
    # the rows once, then split again at column SEQ_LEN. `random` is
    # numpy.random, so the seed applies to np.random.shuffle.
    random.seed(1331)
    paired = np.column_stack((noisy_train, clean_train))
    np.random.shuffle(paired)
    noisy_train = paired[:, :SEQ_LEN]
    clean_train = paired[:, SEQ_LEN:]

    noisy_train = _add_channel_axis(noisy_train)
    clean_train = _add_channel_axis(clean_train)
    noisy_vali = _add_channel_axis(noisy_vali)
    clean_vali = _add_channel_axis(clean_vali)

    print('> Data Loaded. Model Compiling... ')
    return noisy_train, clean_train, noisy_vali, clean_vali
167 | 
168 | # 2. dataset generator
def batch_generator(x_train_noisy, x_train, batch_size=128):
    """Yield ``(noisy_batch, clean_batch)`` pairs of ``batch_size`` rows, forever.

    Batches are consecutive, non-overlapping row slices; a trailing partial
    batch is dropped and iteration restarts from row 0 after each pass.

    Args:
        x_train_noisy: array whose ``shape[0]`` gives the number of examples.
        x_train: array sliced with the same row ranges as ``x_train_noisy``.
        batch_size: number of rows per batch; must be at least 1.

    Raises:
        ValueError: on the first ``next()`` if ``batch_size`` is not positive
            or exceeds the number of examples. Without this check the loop
            below could spin forever without ever yielding.
    """
    n_examples = x_train_noisy.shape[0]
    if batch_size < 1 or n_examples < batch_size:
        raise ValueError(
            'batch_size must be between 1 and the number of examples (%d), got %d'
            % (n_examples, batch_size))

    while True:
        for start in range(0, n_examples - batch_size + 1, batch_size):
            stop = start + batch_size
            yield x_train_noisy[start:stop], x_train[start:stop]
173 | 
# Default optimisation settings read by QSRWGAN.__init__:
#   lr       - learning rate handed to both Adam optimizers
#   clip     - stored on the model as `clip`
#   n_lambda - multiplier applied to the gradient penalty
#   n_critic - discriminator updates performed per generator update
default_opt_params = dict(
    lr=5e-5,
    clip=1e-2,
    n_lambda=10,
    n_critic=10,
)
175 | 
176 | 
class QSRWGAN(object):
    """Adversarial trainer for a speech generator (WGAN with gradient penalty).

    The constructor builds a generator and a discriminator (critic) with
    ``create_generator`` / ``create_discriminator``, defines the critic loss
    (including a gradient penalty) and the generator loss, and creates one
    Adam optimizer per network. ``fit`` alternates ``n_critic`` critic
    updates with a single generator update.
    """

    def __init__(self, opt_params=default_opt_params, frame_len=80):
        """Build the TensorFlow graph for both networks, losses and optimizers.

        Args:
            opt_params: dict providing the keys 'lr', 'clip', 'n_lambda' and
                'n_critic'. The dict is only read, never modified.
            frame_len: length of the second axis of the ``(batch, frame_len, 1)``
                inputs of both networks.
        """
        self.n_critic = opt_params['n_critic']
        self.n_lambda = opt_params['n_lambda']
        self.clip = opt_params['clip']  # stored only; no method of this class reads it
        self.frame_len = frame_len

        # ------------------------------------------------------------------
        # 1. create session
        # ------------------------------------------------------------------
        self.sess = tf.Session()
        K.set_session(self.sess)  # pass the session to keras

        # ------------------------------------------------------------------
        # 2. create generator and discriminator
        # ------------------------------------------------------------------
        # The name scopes matter: step 5 selects the trainable variables of
        # each network by looking for these strings in the variable names.
        with tf.name_scope('generator'):
            gen_inputs = Input(shape=(self.frame_len, 1))
            gen_outputs = create_generator(gen_inputs)

        with tf.name_scope('discriminator'):
            dis_inputs = Input(shape=(self.frame_len, 1))
            dis_outputs = create_discriminator(dis_inputs)

        # ------------------------------------------------------------------
        # 3. instantiate networks of generator and discriminator
        # ------------------------------------------------------------------
        Generator = Model(inputs=gen_inputs, outputs=gen_outputs)
        Generator.summary()
        self.gen_model = Generator
        Discriminator = Model(inputs=dis_inputs, outputs=dis_outputs)
        Discriminator.summary()

        # ------------------------------------------------------------------
        # 4. save the inputs of generator and discriminator
        # ------------------------------------------------------------------
        quan_inputs = tf.placeholder(tf.float32, shape=(None, self.frame_len, 1), name='quan_inputs')
        real_inputs = tf.placeholder(tf.float32, shape=(None, self.frame_len, 1), name='real_inputs')
        self.inputs = quan_inputs, real_inputs

        # ------------------------------------------------------------------
        # 5. get the weights of generator and discriminator
        # ------------------------------------------------------------------
        # NOTE(review): this relies on every variable created in step 2
        # carrying the scope string in its name - confirm for the layers used
        # by create_generator / create_discriminator.
        self.gen_weights = [weights for weights in tf.global_variables() if 'generator' in weights.name]
        self.dis_weights = [weights for weights in tf.global_variables() if 'discriminator' in weights.name]

        # ------------------------------------------------------------------
        # 6. create predictions of generator and discriminator
        # ------------------------------------------------------------------
        fake_inputs = Generator(quan_inputs)
        disc_real = Discriminator(real_inputs)
        disc_fake = Discriminator(fake_inputs)
        self.predictions = fake_inputs

        # ------------------------------------------------------------------
        # 7. create losses and compute probabilities of discriminator
        # ------------------------------------------------------------------
        # 7.1. WGAN lipschitz-penalty
        # NOTE(review): the WGAN-GP paper draws the interpolation coefficient
        # from U[0, 1]; the [-0.4, 0.4] range below is kept as found - confirm
        # it is intentional. The first dimension is the module-level
        # BATCH_SIZE, so the loss can only be evaluated on batches of that size.
        alpha = tf.random_uniform(shape=[BATCH_SIZE, 1, 1], minval=-0.4, maxval=0.4)
        differences = fake_inputs - real_inputs
        interpolates = real_inputs + (alpha * differences)

        gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0]
        # The squared gradients are summed over axis 1 only; the remaining
        # trailing axis has size 1 for the (None, frame_len, 1) inputs.
        gp = K.mean(K.square(K.sqrt(K.sum(K.square(gradients), axis=1)) - 1))
        gradient_penalty = self.n_lambda * gp

        disc_loss = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
        disc_loss += gradient_penalty

        self.dis_loss = disc_loss
        self.gen_loss = -tf.reduce_mean(disc_fake)

        # Monitoring tensors; none of them is used for optimisation.
        self.disc_real = tf.reduce_mean(disc_real)
        self.disc_fake = tf.reduce_mean(disc_fake)
        self.prob_real = tf.reduce_mean(tf.sigmoid(disc_real))
        self.prob_fake = tf.reduce_mean(tf.sigmoid(disc_fake))

        # ------------------------------------------------------------------
        # 8. create optimizer for generator and discriminator
        # ------------------------------------------------------------------
        learning_rate = opt_params['lr']

        # Each optimizer only updates the variables of its own network.
        gen_train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.gen_loss, var_list=self.gen_weights)
        disc_train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.dis_loss, var_list=self.dis_weights)

        self.gen_opt_train = gen_train_op
        self.dis_opt_train = disc_train_op

    def load_weights(self):
        """Load generator weights from a fixed HDF5 file in the working directory."""
        self.gen_model.load_weights('ddsrcnn_weights_defaultLang_OL40_stopstr_bs128_lr5e-05.h5')

    def save_weights(self, file_path):
        """Save the generator weights to ``file_path`` (discriminator weights are not saved)."""
        file_path = os.path.normcase(file_path)
        self.gen_model.save_weights(file_path)

    def load_batch(self, x_train_noise, x_train, train=True):
        """Return the feed dict for one training step.

        Args:
            x_train_noise: batch fed to the generator input placeholder.
            x_train: batch fed to the real-sample placeholder.
            train: value fed to the Keras learning-phase tensor.
        """
        gen_inputs, dis_inputs = self.inputs
        return {gen_inputs: x_train_noise, dis_inputs: x_train, K.learning_phase(): train}

    def gen(self, x_train_noise):
        """Run the generator on ``x_train_noise`` with the learning phase set to False."""
        gen_inputs, _ = self.inputs
        feed_dict = {gen_inputs: x_train_noise, K.learning_phase(): False}
        return self.sess.run(self.predictions, feed_dict=feed_dict)

    def gen_train(self, feed_dict):
        """Apply one generator optimizer step and return the generator loss."""
        _, gen_loss = self.sess.run([self.gen_opt_train, self.gen_loss], feed_dict=feed_dict)
        return gen_loss

    def dis_train(self, feed_dict):
        """Apply one discriminator optimizer step and return the discriminator loss."""
        _, dis_loss = self.sess.run([self.dis_opt_train, self.dis_loss], feed_dict=feed_dict)
        return dis_loss

    def fit(self, x_train_noise, x_train, x_train_noise_vali, x_train_vali, epochs=10, logdir='/qsrwgan_run'):
        """Train the GAN and save the generator weights to 'qsrwgan_weights.h5'.

        Args:
            x_train_noise: generator inputs, first axis indexes the samples.
            x_train: target samples aligned with ``x_train_noise``.
            x_train_noise_vali: accepted for interface compatibility; not used.
            x_train_vali: accepted for interface compatibility; not used.
            epochs: number of passes of the outer training loop.
            logdir: directory that is DELETED recursively if it exists and
                then recreated. Nothing in this method writes into it.
        """
        # ------------------------------------------------------------------
        # 1. initialize log directory
        # ------------------------------------------------------------------
        if tf.gfile.Exists(logdir):
            tf.gfile.DeleteRecursively(logdir)

        tf.gfile.MakeDirs(logdir)

        # ------------------------------------------------------------------
        # 2. initialize model
        # ------------------------------------------------------------------
        # Initialise every variable first, then overwrite the generator with
        # the weights stored on disk.
        init = tf.global_variables_initializer()
        self.sess.run(init)
        self.load_weights()

        # ------------------------------------------------------------------
        # 3. train the model
        # ------------------------------------------------------------------
        g_step = 0
        curr_epoch = 0
        iters_per_epoch = x_train_noise.shape[0] // BATCH_SIZE

        while curr_epoch < epochs:
            # A fresh generator per epoch restarts the data from the first batch.
            generator = batch_generator(x_train_noise, x_train, BATCH_SIZE)

            curr_iter = 0
            while curr_iter < iters_per_epoch:
                start_time = time.time()

                # Collect the loss of every critic step so that the value
                # reported below really is the mean over the n_critic updates.
                dis_losses = []
                for _ in range(self.n_critic):
                    curr_iter += 1

                    # load the batch
                    quant_batch, real_batch = next(generator)
                    feed_dict = self.load_batch(quant_batch, real_batch)

                    # train the discriminator
                    dis_losses.append(self.dis_train(feed_dict))

                dis_loss = np.array(dis_losses).mean()

                # train the generator
                curr_iter += 1
                quant_batch, real_batch = next(generator)
                feed_dict = self.load_batch(quant_batch, real_batch)
                gen_loss = self.gen_train(feed_dict)

                g_step += 1

                if g_step < 1000 or g_step % 1000 == 0:
                    tot_time = time.time() - start_time
                    print('Epoch: %3d, Gen Steps: %4d (%3.lf s), Discriminator loss: %.6f, Generator loss: %.6f' % (curr_epoch, g_step, tot_time, dis_loss, gen_loss))

                if g_step % 50 == 0:
                    # Dump the generator output for a batch of Gaussian noise
                    # as an 8 kHz wav file. The noise uses the model's own
                    # frame length so it always matches the input placeholder.
                    prediction = self.gen(np.random.randn(BATCH_SIZE, self.frame_len, 1))
                    fname = 'recon-speech-%d_%d.wav' % (curr_iter, g_step)
                    swave.write(fname, 8000, np.reshape(prediction, (prediction.size,)))

            curr_epoch += 1

        self.save_weights("qsrwgan_weights.h5")
387 | 
388 | 
389 | #####################################################################################
390 | # 4. QSR_WGAN_GP Training
391 | #####################################################################################		
model = QSRWGAN(opt_params=default_opt_params)
# Train the model. The validation arrays get their own names: binding both
# to `_` would keep only the last one and pass it for both fit() arguments.
x_train_noisy, x_train, x_train_noisy_vali, x_train_vali = load_data()
model.fit(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali, epochs=10000)
396 | 


--------------------------------------------------------------------------------
/QSR-WGAN-GP/tflib/__init__.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | 
  4 | import locale
  5 | 
# Switch every locale category to the user's default setting. In this module
# `locale` is otherwise only referenced by the commented-out
# print_params_info helper further down.
locale.setlocale(locale.LC_ALL, '')

# Registry of variables created through param(), keyed by parameter name.
_params = {}
# Redirection table consulted by param(): variable -> variable returned instead.
_param_aliases = {}
 10 | def param(name, *args, **kwargs):
 11 |     """
 12 |     A wrapper for `tf.Variable` which enables parameter sharing in models.
 13 |     
 14 |     Creates and returns theano shared variables similarly to `tf.Variable`, 
 15 |     except if you try to create a param with the same name as a 
 16 |     previously-created one, `param(...)` will just return the old one instead of 
 17 |     making a new one.
 18 | 
 19 |     This constructor also adds a `param` attribute to the shared variables it 
 20 |     creates, so that you can easily search a graph for all params.
 21 |     """
 22 | 
 23 |     if name not in _params:
 24 |         kwargs['name'] = name
 25 |         param = tf.Variable(*args, **kwargs)
 26 |         param.param = True
 27 |         _params[name] = param
 28 |     result = _params[name]
 29 |     i = 0
 30 |     while result in _param_aliases:
 31 |         # print 'following alias {}: {} to {}'.format(i, result, _param_aliases[result])
 32 |         i += 1
 33 |         result = _param_aliases[result]
 34 |     return result
 35 | 
 36 | def params_with_name(name):
 37 |     return [p for n,p in _params.items() if name in n]
 38 | 
 39 | def delete_all_params():
 40 |     _params.clear()
 41 | 
 42 | def alias_params(replace_dict):
 43 |     for old,new in replace_dict.items():
 44 |         # print "aliasing {} to {}".format(old,new)
 45 |         _param_aliases[old] = new
 46 | 
 47 | def delete_param_aliases():
 48 |     _param_aliases.clear()
 49 | 
 50 | # def search(node, critereon):
 51 | #     """
 52 | #     Traverse the Theano graph starting at `node` and return a list of all nodes
 53 | #     which match the `critereon` function. When optimizing a cost function, you 
 54 | #     can use this to get a list of all of the trainable params in the graph, like
 55 | #     so:
 56 | 
 57 | #     `lib.search(cost, lambda x: hasattr(x, "param"))`
 58 | #     """
 59 | 
 60 | #     def _search(node, critereon, visited):
 61 | #         if node in visited:
 62 | #             return []
 63 | #         visited.add(node)
 64 | 
 65 | #         results = []
 66 | #         if isinstance(node, T.Apply):
 67 | #             for inp in node.inputs:
 68 | #                 results += _search(inp, critereon, visited)
 69 | #         else: # Variable node
 70 | #             if critereon(node):
 71 | #                 results.append(node)
 72 | #             if node.owner is not None:
 73 | #                 results += _search(node.owner, critereon, visited)
 74 | #         return results
 75 | 
 76 | #     return _search(node, critereon, set())
 77 | 
 78 | # def print_params_info(params):
 79 | #     """Print information about the parameters in the given param set."""
 80 | 
 81 | #     params = sorted(params, key=lambda p: p.name)
 82 | #     values = [p.get_value(borrow=True) for p in params]
 83 | #     shapes = [p.shape for p in values]
 84 | #     print "Params for cost:"
 85 | #     for param, value, shape in zip(params, values, shapes):
 86 | #         print "\t{0} ({1})".format(
 87 | #             param.name,
 88 | #             ",".join([str(x) for x in shape])
 89 | #         )
 90 | 
 91 | #     total_param_count = 0
 92 | #     for shape in shapes:
 93 | #         param_count = 1
 94 | #         for dim in shape:
 95 | #             param_count *= dim
 96 | #         total_param_count += param_count
 97 | #     print "Total parameter count: {0}".format(
 98 | #         locale.format("%d", total_param_count, grouping=True)
 99 | #     )
100 | 
def print_model_settings(locals_):
    """Print the upper-case entries of `locals_`, one per line, sorted by name.

    Entries named 'T', 'SETTINGS' and 'ALL_SETTINGS' are left out.
    """
    print("Uppercase local vars:")
    skipped = ('T', 'SETTINGS', 'ALL_SETTINGS')
    selected = sorted(
        (
            (key, value)
            for key, value in locals_.items()
            if key.isupper() and key not in skipped
        ),
        key=lambda entry: entry[0],
    )
    for var_name, var_value in selected:
        print("\t{}: {}".format(var_name, var_value))
107 | 
108 | 
def print_model_settings_dict(settings):
    """Print every entry of `settings`, one per line, sorted by key."""
    print("Settings dict:")
    for var_name, var_value in sorted(settings.items(), key=lambda entry: entry[0]):
        print("\t{}: {}".format(var_name, var_value))


--------------------------------------------------------------------------------
/QSR-WGAN-GP/tflib/ops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/QSR-WGAN-GP/tflib/ops/__init__.py


--------------------------------------------------------------------------------
/QSR-WGAN-GP/tflib/ops/batchnorm.py:
--------------------------------------------------------------------------------
 1 | import tflib as lib
 2 | 
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | 
def Batchnorm(name, axes, inputs, is_training=None, stats_iter=None, update_moving_stats=True, fused=True):
    """Batch normalisation with learnable offset/scale registered through lib.param.

    Two code paths exist:

    * Fused path - taken when `axes` is exactly the list [0,2,3] or [0,2] and
      `fused` is True. The input is handled channels-first (the fused op is
      called with data_format='NCHW' and dimension 1 sizes the parameters).
      For axes [0,2] a dummy trailing axis is added first and removed again
      before returning.
    * Generic path - any other `axes`/`fused` combination: plain
      tf.nn.moments over `axes` followed by tf.nn.batch_normalization,
      without moving statistics.

    Args:
        name: prefix of the parameter names ('.offset', '.scale',
            '.moving_mean', '.moving_variance').
        axes: list of axes to normalise over; compared with `==` against list
            literals, so the fused path is only selected for lists.
        inputs: tensor to normalise.
        is_training: None to always use the training-mode fused op without
            touching the moving statistics; otherwise a predicate passed to
            tf.cond to choose between the training and inference branch.
        stats_iter: iteration counter used as the weight of the running
            average when the moving statistics are updated.
            NOTE(review): it is cast with tf.cast whenever `is_training` is
            given and `update_moving_stats` is True, so it presumably must
            not be None in that case - confirm against callers.
        update_moving_stats: whether to update moving mean/variance in the
            training branch (only consulted when `is_training` is not None).
        fused: set to False to force the generic path.

    Returns:
        The normalised tensor.
    """
    if ((axes == [0,2,3]) or (axes == [0,2])) and fused==True:
        if axes==[0,2]:
            # Add a dummy 4th axis so the 3-D input fits the 4-D code below.
            inputs = tf.expand_dims(inputs, 3)
        # Old (working but pretty slow) implementation:
        ##########

        # inputs = tf.transpose(inputs, [0,2,3,1])

        # mean, var = tf.nn.moments(inputs, [0,1,2], keep_dims=False)
        # offset = lib.param(name+'.offset', np.zeros(mean.get_shape()[-1], dtype='float32'))
        # scale = lib.param(name+'.scale', np.ones(var.get_shape()[-1], dtype='float32'))
        # result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-4)

        # return tf.transpose(result, [0,3,1,2])

        # New (super fast but untested) implementation:
        # One offset/scale/moving statistic per entry of dimension 1.
        offset = lib.param(name+'.offset', np.zeros(inputs.get_shape()[1], dtype='float32'))
        scale = lib.param(name+'.scale', np.ones(inputs.get_shape()[1], dtype='float32'))

        moving_mean = lib.param(name+'.moving_mean', np.zeros(inputs.get_shape()[1], dtype='float32'), trainable=False)
        moving_variance = lib.param(name+'.moving_variance', np.ones(inputs.get_shape()[1], dtype='float32'), trainable=False)

        def _fused_batch_norm_training():
            # Returns (outputs, batch_mean, batch_var) computed from the current batch.
            return tf.nn.fused_batch_norm(inputs, scale, offset, epsilon=1e-5, data_format='NCHW')
        def _fused_batch_norm_inference():
            # Version which blends in the current item's statistics
            # Each item's own statistics (moments over axes 2 and 3) get
            # weight 1/batch_size, the moving statistics the remainder.
            batch_size = tf.cast(tf.shape(inputs)[0], 'float32')
            mean, var = tf.nn.moments(inputs, [2,3], keep_dims=True)
            mean = ((1./batch_size)*mean) + (((batch_size-1.)/batch_size)*moving_mean)[None,:,None,None]
            var = ((1./batch_size)*var) + (((batch_size-1.)/batch_size)*moving_variance)[None,:,None,None]
            return tf.nn.batch_normalization(inputs, mean, var, offset[None,:,None,None], scale[None,:,None,None], 1e-5), mean, var

            # Standard version
            # return tf.nn.fused_batch_norm(
            #     inputs,
            #     scale,
            #     offset,
            #     epsilon=1e-2, 
            #     mean=moving_mean,
            #     variance=moving_variance,
            #     is_training=False,
            #     data_format='NCHW'
            # )

        if is_training is None:
            # No training flag supplied: always normalise with batch
            # statistics; the moving statistics are never updated here.
            outputs, batch_mean, batch_var = _fused_batch_norm_training()
        else:
            outputs, batch_mean, batch_var = tf.cond(is_training,
                                                       _fused_batch_norm_training,
                                                       _fused_batch_norm_inference)
            if update_moving_stats:
                no_updates = lambda: outputs
                def _force_updates():
                    """Internal function forces updates moving_vars if is_training."""
                    float_stats_iter = tf.cast(stats_iter, tf.float32)

                    # Running average in which the previous value has weight
                    # stats_iter/(stats_iter+1) and the new batch 1/(stats_iter+1).
                    update_moving_mean = tf.assign(moving_mean, ((float_stats_iter/(float_stats_iter+1))*moving_mean) + ((1/(float_stats_iter+1))*batch_mean))
                    update_moving_variance = tf.assign(moving_variance, ((float_stats_iter/(float_stats_iter+1))*moving_variance) + ((1/(float_stats_iter+1))*batch_var))

                    # The identity op ties the assignments to the returned tensor.
                    with tf.control_dependencies([update_moving_mean, update_moving_variance]):
                        return tf.identity(outputs)
                outputs = tf.cond(is_training, _force_updates, no_updates)

        if axes == [0,2]:
            return outputs[:,:,:,0] # collapse last dim
        else:
            return outputs
    else:
        # raise Exception('old BN')
        # TODO we can probably use nn.fused_batch_norm here too for speedup
        mean, var = tf.nn.moments(inputs, axes, keep_dims=True)
        shape = mean.get_shape().as_list()
        if 0 not in axes:
            # The batch axis was not reduced; share one set of parameters
            # across the batch instead of one per item.
            print("WARNING ({}): didn't find 0 in axes, but not using separate BN params for each item in batch".format(name))
            shape[0] = 1
        offset = lib.param(name+'.offset', np.zeros(shape, dtype='float32'))
        scale = lib.param(name+'.scale', np.ones(shape, dtype='float32'))
        result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-5)


        return result
88 | 


--------------------------------------------------------------------------------
/QSR-WGAN-GP/tflib/ops/cond_batchnorm.py:
--------------------------------------------------------------------------------
 1 | import tflib as lib
 2 | 
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | 
 6 | def Batchnorm(name, axes, inputs, is_training=None, stats_iter=None, update_moving_stats=True, fused=True, labels=None, n_labels=None):
 7 |     """conditional batchnorm (dumoulin et al 2016) for BCHW conv filtermaps"""
 8 |     if axes != [0,2,3]:
 9 |         raise Exception('unsupported')
10 |     mean, var = tf.nn.moments(inputs, axes, keep_dims=True)
11 |     shape = mean.get_shape().as_list() # shape is [1,n,1,1]
12 |     offset_m = lib.param(name+'.offset', np.zeros([n_labels,shape[1]], dtype='float32'))
13 |     scale_m = lib.param(name+'.scale', np.ones([n_labels,shape[1]], dtype='float32'))
14 |     offset = tf.nn.embedding_lookup(offset_m, labels)
15 |     scale = tf.nn.embedding_lookup(scale_m, labels)
16 |     result = tf.nn.batch_normalization(inputs, mean, var, offset[:,:,None,None], scale[:,:,None,None], 1e-5)
17 |     return result


--------------------------------------------------------------------------------
/QSR-WGAN-GP/tflib/ops/conv1d.py:
--------------------------------------------------------------------------------
  1 | import tflib as lib
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | 
  6 | _default_weightnorm = False
  7 | def enable_default_weightnorm():
  8 |     global _default_weightnorm
  9 |     _default_weightnorm = True
 10 | 
 11 | def Conv1D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True, gain=1.):
 12 |     """
 13 |     inputs: tensor of shape (batch size, num channels, width)
 14 |     mask_type: one of None, 'a', 'b'
 15 | 
 16 |     returns: tensor of shape (batch size, num channels, width)
 17 |     """
 18 |     with tf.name_scope(name) as scope:
 19 | 
 20 |         if mask_type is not None:
 21 |             mask_type, mask_n_channels = mask_type
 22 | 
 23 |             mask = np.ones(
 24 |                 (filter_size, input_dim, output_dim), 
 25 |                 dtype='float32'
 26 |             )
 27 |             center = filter_size // 2
 28 | 
 29 |             # Mask out future locations
 30 |             # filter shape is (width, input channels, output channels)
 31 |             mask[center+1:, :, :] = 0.
 32 | 
 33 |             # Mask out future channels
 34 |             for i in range(mask_n_channels):
 35 |                 for j in range(mask_n_channels):
 36 |                     if (mask_type=='a' and i >= j) or (mask_type=='b' and i > j):
 37 |                         mask[
 38 |                             center,
 39 |                             i::mask_n_channels,
 40 |                             j::mask_n_channels
 41 |                         ] = 0.
 42 | 
 43 | 
 44 |         def uniform(stdev, size):
 45 |             return np.random.uniform(
 46 |                 low=-stdev * np.sqrt(3),
 47 |                 high=stdev * np.sqrt(3),
 48 |                 size=size
 49 |             ).astype('float32')
 50 | 
 51 |         fan_in = input_dim * filter_size
 52 |         fan_out = output_dim * filter_size / stride
 53 | 
 54 |         if mask_type is not None: # only approximately correct
 55 |             fan_in /= 2.
 56 |             fan_out /= 2.
 57 | 
 58 |         if he_init:
 59 |             filters_stdev = np.sqrt(4./(fan_in+fan_out))
 60 |         else: # Normalized init (Glorot & Bengio)
 61 |             filters_stdev = np.sqrt(2./(fan_in+fan_out))
 62 | 
 63 |         filter_values = uniform(
 64 |             filters_stdev,
 65 |             (filter_size, input_dim, output_dim)
 66 |         )
 67 |         # print "WARNING IGNORING GAIN"
 68 |         filter_values *= gain
 69 | 
 70 |         filters = lib.param(name+'.Filters', filter_values)
 71 | 
 72 |         if weightnorm==None:
 73 |             weightnorm = _default_weightnorm
 74 |         if weightnorm:
 75 |             norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1)))
 76 |             target_norms = lib.param(
 77 |                 name + '.g',
 78 |                 norm_values
 79 |             )
 80 |             with tf.name_scope('weightnorm') as scope:
 81 |                 norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1]))
 82 |                 filters = filters * (target_norms / norms)
 83 | 
 84 |         if mask_type is not None:
 85 |             with tf.name_scope('filter_mask'):
 86 |                 filters = filters * mask
 87 | 
 88 |         result = tf.nn.conv1d(
 89 |             value=inputs, 
 90 |             filters=filters, 
 91 |             stride=stride,
 92 |             padding='SAME',
 93 |             data_format='NHWC'
 94 |         )
 95 | 
 96 |         if biases:
 97 |             _biases = lib.param(
 98 |                 name+'.Biases',
 99 |                 np.zeros([output_dim], dtype='float32')
100 |             )
101 | 
102 |             # result = result + _biases
103 | 
104 |             result = tf.expand_dims(result, 3)
105 |             result = tf.nn.bias_add(result, _biases, data_format='NCHW')
106 |             result = tf.squeeze(result)
107 | 
108 |         return result
109 | 


--------------------------------------------------------------------------------
/QSR-WGAN-GP/tflib/ops/conv2d.py:
--------------------------------------------------------------------------------
  1 | import tflib as lib
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | 
  6 | _default_weightnorm = False
  7 | def enable_default_weightnorm():
  8 |     global _default_weightnorm
  9 |     _default_weightnorm = True
 10 | 
 11 | _weights_stdev = None
 12 | def set_weights_stdev(weights_stdev):
 13 |     global _weights_stdev
 14 |     _weights_stdev = weights_stdev
 15 | 
 16 | def unset_weights_stdev():
 17 |     global _weights_stdev
 18 |     _weights_stdev = None
 19 | 
 20 | def Conv2D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True, gain=1.):
 21 |     """
 22 |     inputs: tensor of shape (batch size, num channels, height, width)
 23 |     mask_type: one of None, 'a', 'b'
 24 | 
 25 |     returns: tensor of shape (batch size, num channels, height, width)
 26 |     """
 27 |     with tf.name_scope(name) as scope:
 28 | 
 29 |         if mask_type is not None:
 30 |             mask_type, mask_n_channels = mask_type
 31 | 
 32 |             mask = np.ones(
 33 |                 (filter_size, filter_size, input_dim, output_dim), 
 34 |                 dtype='float32'
 35 |             )
 36 |             center = filter_size // 2
 37 | 
 38 |             # Mask out future locations
 39 |             # filter shape is (height, width, input channels, output channels)
 40 |             mask[center+1:, :, :, :] = 0.
 41 |             mask[center, center+1:, :, :] = 0.
 42 | 
 43 |             # Mask out future channels
 44 |             for i in xrange(mask_n_channels):
 45 |                 for j in xrange(mask_n_channels):
 46 |                     if (mask_type=='a' and i >= j) or (mask_type=='b' and i > j):
 47 |                         mask[
 48 |                             center,
 49 |                             center,
 50 |                             i::mask_n_channels,
 51 |                             j::mask_n_channels
 52 |                         ] = 0.
 53 | 
 54 | 
 55 |         def uniform(stdev, size):
 56 |             return np.random.uniform(
 57 |                 low=-stdev * np.sqrt(3),
 58 |                 high=stdev * np.sqrt(3),
 59 |                 size=size
 60 |             ).astype('float32')
 61 | 
 62 |         fan_in = input_dim * filter_size**2
 63 |         fan_out = output_dim * filter_size**2 / (stride**2)
 64 | 
 65 |         if mask_type is not None: # only approximately correct
 66 |             fan_in /= 2.
 67 |             fan_out /= 2.
 68 | 
 69 |         if he_init:
 70 |             filters_stdev = np.sqrt(4./(fan_in+fan_out))
 71 |         else: # Normalized init (Glorot & Bengio)
 72 |             filters_stdev = np.sqrt(2./(fan_in+fan_out))
 73 | 
 74 |         if _weights_stdev is not None:
 75 |             filter_values = uniform(
 76 |                 _weights_stdev,
 77 |                 (filter_size, filter_size, input_dim, output_dim)
 78 |             )
 79 |         else:
 80 |             filter_values = uniform(
 81 |                 filters_stdev,
 82 |                 (filter_size, filter_size, input_dim, output_dim)
 83 |             )
 84 | 
 85 |         # print "WARNING IGNORING GAIN"
 86 |         filter_values *= gain
 87 | 
 88 |         filters = lib.param(name+'.Filters', filter_values)
 89 | 
 90 |         if weightnorm==None:
 91 |             weightnorm = _default_weightnorm
 92 |         if weightnorm:
 93 |             norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1,2)))
 94 |             target_norms = lib.param(
 95 |                 name + '.g',
 96 |                 norm_values
 97 |             )
 98 |             with tf.name_scope('weightnorm') as scope:
 99 |                 norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1,2]))
100 |                 filters = filters * (target_norms / norms)
101 | 
102 |         if mask_type is not None:
103 |             with tf.name_scope('filter_mask'):
104 |                 filters = filters * mask
105 | 
106 |         result = tf.nn.conv2d(
107 |             input=inputs, 
108 |             filter=filters, 
109 |             strides=[1, 1, stride, stride],
110 |             padding='SAME',
111 |             data_format='NCHW'
112 |         )
113 | 
114 |         if biases:
115 |             _biases = lib.param(
116 |                 name+'.Biases',
117 |                 np.zeros(output_dim, dtype='float32')
118 |             )
119 | 
120 |             result = tf.nn.bias_add(result, _biases, data_format='NCHW')
121 | 
122 | 
123 |         return result
124 | 


--------------------------------------------------------------------------------
/QSR-WGAN-GP/tflib/ops/deconv2d.py:
--------------------------------------------------------------------------------
  1 | import tflib as lib
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | 
  6 | _default_weightnorm = False
  7 | def enable_default_weightnorm():
  8 |     global _default_weightnorm
  9 |     _default_weightnorm = True
 10 | 
 11 | _weights_stdev = None
 12 | def set_weights_stdev(weights_stdev):
 13 |     global _weights_stdev
 14 |     _weights_stdev = weights_stdev
 15 | 
 16 | def unset_weights_stdev():
 17 |     global _weights_stdev
 18 |     _weights_stdev = None
 19 | 
 20 | def Deconv2D(
 21 |     name, 
 22 |     input_dim, 
 23 |     output_dim, 
 24 |     filter_size, 
 25 |     inputs, 
 26 |     he_init=True,
 27 |     weightnorm=None,
 28 |     biases=True,
 29 |     gain=1.,
 30 |     mask_type=None,
 31 |     ):
 32 |     """
 33 |     inputs: tensor of shape (batch size, height, width, input_dim)
 34 |     returns: tensor of shape (batch size, 2*height, 2*width, output_dim)
 35 |     """
 36 |     with tf.name_scope(name) as scope:
 37 | 
 38 |         if mask_type != None:
 39 |             raise Exception('Unsupported configuration')
 40 | 
 41 |         def uniform(stdev, size):
 42 |             return np.random.uniform(
 43 |                 low=-stdev * np.sqrt(3),
 44 |                 high=stdev * np.sqrt(3),
 45 |                 size=size
 46 |             ).astype('float32')
 47 | 
 48 |         stride = 2
 49 |         fan_in = input_dim * filter_size**2 / (stride**2)
 50 |         fan_out = output_dim * filter_size**2
 51 | 
 52 |         if he_init:
 53 |             filters_stdev = np.sqrt(4./(fan_in+fan_out))
 54 |         else: # Normalized init (Glorot & Bengio)
 55 |             filters_stdev = np.sqrt(2./(fan_in+fan_out))
 56 | 
 57 | 
 58 |         if _weights_stdev is not None:
 59 |             filter_values = uniform(
 60 |                 _weights_stdev,
 61 |                 (filter_size, filter_size, output_dim, input_dim)
 62 |             )
 63 |         else:
 64 |             filter_values = uniform(
 65 |                 filters_stdev,
 66 |                 (filter_size, filter_size, output_dim, input_dim)
 67 |             )
 68 | 
 69 |         filter_values *= gain
 70 | 
 71 |         filters = lib.param(
 72 |             name+'.Filters',
 73 |             filter_values
 74 |         )
 75 | 
 76 |         if weightnorm==None:
 77 |             weightnorm = _default_weightnorm
 78 |         if weightnorm:
 79 |             norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1,3)))
 80 |             target_norms = lib.param(
 81 |                 name + '.g',
 82 |                 norm_values
 83 |             )
 84 |             with tf.name_scope('weightnorm') as scope:
 85 |                 norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1,3]))
 86 |                 filters = filters * tf.expand_dims(target_norms / norms, 1)
 87 | 
 88 | 
 89 |         inputs = tf.transpose(inputs, [0,2,3,1], name='NCHW_to_NHWC')
 90 | 
 91 |         input_shape = tf.shape(inputs)
 92 |         try: # tf pre-1.0 (top) vs 1.0 (bottom)
 93 |             output_shape = tf.pack([input_shape[0], 2*input_shape[1], 2*input_shape[2], output_dim])
 94 |         except Exception as e:
 95 |             output_shape = tf.stack([input_shape[0], 2*input_shape[1], 2*input_shape[2], output_dim])
 96 | 
 97 |         result = tf.nn.conv2d_transpose(
 98 |             value=inputs, 
 99 |             filter=filters,
100 |             output_shape=output_shape, 
101 |             strides=[1, 2, 2, 1],
102 |             padding='SAME'
103 |         )
104 | 
105 |         if biases:
106 |             _biases = lib.param(
107 |                 name+'.Biases',
108 |                 np.zeros(output_dim, dtype='float32')
109 |             )
110 |             result = tf.nn.bias_add(result, _biases)
111 | 
112 |         result = tf.transpose(result, [0,3,1,2], name='NHWC_to_NCHW')
113 | 
114 | 
115 |         return result
116 | 


--------------------------------------------------------------------------------
/QSR-WGAN-GP/tflib/ops/layernorm.py:
--------------------------------------------------------------------------------
 1 | import tflib as lib
 2 | 
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | 
 6 | def Layernorm(name, norm_axes, inputs):
 7 |     mean, var = tf.nn.moments(inputs, norm_axes, keep_dims=True)
 8 | 
 9 |     # Assume the 'neurons' axis is the first of norm_axes. This is the case for fully-connected and BCHW conv layers.
10 |     n_neurons = inputs.get_shape().as_list()[norm_axes[0]]
11 | 
12 |     offset = lib.param(name+'.offset', np.zeros(n_neurons, dtype='float32'))
13 |     scale = lib.param(name+'.scale', np.ones(n_neurons, dtype='float32'))
14 | 
15 |     # Add broadcasting dims to offset and scale (e.g. BCHW conv data)
16 |     offset = tf.reshape(offset, [-1] + [1 for i in xrange(len(norm_axes)-1)])
17 |     scale = tf.reshape(scale, [-1] + [1 for i in xrange(len(norm_axes)-1)])
18 | 
19 |     result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-5)
20 | 
21 |     return result


--------------------------------------------------------------------------------
/QSR-WGAN-GP/tflib/ops/linear.py:
--------------------------------------------------------------------------------
  1 | import tflib as lib
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | 
  6 | _default_weightnorm = False
  7 | def enable_default_weightnorm():
  8 |     global _default_weightnorm
  9 |     _default_weightnorm = True
 10 | 
 11 | def disable_default_weightnorm():
 12 |     global _default_weightnorm
 13 |     _default_weightnorm = False
 14 | 
 15 | _weights_stdev = None
 16 | def set_weights_stdev(weights_stdev):
 17 |     global _weights_stdev
 18 |     _weights_stdev = weights_stdev
 19 | 
 20 | def unset_weights_stdev():
 21 |     global _weights_stdev
 22 |     _weights_stdev = None
 23 | 
 24 | def Linear(
 25 |         name, 
 26 |         input_dim, 
 27 |         output_dim, 
 28 |         inputs,
 29 |         biases=True,
 30 |         initialization=None,
 31 |         weightnorm=None,
 32 |         gain=1.
 33 |         ):
 34 |     """
 35 |     initialization: None, `lecun`, 'glorot', `he`, 'glorot_he', `orthogonal`, `("uniform", range)`
 36 |     """
 37 |     with tf.name_scope(name) as scope:
 38 | 
 39 |         def uniform(stdev, size):
 40 |             if _weights_stdev is not None:
 41 |                 stdev = _weights_stdev
 42 |             return np.random.uniform(
 43 |                 low=-stdev * np.sqrt(3),
 44 |                 high=stdev * np.sqrt(3),
 45 |                 size=size
 46 |             ).astype('float32')
 47 | 
 48 |         if initialization == 'lecun':# and input_dim != output_dim):
 49 |             # disabling orth. init for now because it's too slow
 50 |             weight_values = uniform(
 51 |                 np.sqrt(1./input_dim),
 52 |                 (input_dim, output_dim)
 53 |             )
 54 | 
 55 |         elif initialization == 'glorot' or (initialization == None):
 56 | 
 57 |             weight_values = uniform(
 58 |                 np.sqrt(2./(input_dim+output_dim)),
 59 |                 (input_dim, output_dim)
 60 |             )
 61 | 
 62 |         elif initialization == 'he':
 63 | 
 64 |             weight_values = uniform(
 65 |                 np.sqrt(2./input_dim),
 66 |                 (input_dim, output_dim)
 67 |             )
 68 | 
 69 |         elif initialization == 'glorot_he':
 70 | 
 71 |             weight_values = uniform(
 72 |                 np.sqrt(4./(input_dim+output_dim)),
 73 |                 (input_dim, output_dim)
 74 |             )
 75 | 
 76 |         elif initialization == 'orthogonal' or \
 77 |             (initialization == None and input_dim == output_dim):
 78 |             
 79 |             # From lasagne
 80 |             def sample(shape):
 81 |                 if len(shape) < 2:
 82 |                     raise RuntimeError("Only shapes of length 2 or more are "
 83 |                                        "supported.")
 84 |                 flat_shape = (shape[0], np.prod(shape[1:]))
 85 |                  # TODO: why normal and not uniform?
 86 |                 a = np.random.normal(0.0, 1.0, flat_shape)
 87 |                 u, _, v = np.linalg.svd(a, full_matrices=False)
 88 |                 # pick the one with the correct shape
 89 |                 q = u if u.shape == flat_shape else v
 90 |                 q = q.reshape(shape)
 91 |                 return q.astype('float32')
 92 |             weight_values = sample((input_dim, output_dim))
 93 |         
 94 |         elif initialization[0] == 'uniform':
 95 |         
 96 |             weight_values = np.random.uniform(
 97 |                 low=-initialization[1],
 98 |                 high=initialization[1],
 99 |                 size=(input_dim, output_dim)
100 |             ).astype('float32')
101 | 
102 |         else:
103 | 
104 |             raise Exception('Invalid initialization!')
105 | 
106 |         weight_values *= gain
107 | 
108 |         weight = lib.param(
109 |             name + '.W',
110 |             weight_values
111 |         )
112 | 
113 |         if weightnorm==None:
114 |             weightnorm = _default_weightnorm
115 |         if weightnorm:
116 |             norm_values = np.sqrt(np.sum(np.square(weight_values), axis=0))
117 |             # norm_values = np.linalg.norm(weight_values, axis=0)
118 | 
119 |             target_norms = lib.param(
120 |                 name + '.g',
121 |                 norm_values
122 |             )
123 | 
124 |             with tf.name_scope('weightnorm') as scope:
125 |                 norms = tf.sqrt(tf.reduce_sum(tf.square(weight), reduction_indices=[0]))
126 |                 weight = weight * (target_norms / norms)
127 | 
128 |         # if 'Discriminator' in name:
129 |         #     print "WARNING weight constraint on {}".format(name)
130 |         #     weight = tf.nn.softsign(10.*weight)*.1
131 | 
132 |         if inputs.get_shape().ndims == 2:
133 |             result = tf.matmul(inputs, weight)
134 |         else:
135 |             reshaped_inputs = tf.reshape(inputs, [-1, input_dim])
136 |             result = tf.matmul(reshaped_inputs, weight)
137 |             result = tf.reshape(result, tf.stack(tf.unstack(tf.shape(inputs))[:-1] + [output_dim]))
138 | 
139 |         if biases:
140 |             result = tf.nn.bias_add(
141 |                 result,
142 |                 lib.param(
143 |                     name + '.b',
144 |                     np.zeros((output_dim,), dtype='float32')
145 |                 )
146 |             )
147 | 
148 |         return result


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Convolutional Neural Networks to Enhance Coded Speech
 2 | (This repository contains part of the project code. **Not for commercial use!**) 
 3 |   
 4 | **Abstract**—Enhancing coded speech suffering from far-end acoustic background noise, quantization noise, and potentially transmission errors, is a challenging task. In this work we propose two postprocessing approaches applying convolutional neural networks (CNNs) either in the time domain or the cepstral domain to enhance the coded speech without any modification of the codecs. The time domain approach follows an end-to-end fashion, while the cepstral domain approach uses analysis-synthesis with
 5 | cepstral domain features. The proposed postprocessors in both domains are evaluated for various narrowband and wideband speech codecs in a wide range of conditions. The proposed postprocessor improves speech quality (PESQ) by up to 0.25 MOS-LQO points for G.711, 0.30 points for G.726, 0.82 points for G.722, and 0.26 points for the adaptive multirate wideband codec (AMR-WB). In a subjective CCR listening test, the proposed postprocessor on G.711-coded speech exceeds the speech quality of an ITU-T standardized postfilter by 0.36 CMOS points, and obtains a clear preference of 1.77 CMOS points compared to G.711, even on par with uncoded speech.
 6 | 
 7 | **Index Terms—convolutional neural networks, speech codecs, speech enhancement.**
 8 | 
 9 | If you use **Convolutional Neural Networks to Enhance Coded Speech** in your research, please cite:
10 | ```bibtex
11 | @article{cnn2codedspeech,
12 |   title={Convolutional Neural Networks to Enhance Coded Speech},
13 |   author={Zhao, Ziyue and Liu, Huijun and Fingscheidt, Tim},
14 |   journal={Transactions on Audio, Speech and Language Processing},
15 |   year={2018}
16 | }
17 | ```
18 | 
19 | <p align="center">
20 | <img src="https://github.com/ansleliu/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/blob/master/CNN2EnhancedSpeech.PNG" />
21 | </p>
22 | 
23 | 


--------------------------------------------------------------------------------
/WaveformCNN/DataPrepare.py:
--------------------------------------------------------------------------------
  1 | #####################################################################################
  2 | # DataPrepare.py: data prepare and load data
  3 | # Author:   Huijun Liu
  4 | # Time:     17.07.2017
  5 | # Location: TU Braunschweig IfN
  6 | #####################################################################################
  7 | 
  8 | import os
  9 | import time
 10 | # import h5py as h5
 11 | import numpy as np
 12 | import scipy.io as sio
 13 | 
 14 | from numpy import random
 15 | 
 16 | # -------------------------------------------------------------------------------
 17 | # 1. load data
 18 | # -------------------------------------------------------------------------------
 19 | 
 20 | 
 21 | def load_train_data(train_inputs, train_targets, vali_inputs, vali_targets):
 22 |     print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
 23 |     print('> Loading data ')
 24 | 
 25 |     start = time.time()
 26 |     # ---------------------------------------------------------
 27 |     # 1. Load Input Data for Training
 28 |     # ---------------------------------------------------------
 29 |     mat_input = train_inputs
 30 |     mat_input = os.path.normcase(mat_input)
 31 |     print('> 1. Loading Training Input: ' + mat_input + '...')
 32 | 
 33 |     x_train_noisy = sio.loadmat(mat_input)
 34 |     x_train_noisy = x_train_noisy['inputSetNorm']
 35 |     x_train_noisy = np.array(x_train_noisy)
 36 | 
 37 |     # ---------------------------------------------------------
 38 |     # 2. Load Target Data for training
 39 |     # ---------------------------------------------------------
 40 |     mat_target = train_targets
 41 |     mat_target = os.path.normcase(mat_target)
 42 |     print('> 2. Loading Training Target: ' + mat_target + '...')
 43 | 
 44 |     x_train = sio.loadmat(mat_target)
 45 |     x_train = x_train['targetSet']
 46 |     x_train = np.array(x_train)
 47 |     # x_train = target_min_max_scaler.fit_transform(x_train)
 48 | 
 49 |     # ---------------------------------------------------------
 50 |     # 3. Load Input Data for Validation
 51 |     # ---------------------------------------------------------
 52 |     mat_input_vali = vali_inputs
 53 |     mat_input_vali = os.path.normcase(mat_input_vali)
 54 |     print('> 3. Loading Validation Input: ' + mat_input_vali + '...')
 55 | 
 56 |     x_train_noisy_vali = sio.loadmat(mat_input_vali)
 57 |     x_train_noisy_vali = x_train_noisy_vali['inputSetNorm']
 58 |     x_train_noisy_vali = np.array(x_train_noisy_vali)
 59 | 
 60 |     # ---------------------------------------------------------
 61 |     # 4. Load Target Data for Validation
 62 |     # ---------------------------------------------------------
 63 |     mat_target_vali = vali_targets
 64 |     mat_target_vali = os.path.normcase(mat_target_vali)
 65 |     print('> 4. Loading Validation Target: ' + mat_target_vali + '...')
 66 | 
 67 |     x_train_vali = sio.loadmat(mat_target_vali)
 68 |     x_train_vali = x_train_vali['targetSet']
 69 |     x_train_vali = np.array(x_train_vali)
 70 | 
 71 |     # ---------------------------------------------------------
 72 |     # 5. Randomization of Training and/or validation Pairs
 73 |     # ---------------------------------------------------------
 74 |     print('> 5. Randomization of Training Pairs ...')
 75 |     frame_length = x_train_noisy.shape[1]
 76 | 
 77 |     random.seed(1234)
 78 |     train = np.column_stack((x_train_noisy, x_train))
 79 |     np.random.shuffle(train)
 80 |     x_train_noisy = train[:, :frame_length]
 81 |     x_train = train[:, frame_length:]
 82 | 
 83 |     # validation = np.column_stack((x_train_noisy_vali, x_train_vali))
 84 |     # np.random.shuffle(validation )
 85 |     # x_train_noisy_vali = validation [:, :80]
 86 |     # x_train_vali = validation [:, 80:]
 87 | 
 88 |     # ---------------------------------------------------------
 89 |     # 6. Reshape of Training and validation Pairs
 90 |     # ---------------------------------------------------------
 91 |     x_train_noisy = np.reshape(x_train_noisy, (x_train_noisy.shape[0], x_train_noisy.shape[1], 1))
 92 |     x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
 93 | 
 94 |     x_train_noisy_vali = np.reshape(x_train_noisy_vali, (x_train_noisy_vali.shape[0], x_train_noisy_vali.shape[1], 1))
 95 |     x_train_vali = np.reshape(x_train_vali, (x_train_vali.shape[0], x_train_vali.shape[1], 1))
 96 | 
 97 |     print("> Data Loaded, , Time : ", time.time() - start)
 98 |     print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
 99 | 
100 |     return x_train_noisy, x_train, x_train_noisy_vali, x_train_vali
101 | 
102 | 
def load_test_data(testfile_path="inputTestSet_g711concat_Type3_Frame_256_ceps_v73.mat"):
    """
        Load the test inputs from ./TestData/<testfile_path>.

        The ./TestData folder is created when it does not exist yet. The data
        is read from the .mat variable 'inputTestNorm' and returned with a
        trailing axis of size 1.
    """
    print('> Loading Test data... ')

    data_dir = "./TestData"
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    mat_file = os.path.normcase("/".join((data_dir, testfile_path)))

    frames = np.array(sio.loadmat(mat_file)['inputTestNorm'])

    return frames.reshape(frames.shape[0], frames.shape[1], 1)


--------------------------------------------------------------------------------
/WaveformCNN/Opting_Results/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/WaveformCNN/Opting_Results/README.md


--------------------------------------------------------------------------------
/WaveformCNN/TestData/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/WaveformCNN/TestData/README.md


--------------------------------------------------------------------------------
/WaveformCNN/TrainValiData/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/WaveformCNN/TrainValiData/README.md


--------------------------------------------------------------------------------
/WaveformCNN/WaveformQSRCNN.py:
--------------------------------------------------------------------------------
  1 | #########################################################################################
  2 | # WaveformQSRCNN.py: QSRCNN for G711/ADPCM/AMR/EVS using waveform (time-domain) frames
  3 | # Author:   Huijun Liu
  4 | # Time:     17.07.2017
  5 | # Location: TU Braunschweig IfN
  6 | #########################################################################################
  7 | 
  8 | import os
  9 | import time
 10 | import math
 11 | import scipy.io as sio
 12 | import tensorflow as tf
 13 | 
 14 | from keras.models import Model
 15 | from keras import backend as K
 16 | from keras.layers import Input, Add, Activation
 17 | from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D
 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard, LearningRateScheduler
 19 | 
 20 | from weightnorm import AdamWithWeightnorm
 21 | from tensorflow.python.framework import ops
 22 | 
 23 | # -------------------------------------------------------------------------------
 24 | # 0. define metric and activation function
 25 | # -------------------------------------------------------------------------------
 26 | 
 27 | 
 28 | def snr(y_true, y_pred):
 29 |     """
 30 |         SNR is Signal to Noise Ratio
 31 | 
 32 |     """
 33 |     return 10.0 * K.log((K.sum(K.square(y_true))) / (K.sum(K.square(y_pred - y_true)))) / K.log(10.0)
 34 | 
 35 | 
 36 | def selu(x):
 37 |     with ops.name_scope('elu') as scope:
 38 |         alpha = 1.6732632423543772848170429916717
 39 |         scale = 1.0507009873554804934193349852946
 40 |         return scale * tf.where(x >= 0.0, x, alpha * tf.nn.elu(x))
 41 | 
 42 | """
 43 | def step_decay(epoch):
 44 |     initial_lrate = 0.001
 45 | 
 46 |     drop = 0.25
 47 |     epochs_drop = 3.0
 48 |     lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
 49 | 
 50 |     return lrate
 51 | """
 52 | 
 53 | # -------------------------------------------------------------------------------
 54 | # 1. define Waveform-QSRCNN Model
 55 | # -------------------------------------------------------------------------------
 56 | 
 57 | 
 58 | class WaveformQSRCNN(object):
 59 |     def __init__(self, opt_params={'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 100},
 60 |                  model_params={'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32},
 61 |                  codec_type_params={'weights_dir': "./model_weights", 'logdir': "./log"}):
 62 |         self.learning_rate = opt_params['lr']
 63 |         self.batch_size = opt_params['batch_size']
 64 |         self.nb_epochs = opt_params['nb_epochs']
 65 | 
 66 |         self.log_dir = codec_type_params['logdir']
 67 |         if not (os.path.exists(self.log_dir)):
 68 |             os.makedirs(self.log_dir)
 69 | 
 70 |         self.weights_dir = codec_type_params['weights_dir']
 71 |         if not (os.path.exists(self.weights_dir)):
 72 |             os.makedirs(self.weights_dir)
 73 | 
 74 |         self.frame_len = model_params['frame_len']
 75 |         self.model = self.create_model(model_params)
 76 | 
 77 |     # -------------------------------------------------------------------------------
 78 |     # Load the Weights of the Model
 79 |     # -------------------------------------------------------------------------------
 80 |     def load_weights(self, file_path=""):
 81 |         if file_path == "":
 82 |             file_path = self.weights_dir + '/' + 'G711_WaveformQSRCNN_Weights_Best_bs' + \
 83 |                            str(self.batch_size) + '_lr' + str(self.learning_rate) + '.h5'
 84 | 
 85 |         file_path = os.path.normcase(file_path)
 86 |         self.model.load_weights(file_path)
 87 | 
 88 |     # -------------------------------------------------------------------------------
 89 |     # Save the Weights of the Model
 90 |     # -------------------------------------------------------------------------------
 91 |     def save_weights(self):
 92 |         file_path = self.weights_dir + '/' + 'G711_WaveformQSRCNN_Weights_Final_bs' + \
 93 |                        str(self.batch_size) + '_lr' + str(self.learning_rate) + '.h5'
 94 |         file_path = os.path.normcase(file_path)
 95 |         self.model.save_weights(file_path)
 96 | 
 97 |     # -------------------------------------------------------------------------------
 98 |     # 1. define model
 99 |     # -------------------------------------------------------------------------------
100 |     def create_model(self, model_params={'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 80}):
101 |         frame_len = self.frame_len
102 |         n1 = model_params['n1']
103 |         n2 = model_params['n2']
104 |         n3 = model_params['n3']
105 | 
106 |         input_sque = Input(shape=(frame_len, 1))
107 |         c1 = Conv1D(n1, 3, padding='same')(input_sque)
108 |         c1 = Activation(selu)(c1)
109 |         c1 = Conv1D(n1, 3, padding='same')(c1)
110 |         c1 = Activation(selu)(c1)
111 |         x = MaxPooling1D(2)(c1)
112 | 
113 |         c2 = Conv1D(n2, 3, padding='same')(x)
114 |         c2 = Activation(selu)(c2)
115 |         c2 = Conv1D(n2, 3, padding='same')(c2)
116 |         c2 = Activation(selu)(c2)
117 |         x = MaxPooling1D(2)(c2)
118 | 
119 |         c3 = Conv1D(n3, 3, padding='same')(x)
120 |         c3 = Activation(selu)(c3)
121 |         x = UpSampling1D(2)(c3)
122 | 
123 |         c2_2 = Conv1D(n2, 3, padding='same')(x)
124 |         c2_2 = Activation(selu)(c2_2)
125 |         c2_2 = Conv1D(n2, 3, padding='same')(c2_2)
126 |         c2_2 = Activation(selu)(c2_2)
127 | 
128 |         m1 = Add()([c2, c2_2])
129 |         m1 = UpSampling1D(2)(m1)
130 | 
131 |         c1_2 = Conv1D(n1, 3, padding='same')(m1)
132 |         c1_2 = Activation(selu)(c1_2)
133 |         c1_2 = Conv1D(n1, 3, padding='same')(c1_2)
134 |         c1_2 = Activation(selu)(c1_2)
135 | 
136 |         m2 = Add()([c1, c1_2])
137 | 
138 |         decoded = Conv1D(1, 5, padding='same', activation='linear')(m2)
139 | 
140 |         model = Model(input_sque, decoded)
141 |         model.summary()
142 | 
143 |         learning_rate = self.learning_rate
144 |         # adam = optimizers.Adam(lr=learning_rate)
145 |         # model.compile(optimizer=adam, loss='mse', metrics=[SNRLoss])
146 | 
147 |         adam_wn = AdamWithWeightnorm(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
148 |         model.compile(optimizer=adam_wn, loss='mse', metrics=[snr])
149 | 
150 |         return model
151 | 
152 |     # -------------------------------------------------------------------------------
153 |     # 2. Fit the model
154 |     # -------------------------------------------------------------------------------
155 |     def step_decay(self, epoch):
156 |         initial_lrate = self.learning_rate
157 | 
158 |         drop = 0.25
159 |         epochs_drop = 4.0
160 |         lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
161 | 
162 |         old_lr = K.get_value(self.model.optimizer.lr)
163 |         K.set_value(self.model.optimizer.lr, lrate)
164 |         lrate = K.get_value(self.model.optimizer.lr)
165 |         print("> Ir reduced from %f to %f" % (old_lr, lrate))
166 |         return lrate
167 | 
168 |     def fit(self, x_train_noisy, x_train, x_train_noisy_vali, x_train_vali):
169 |         print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
170 |         print("> Training model ...")
171 | 
172 |         nb_epochs = self.nb_epochs
173 |         batch_size = self.batch_size
174 |         learning_rate = self.learning_rate
175 | 
176 |         # ---------------------------------------------------------
177 |         # 1. define callback functions
178 |         # ---------------------------------------------------------
179 |         # Stop training after 10 epoches if the vali_loss not decreasing
180 |         stop_str = EarlyStopping(monitor='val_snr', patience=16, verbose=1, mode='max')
181 | 
182 |         # Reduce learning rate when stop improving lr = lr*factor
183 |         reduce_LR = ReduceLROnPlateau(monitor='val_snr', factor=0.5, patience=2, verbose=1, mode='max', epsilon=0.0001, cooldown=0, min_lr=0)
184 | 
185 |         best_weights = self.weights_dir + '/' + 'G711_WaveformQSRCNN_Weights_Best_bs' + \
186 |                        str(batch_size) + '_lr' + str(learning_rate) + '.h5'
187 |         best_weights = os.path.normcase(best_weights)
188 |         model_save = ModelCheckpoint(best_weights, monitor='val_snr', save_best_only=True, mode='max', save_weights_only=True, period=1)
189 | 
190 |         logger_name = self.log_dir + '/' + 'G711_WaveformQSRCNN_log_bs' + \
191 |                       str(batch_size) + '_lr' + str(learning_rate) + '.csv'
192 |         logger_name = os.path.normcase(logger_name)
193 |         logger = CSVLogger(logger_name, separator=',', append=False)
194 |         tensor_board = TensorBoard(log_dir=self.log_dir, histogram_freq=1)
195 | 
196 |         lrate = LearningRateScheduler(self.step_decay)
197 | 
198 |         start = time.time()
199 | 
200 |         # ---------------------------------------------------------
201 |         # 2. fit the model
202 |         # ---------------------------------------------------------
203 |         print("> Training model " + "using Batch-size: " + str(batch_size) + ", Learning_rate: " + str(learning_rate) + "...")
204 |         hist = self.model.fit(x_train_noisy, x_train, epochs=nb_epochs, batch_size=batch_size, shuffle=True,
205 |                               validation_data=[x_train_noisy_vali, x_train_vali],
206 |                               callbacks=[lrate, reduce_LR, stop_str, model_save, logger])
207 | 
208 |         print("> Saving Completed, Time : ", time.time() - start)
209 |         print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
210 |         return hist
211 | 
212 |     # -------------------------------------------------------------------------------
213 |     # 3. Save loss snr val_loss val_snr as .mat File
214 |     # -------------------------------------------------------------------------------
215 |     def save_training_curves(self, hist):
216 |         print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
217 |         print("> Saving Training and Validation loss-metric curve ...")
218 | 
219 |         start = time.time()
220 | 
221 |         trian_curve_root = "./Opting_Results"
222 |         if not(os.path.exists(trian_curve_root)):
223 |             os.makedirs(trian_curve_root)
224 |         # ---------------------------------------------------------
225 |         # 1. Saving Training Loss
226 |         # ---------------------------------------------------------
227 |         TrainLossVec = trian_curve_root + '/' + 'G711_WaveformQSRCNN_TrainLoss_bs' + \
228 |                        str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
229 |         TrainLossVec = os.path.normcase(TrainLossVec)
230 | 
231 |         sio.savemat(TrainLossVec, {'Train_Loss_Vec': hist.history['loss']})
232 | 
233 |         # ---------------------------------------------------------
234 |         # 2. Saving Training Metric
235 |         # ---------------------------------------------------------
236 |         TrainSNRVec = trian_curve_root + '/' + 'G711_WaveformQSRCNN_TrainMetrice_bs' + \
237 |                       str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
238 |         TrainSNRVec = os.path.normcase(TrainSNRVec)
239 |         sio.savemat(TrainSNRVec, {'Train_SNR_Vec': hist.history['snr']})  # snr
240 | 
241 |         # ---------------------------------------------------------
242 |         # 3. Saving Validation Loss
243 |         # ---------------------------------------------------------
244 |         ValiLossVec = trian_curve_root + '/' + 'G711_WaveformDDQSRCNN_ValiLoss_bs' + \
245 |                       str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
246 |         ValiLossVec = os.path.normcase(ValiLossVec)
247 |         sio.savemat(ValiLossVec, {'Vali_Loss_Vec': hist.history['val_loss']})
248 | 
249 |         # ---------------------------------------------------------
250 |         # 4. Saving Validation Metric
251 |         # ---------------------------------------------------------
252 |         ValiSNRVec = trian_curve_root + '/' + 'G711_WaveformQSRCNN_ValiMetrice_bs' + \
253 |                      str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
254 |         ValiSNRVec = os.path.normcase(ValiSNRVec)
255 |         sio.savemat(ValiSNRVec, {'Vali_SNR_Vec': hist.history['val_snr']})  # val_snr
256 | 
257 |         print("> Saving Completed, Time : ", time.time() - start)
258 |         print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
259 | 
260 |     # -------------------------------------------------------------------------------
261 |     # 4. Evaluate the Trained Model
262 |     # -------------------------------------------------------------------------------
263 |     def evaluation_model(self, x_test_noisy, weights_path=""):
264 |         print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
265 |         print("> Evaluation of the Trained Model ...")
266 |         # ---------------------------------------------------------
267 |         # 1. Load Model Weights
268 |         # ---------------------------------------------------------
269 |         print('> 1. Loading the Weights of the Model ...')
270 |         self.load_weights(weights_path)
271 | 
272 |         # ---------------------------------------------------------
273 |         # 2. Evaluate the Model
274 |         # ---------------------------------------------------------
275 |         start = time.time()
276 |         print('> 2. Evaluating the Model, Please wait for a Moment ...')
277 |         predicted = self.model.predict(x_test_noisy)
278 |         print('> 2. Evaluating Completed, Time : ' + str(time.time() - start))
279 | 
280 |         # ---------------------------------------------------------
281 |         # 3. Saving the Evaluation Result
282 |         # ---------------------------------------------------------
283 |         print('> 3. Saving the Evaluation Result ...')
284 |         start = time.time()
285 |         pre_file_root = "./Test_Outputs"
286 |         if not (os.path.exists(pre_file_root)):
287 |             os.makedirs(pre_file_root)
288 | 
289 |         preOutput = pre_file_root + "/" + "G711_CNN_testplan_vec.mat"
290 |         preOutput = os.path.normcase(preOutput)
291 | 
292 |         sio.savemat(preOutput, {'predictions': predicted})
293 |         print('> 3. Evaluation Result Saving Completed, Time : ' + str(time.time() - start))
294 |         print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
295 | 


--------------------------------------------------------------------------------
/WaveformCNN/WaveformQSRCNN_TrainTest_GPUs.py:
--------------------------------------------------------------------------------
 1 | ############################################################################################
 2 | # WaveformQSRCNN_TrainTest_GPUs.py: Train QSRCNN for G711/ADPCM/AMR/EVS using Waveform features
 3 | # Author:   Huijun Liu
 4 | # Time:     17.07.2017
 5 | # Location: TU Braunschweig IfN
 6 | ############################################################################################
 7 | 
 8 | import os
 9 | import sys
10 | import time
11 | 
12 | import WaveformQSRCNN as model
13 | import tensorflow as tf
14 | import DataPrepare as dp
15 | from keras.backend.tensorflow_backend import set_session
16 | 
#####################################################################################
# 0. GPU settings and hyper-parameters
#####################################################################################
using_gpu = 0
if using_gpu == 1:
    # Expose only the GPU with index 3 to this process and cap the share of
    # its memory that TensorFlow may allocate at one half.
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    set_session(tf.Session(config=config))

train_or_test = "train"  # either "train" or "test"

default_opt_params = {'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 1000}
default_model_params = {'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 80}
codec_type_params = {'weights_dir': "./model_weights", 'logdir': "./log"}

# Data file locations; the branch selected below fills in the ones it needs.
train_inputs = train_targets = ""
vali_inputs = vali_targets = ""
test_inputs = ""

if train_or_test == "train":
    # --- 1. Load the training and validation data ----------------------------------
    train_inputs = "./TrainValiData/Train_inputSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat"
    train_targets = "./TrainValiData/Train_targetSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat"

    vali_inputs = "./TrainValiData/Vali_inputSet_ALaw_defautLang_OLdata_ValiTrain_smallVali_Reverse_Extended.mat"
    vali_targets = "./TrainValiData/Vali_targetSet_ALaw_defautLang_OLdata_ValiTrain_smallVali_Reverse_Extended.mat"

    x_train_noisy, x_train, x_train_noisy_vali, x_train_vali = dp.load_train_data(
        train_inputs, train_targets, vali_inputs, vali_targets)

    # --- 2. Build the Waveform-QSRCNN model ----------------------------------------
    qsrcnn = model.WaveformQSRCNN(
        opt_params=default_opt_params,
        model_params=default_model_params,
        codec_type_params=codec_type_params,
    )

    # --- 3. Fit the model -----------------------------------------------------------
    hist = qsrcnn.fit(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali)

    # --- 4. Save the weights and the training curves -------------------------------
    qsrcnn.save_weights()
    qsrcnn.save_training_curves(hist=hist)

elif train_or_test == "test":
    # --- 1. Load the test data ------------------------------------------------------
    test_inputs = "inputTestSet_g711concat_PDandOLAI_Frame_80v73.mat"
    x_test_noisy = dp.load_test_data(test_inputs)

    # --- 2. Build the Waveform-QSRCNN model ----------------------------------------
    # codec_type_params defined at the top of the script is reused unchanged.
    qsrcnn = model.WaveformQSRCNN(
        opt_params=default_opt_params,
        model_params=default_model_params,
        codec_type_params=codec_type_params,
    )

    # --- 3. Evaluate the model (weights_path keeps its default value) --------------
    qsrcnn.evaluation_model(x_test_noisy)

else:
    raise Exception("Do you want to  train or test the model ? Please set the variable train_or_test !")
96 | 


--------------------------------------------------------------------------------
/WaveformCNN/log/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/WaveformCNN/log/README.md


--------------------------------------------------------------------------------
/WaveformCNN/model_weights/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/WaveformCNN/model_weights/README.md


--------------------------------------------------------------------------------
/WaveformCNN/model_weights/g711_waveformqsrcnn_weights_best_bs32_lr0.0005.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/WaveformCNN/model_weights/g711_waveformqsrcnn_weights_best_bs32_lr0.0005.h5


--------------------------------------------------------------------------------
/WaveformCNN/weightnorm.py:
--------------------------------------------------------------------------------
  1 | from keras import backend as K
  2 | from keras.optimizers import SGD,Adam
  3 | import tensorflow as tf
  4 | 
  5 | # adapted from keras.optimizers.SGD
class SGDWithWeightnorm(SGD):
    """SGD subclass that updates tensors of rank > 1 through a weight-normalized
    (V, g) parameterization; all other parameters receive the ordinary
    momentum update.
    """

    def get_updates(self, params, constraints, loss):
        """Build and return the list of update ops for one optimization step.

        params:      variables to optimize.
        constraints: container queried via ``p in constraints`` / ``constraints[p]``;
                     a matching callable is applied to the new V value in the
                     weight-normalized branch, or to the new parameter value in
                     the plain branch.
        loss:        passed to ``self.get_gradients`` together with ``params``.

        Returns ``self.updates``.

        NOTE(review): the (params, constraints, loss) argument order presumably
        matches an older Keras optimizer interface -- confirm against the
        installed Keras version.
        """
        grads = self.get_gradients(loss, params)
        self.updates = []

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))
            # the iteration counter is only advanced inside this branch
            self.updates .append(K.update_add(self.iterations, 1))

        # momentum
        shapes = [K.get_variable_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):

            # if a weight tensor (len > 1) use weight normalized parameterization
            ps = K.get_variable_shape(p)
            if len(ps) > 1:

                # get weight normalization parameters
                V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(p, g)

                # momentum container for the 'g' parameter
                # (created here, after self.weights was assigned, so it is not part of self.weights)
                V_scaler_shape = K.get_variable_shape(V_scaler)
                m_g = K.zeros(V_scaler_shape)

                # update g parameters
                v_g = self.momentum * m_g - lr * grad_g  # velocity
                self.updates.append(K.update(m_g, v_g))
                if self.nesterov:
                    new_g_param = g_param + self.momentum * v_g - lr * grad_g
                else:
                    new_g_param = g_param + v_g

                # update V parameters
                v_v = self.momentum * m - lr * grad_V  # velocity
                self.updates.append(K.update(m, v_v))
                if self.nesterov:
                    new_V_param = V + self.momentum * v_v - lr * grad_V
                else:
                    new_V_param = V + v_v

                # if there are constraints we apply them to V, not W
                if p in constraints:
                    c = constraints[p]
                    new_V_param = c(new_V_param)

                # wn param updates --> W updates
                add_weightnorm_param_updates(self.updates, new_V_param, new_g_param, p, V_scaler)

            else: # normal SGD with momentum
                v = self.momentum * m - lr * g  # velocity
                self.updates.append(K.update(m, v))

                if self.nesterov:
                    new_p = p + self.momentum * v - lr * g
                else:
                    new_p = p + v

                # apply constraints
                if p in constraints:
                    c = constraints[p]
                    new_p = c(new_p)

                self.updates.append(K.update(p, new_p))
        return self.updates
 73 | 
 74 | # adapted from keras.optimizers.Adam
class AdamWithWeightnorm(Adam):
    """Adam subclass that updates tensors of rank > 1 through a weight-normalized
    (V, g) parameterization; all other parameters receive the ordinary Adam
    update.
    """

    def get_updates(self, params, constraints, loss):
        """Build and return the list of update ops for one optimization step.

        params:      variables to optimize.
        constraints: container queried via ``p in constraints`` / ``constraints[p]``;
                     a matching callable is applied to the new V value in the
                     weight-normalized branch, or to the new parameter value in
                     the plain branch.
        loss:        passed to ``self.get_gradients`` together with ``params``.

        Returns ``self.updates``.

        NOTE(review): the (params, constraints, loss) argument order presumably
        matches an older Keras optimizer interface -- confirm against the
        installed Keras version.
        """
        grads = self.get_gradients(loss, params)
        # unlike the learning-rate decay below, the iteration counter is always advanced
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        # step size with the beta_1 / beta_2 bias-correction factors folded in
        t = self.iterations + 1
        lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))

        # first (ms) and second (vs) moment accumulators, one pair per parameter
        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):

            # if a weight tensor (len > 1) use weight normalized parameterization
            # this is the only part changed w.r.t. keras.optimizers.Adam
            ps = K.get_variable_shape(p)
            if len(ps)>1:

                # get weight normalization parameters
                V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(p, g)

                # Adam containers for the 'g' parameter
                # (created here, after self.weights was assigned, so they are not part of self.weights)
                V_scaler_shape = K.get_variable_shape(V_scaler)
                m_g = K.zeros(V_scaler_shape)
                v_g = K.zeros(V_scaler_shape)

                # update g parameters
                m_g_t = (self.beta_1 * m_g) + (1. - self.beta_1) * grad_g
                v_g_t = (self.beta_2 * v_g) + (1. - self.beta_2) * K.square(grad_g)
                new_g_param = g_param - lr_t * m_g_t / (K.sqrt(v_g_t) + self.epsilon)
                self.updates.append(K.update(m_g, m_g_t))
                self.updates.append(K.update(v_g, v_g_t))

                # update V parameters
                m_t = (self.beta_1 * m) + (1. - self.beta_1) * grad_V
                v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(grad_V)
                new_V_param = V - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
                self.updates.append(K.update(m, m_t))
                self.updates.append(K.update(v, v_t))

                # if there are constraints we apply them to V, not W
                if p in constraints:
                    c = constraints[p]
                    new_V_param = c(new_V_param)

                # wn param updates --> W updates
                add_weightnorm_param_updates(self.updates, new_V_param, new_g_param, p, V_scaler)

            else: # do optimization normally
                m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
                v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

                self.updates.append(K.update(m, m_t))
                self.updates.append(K.update(v, v_t))

                new_p = p_t
                # apply constraints
                if p in constraints:
                    c = constraints[p]
                    new_p = c(new_p)
                self.updates.append(K.update(p, new_p))
        return self.updates
144 | 
145 | 
def get_weightnorm_params_and_grads(p, g):
    """Express a weight tensor and its gradient in weight-normalized form.

    A scaler with one entry per slice along the last axis of ``p`` is created
    and initialized to ones. Norms are taken over all axes except the last.

    p: weight variable (its static shape is read through the Keras backend).
    g: gradient of the loss with respect to ``p``.

    Returns the tuple ``(V, V_norm, V_scaler, g_param, grad_g, grad_V)``.
    """
    shape = K.get_variable_shape(p)
    leading = len(shape) - 1
    norm_axes = list(range(leading))
    # target shape that lets a per-slice vector broadcast against the tensor
    bcast_shape = [1] * leading + [-1]

    # weight scaler g/||V||; starting at one leaves the effective weights unchanged
    # (the last axis is used as the slice axis, which assumes the TensorFlow layout)
    scaler = K.ones((shape[-1],))

    # direction parameters: V = ||V||/g * W
    direction = p / tf.reshape(scaler, bcast_shape)

    # split the scaler into ||V|| and the gain g
    direction_norm = tf.sqrt(tf.reduce_sum(tf.square(direction), norm_axes))
    gain = scaler * direction_norm

    # gradients with respect to g and V
    gain_grad = tf.reduce_sum(g * direction, norm_axes) / direction_norm
    scaler_row = tf.reshape(scaler, bcast_shape)
    radial_part = tf.reshape(gain_grad / direction_norm, bcast_shape) * direction
    direction_grad = scaler_row * (g - radial_part)

    return direction, direction_norm, scaler, gain, gain_grad, direction_grad
167 | 
168 | 
def add_weightnorm_param_updates(updates, new_V_param, new_g_param, W, V_scaler):
    """Append the ops that write new (V, g) values back into ``W`` and ``V_scaler``.

    The new scaler is ``new_g_param / ||new_V_param||`` (norm over all axes but
    the last) and the new weight is that scaler broadcast-multiplied with
    ``new_V_param``. Two entries are appended to ``updates`` in place: first
    the update of ``W``, then the update of ``V_scaler``. Nothing is returned.
    """
    leading = len(K.get_variable_shape(new_V_param)) - 1
    norm_axes = list(range(leading))

    # recompute the scaler from the updated direction and gain
    v_norm_next = tf.sqrt(tf.reduce_sum(tf.square(new_V_param), norm_axes))
    scaler_next = new_g_param / v_norm_next
    weight_next = tf.reshape(scaler_next, [1] * leading + [-1]) * new_V_param

    updates.extend([K.update(W, weight_next), K.update(V_scaler, scaler_next)])
179 | 
180 | 
181 | # data based initialization for a given Keras model
def data_based_init(model, input):
    """Data-dependent initialization of the weights of a Keras model.

    For every layer that exposes both a ``W`` and a ``b`` attribute, the mean
    and variance of the layer's first output node are computed on the given
    data (over all axes except the last). ``W`` is then divided by the
    standard deviation and ``b`` is replaced by ``(b - mean) / std``. Layers
    are processed one after another in ``model.layers`` order, each with its
    own session run.

    model: Keras model; ``inputs``, ``layers`` and ``uses_learning_phase``
           are read from it.
    input: the data to feed. Either a ready-made feed dict, a list/tuple of
           arrays matched positionally with ``model.inputs``, or a single
           array for ``model.inputs[0]``. (The name shadows the builtin but is
           kept so that keyword callers keep working.)

    A caller-supplied dict is copied before the learning-phase entry is added,
    so the argument is never modified.

    NOTE(review): layers that store their weights under other attribute names
    are skipped silently -- confirm that the Keras version in use exposes
    ``W`` / ``b``.
    """
    # input can be dict, numpy array, or list of numpy arrays
    if isinstance(input, dict):
        feed_dict = dict(input)  # private copy: the caller's mapping stays untouched
    elif isinstance(input, (list, tuple)):
        feed_dict = dict(zip(model.inputs, input))
    else:
        feed_dict = {model.inputs[0]: input}

    # add learning phase if required
    if model.uses_learning_phase and K.learning_phase() not in feed_dict:
        feed_dict[K.learning_phase()] = 1

    # get all layer name, output, weight, bias tuples
    layer_output_weight_bias = []
    for layer in model.layers:
        if hasattr(layer, 'W') and hasattr(layer, 'b'):
            assert layer.built
            # if more than one node, only use the first
            layer_output_weight_bias.append((layer.name, layer.get_output_at(0), layer.W, layer.b))

    # iterate over our list and do data dependent init
    sess = K.get_session()
    for name, output, W, b in layer_output_weight_bias:
        print('Performing data dependent initialization for layer ' + name)
        reduce_axes = list(range(len(output.get_shape()) - 1))
        mean, variance = tf.nn.moments(output, reduce_axes)
        std = tf.sqrt(variance + 1e-10)  # small constant keeps the division finite
        w_bcast = [1] * (len(W.get_shape()) - 1) + [-1]
        updates = tf.group(W.assign(W / tf.reshape(std, w_bcast)), b.assign((b - mean) / std))
        sess.run(updates, feed_dict)


--------------------------------------------------------------------------------