├── .gitignore
├── CNN2EnhancedSpeech.PNG
├── CepstralCNN
│   ├── CepstralQSRCNN.py
│   ├── CepstralQSRCNN_TrainTest_GPUs.py
│   ├── DataPrepare.py
│   ├── Opting_Results
│   │   └── README.md
│   ├── QSR-WGAN-GP_Train_GPUs.py
│   ├── TestData
│   │   └── README.md
│   ├── Test_Outputs
│   │   └── README.md
│   ├── TrainValiData
│   │   └── README.md
│   ├── WaveformQSRCNN.py
│   ├── WaveformQSRCNN_TrainTest_GPUs.py
│   ├── log
│   │   └── README.md
│   ├── model_weights
│   │   └── README.md
│   └── weightnorm.py
├── LICENSE
├── QSR-WGAN-GP
│   ├── .gitignore
│   ├── .idea
│   │   ├── QSR-WGAN-GP.iml
│   │   ├── markdown-navigator.xml
│   │   ├── markdown-navigator
│   │   │   └── profiles_settings.xml
│   │   ├── misc.xml
│   │   ├── modules.xml
│   │   └── workspace.xml
│   ├── QSR-GANGP_Train_GPUs.py
│   ├── QSR-WGAN-GP_Train_GPUs.py
│   └── tflib
│       ├── __init__.py
│       └── ops
│           ├── __init__.py
│           ├── batchnorm.py
│           ├── cond_batchnorm.py
│           ├── conv1d.py
│           ├── conv2d.py
│           ├── deconv2d.py
│           ├── layernorm.py
│           └── linear.py
├── README.md
└── WaveformCNN
    ├── DataPrepare.py
    ├── Opting_Results
    │   └── README.md
    ├── TestData
    │   └── README.md
    ├── TrainValiData
    │   └── README.md
    ├── WaveformQSRCNN.py
    ├── WaveformQSRCNN_TrainTest_GPUs.py
    ├── log
    │   └── README.md
    ├── model_weights
    │   ├── README.md
    │   └── g711_waveformqsrcnn_weights_best_bs32_lr0.0005.h5
    └── weightnorm.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
--------------------------------------------------------------------------------
/CNN2EnhancedSpeech.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CNN2EnhancedSpeech.PNG
--------------------------------------------------------------------------------
/CepstralCNN/CepstralQSRCNN.py:
--------------------------------------------------------------------------------
1 | #######################################################################################################################
2 | # Project QSRCNN: Quantized Speech Reconstruction using Convolutional Neural Network V1.0
3 | # =====================================================================================================================
4 | # CepstralQSRCNN.py: QSRCNN for G711/ADPCM/AMR/EVS using Cepstral features
5 | #
6 | #
7 | # =====================================================================================================================
8 | # Technische Universität Braunschweig, IfN
9 | # Author: Huijun Liu M.Sc.
10 | # Date: 17.06.2017
11 | #######################################################################################################################
12 |
13 | import os
14 | import time
15 | import math
16 | import scipy.io as sio
17 | import tensorflow as tf
18 |
19 | from keras.models import Model
20 | from keras import backend as K
21 | from keras.engine.topology import Layer
22 | from keras.layers import Input, Add, Multiply, Average, Activation
23 | from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D, AveragePooling1D
24 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard, LearningRateScheduler
25 |
26 | from weightnorm import AdamWithWeightnorm
27 | from tensorflow.python.framework import ops
28 |
29 | # -------------------------------------------------------------------------------
30 | # 0. define metric and activation function
31 | # -------------------------------------------------------------------------------
32 |
33 |
34 | def snr(y_true, y_pred):
35 | """
36 | SNR is Signal to Noise Ratio
37 | """
38 | return 10.0 * K.log((K.sum(K.square(y_true))) / (K.sum(K.square(y_pred - y_true)))) / K.log(10.0)
39 |
40 |
41 | def selu(x):
42 | """Scaled Exponential Linear Unit. (Klambauer et al., 2017)
43 | # Arguments
44 | x: A tensor or variable to compute the activation function for.
45 | # References
46 | - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
47 | """
48 | with ops.name_scope('elu') as scope:
49 | alpha = 1.6732632423543772848170429916717
50 | scale = 1.0507009873554804934193349852946
51 | return scale * tf.where(x >= 0.0, x, alpha * tf.nn.elu(x))
52 |
53 | """
54 | def step_decay(epoch):
55 | initial_lrate = 0.001
56 |
57 | drop = 0.5
58 | epochs_drop = 3.0
59 | lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
60 |
61 | return lrate
62 | """
63 |
64 | """
65 | ''' TensorFlow Backend Function '''
66 | def phase_shift(I, r):
67 | ''' Function copied as is from https://github.com/Tetrachrome/subpixel/blob/master/subpixel.py'''
68 |
69 | bsize, a, b, c = I.get_shape().as_list()
70 | bsize = tf.shape(I)[0] # Handling Dimension(None) type for undefined batch dim
71 | X = tf.reshape(I, (bsize, a, b, r, r))
72 | X = tf.transpose(X, (0, 1, 2, 4, 3)) # bsize, a, b, 1, 1
73 | X = tf.split(1, a, X) # a, [bsize, b, r, r]
74 | X = tf.concat(2, [tf.squeeze(x) for x in X]) # bsize, b, a*r, r
75 | X = tf.split(1, b, X) # b, [bsize, a*r, r]
76 | X = tf.concat(2, [tf.squeeze(x) for x in X]) # bsize, a*r, b*r
77 | return tf.reshape(X, (bsize, a * r, b * r, 1))
78 |
79 | def depth_to_scale(input, scale, channels):
80 | if channels > 1:
81 | Xc = tf.split(3, 3, input)
82 | X = tf.concat(3, [phase_shift(x, scale) for x in Xc])
83 | else:
84 | X = phase_shift(input, scale)
85 | return X
86 |
87 |
88 | '''
89 | Implementation is incomplete. Use lambda layer for now.
90 | '''
91 | class SubPixelUpscaling(Layer):
92 |
93 | def __init__(self, r, channels, **kwargs):
94 | super(SubPixelUpscaling, self).__init__(**kwargs)
95 |
96 | self.r = r
97 | self.channels = channels
98 |
99 | def build(self, input_shape):
100 | pass
101 |
102 | def call(self, x, mask=None):
103 | y = depth_to_scale(x, self.r, self.channels)
104 | return y
105 |
106 | def get_output_shape_for(self, input_shape):
107 | if K.image_dim_ordering() == "th":
108 | b, k, r, c = input_shape
109 | return (b, self.channels, r * self.r, c * self.r)
110 | else:
111 | b, r, c, k = input_shape
112 | return (b, r * self.r, c * self.r, self.channels)
113 | """
114 |
115 | # -------------------------------------------------------------------------------
116 | # 1. define Cepstral-QSRCNN Model
117 | # -------------------------------------------------------------------------------
118 | class CepstralQSRCNN(object):
119 | def __init__(self, opt_params={'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 100},
120 | model_params={'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32},
121 | codec_type_params={'codec': 'ADPCM', 'type': '3', 'weights_dir': "./model_weights", 'logdir': "./log"}):
122 | self.learning_rate = opt_params['lr'] # Learning rate
123 | self.batch_size = opt_params['batch_size'] # Batch size
124 | self.nb_epochs = opt_params['nb_epochs'] # Number of epochs
125 |
126 | self.codec = codec_type_params['codec'] # Codec type
127 | self.type = codec_type_params['type'] # Methods type
128 |
129 |         self.log_dir = codec_type_params['logdir']                 # Log file directory
130 | if not (os.path.exists(self.log_dir)):
131 | os.makedirs(self.log_dir)
132 |
133 |         self.weights_dir = codec_type_params['weights_dir']        # Weights file directory
134 | if not (os.path.exists(self.weights_dir)):
135 | os.makedirs(self.weights_dir)
136 |
137 | self.frame_len = model_params['frame_len'] # Frame length
138 | self.model_params = model_params
139 | self.model = self.create_model("qsrcnn")
140 |
141 | # -------------------------------------------------------------------------------
142 | # Load the Weights of the Model
143 | # -------------------------------------------------------------------------------
144 | def load_weights(self, file_path=""):
145 | if file_path == "":
146 | file_path = self.weights_dir + '/' + self.codec + '_Type' + self.type + '_CepstralQSRCNN_Weights_Best_bs' + \
147 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.h5'
148 |
149 | file_path = os.path.normcase(file_path)
150 | self.model.load_weights(file_path)
151 |
152 | # -------------------------------------------------------------------------------
153 | # Save the Weights of the Model
154 | # -------------------------------------------------------------------------------
155 | def save_weights(self):
156 | file_path = self.weights_dir + '/' + self.codec + '_Type' + self.type + '_CepstralQSRCNN_Weights_Final_bs' + \
157 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.h5'
158 | file_path = os.path.normcase(file_path)
159 | self.model.save_weights(file_path)
160 |
161 | """
162 | def _upscale_block(self, ip, id):
163 | init = ip
164 |
165 | x = Conv1D(256, 3, padding='same', name='espcnn_upconv1_%d' % id)(init)
166 | x = Activation(selu)(x)
167 | x = SubPixelUpscaling(r=2, channels=64, name='espcnn_upconv1__upscale1_%d' % id)(x)
168 | x = Conv1D(256, 3, padding='same', name='espcnn_upconv1_filter1_%d' % id)(x)
169 | x = Activation(selu)(x)
170 |
171 | return x
172 | """
173 |
174 | # -------------------------------------------------------------------------------
175 | # 1. define model
176 | # -------------------------------------------------------------------------------
177 | def create_model(self, model_type="qsrcnn"):
178 | if model_type == "qsrcnn":
179 | frame_len = self.frame_len
180 | n1 = self.model_params['n1']
181 | n2 = self.model_params['n2']
182 | n3 = self.model_params['n3']
183 |
184 | input_sque = Input(shape=(frame_len, 1))
185 | c1 = Conv1D(n1, 3, padding='same')(input_sque)
186 | c1 = Activation(selu)(c1)
187 | c1 = Conv1D(n1, 3, padding='same')(c1)
188 | c1 = Activation(selu)(c1)
189 | x = MaxPooling1D(2)(c1)
190 |
191 | c2 = Conv1D(n2, 3, padding='same')(x)
192 | c2 = Activation(selu)(c2)
193 | c2 = Conv1D(n2, 3, padding='same')(c2)
194 | c2 = Activation(selu)(c2)
195 | x = MaxPooling1D(2)(c2)
196 |
197 | c3 = Conv1D(n3, 3, padding='same')(x)
198 | c3 = Activation(selu)(c3)
199 | x = UpSampling1D(2)(c3)
200 |
201 | c2_2 = Conv1D(n2, 3, padding='same')(x)
202 | c2_2 = Activation(selu)(c2_2)
203 | c2_2 = Conv1D(n2, 3, padding='same')(c2_2)
204 | c2_2 = Activation(selu)(c2_2)
205 |
206 | m1 = Add()([c2, c2_2])
207 | m1 = UpSampling1D(2)(m1)
208 |
209 | c1_2 = Conv1D(n1, 3, padding='same')(m1)
210 | c1_2 = Activation(selu)(c1_2)
211 | c1_2 = Conv1D(n1, 3, padding='same')(c1_2)
212 | c1_2 = Activation(selu)(c1_2)
213 |
214 | m2 = Add()([c1, c1_2])
215 |
216 | decoded = Conv1D(1, 5, padding='same', activation='linear')(m2)
217 |
218 | model = Model(input_sque, decoded)
219 | elif model_type == "wavenet":
220 | frame_len = self.frame_len
221 |
222 | ae_width = 16
223 | ae_filter_length = 3
224 |
225 | ae_num_stages = 2
226 | ae_num_layers = 6
227 |
228 | num_stages = 2
229 | num_layers = 6
230 |
231 | width = 16
232 | skip_width = 16
233 | filter_length = 3
234 |
235 | input_sque = Input(shape=(frame_len, 1), name='input_layer')
236 |
237 | # ---------------------------------------
238 | # The Non-Causal Temporal Encoder.
239 | # ---------------------------------------
240 | en = Conv1D(ae_width, ae_filter_length, padding='same', name='ae_startconv')(input_sque)
241 |
242 | for num_layer in range(ae_num_layers):
243 |                 # dilation rate: 2 ** (num_layer % ae_num_stages), alternating 1 and 2
244 | d = Activation(selu)(en)
245 | d = Conv1D(ae_width, 3, padding='same', dilation_rate=2 ** (num_layer % ae_num_stages),
246 | name='ae_dilatedconv_%d' % (num_layer + 1))(d)
247 | d = Activation(selu)(d)
248 |
249 | en2 = Conv1D(ae_width, 1, padding='same', dilation_rate=2 ** (num_layer % ae_num_stages),
250 | name='ae_res_%d' % (num_layer + 1))(d)
251 | en = Add()([en2, en])
252 |
253 | en = Activation(selu)(en)
254 | en = Conv1D(16, 1, padding='causal', dilation_rate=1, name='ae_bottleneck')(en)
255 | en = Activation(selu)(en)
256 | en = AveragePooling1D(2, name='ae_pool')(en)
257 | # encoding = en
258 |
259 | # ---------------------------------------
260 | # The WaveNet Decoder.
261 | # ---------------------------------------
262 | # enup = UpSampling1D(2, name='up_sampling')(en)
263 | # l = shift_right(input_frame)
264 |
265 | l = Conv1D(width, filter_length, padding='causal', dilation_rate=1, name='startconv')(input_sque)
266 | l = Activation(selu)(l)
267 | # Set up skip connections.
268 | s = Conv1D(skip_width, 1, padding='causal', dilation_rate=1, name='skip_start')(l)
269 | s = Activation(selu)(s)
270 |
271 | # Residual blocks with skip connections.
272 | for i in range(num_layers):
273 | d = Conv1D(2 * width, filter_length, padding='causal', dilation_rate=2 ** (i % num_stages),
274 | name='dilatedconv_%d' % (i + 1))(l)
275 | d = Activation(selu)(d)
276 |
277 | en3 = Conv1D(2 * width, 1, padding='causal', dilation_rate=1, name='cond_map_%d' % (i + 1))(en) # 40
278 | en3 = Activation(selu)(en3)
279 | en3 = UpSampling1D(2, name='up_sampling_%d' % (i + 1))(en3)
280 | # d = condition(d,en3)
281 | d = Add()([d, en3])
282 |
283 | d_sigmoid = Activation('sigmoid')(d)
284 | d_tanh = Activation('tanh')(d)
285 | d = Multiply()([d_sigmoid, d_tanh])
286 |
287 | l2 = Conv1D(width, 1, padding='causal', dilation_rate=1, name='res_%d' % (i + 1))(d)
288 | l2 = Activation(selu)(l2)
289 | l = Add()([l2, l])
290 |
291 | s2 = Conv1D(skip_width, 1, padding='causal', dilation_rate=1, name='skip_%d' % (i + 1))(d)
292 | s = Add()([s2, s])
293 |
294 | s = Activation(selu)(s)
295 |
296 | s = Conv1D(skip_width, 3, padding='causal', activation='linear', name='output_layer1')(s)
297 | s = Activation(selu)(s)
298 | en4 = Conv1D(skip_width, 1, padding='causal', activation='linear', name='cond_map_out1')(en)
299 | en4 = Activation(selu)(en4)
300 | en4 = UpSampling1D(2, name='up_sampling')(en4)
301 | s = Add()([en4, s])
302 | s = Activation(selu)(s)
303 |
304 | outs = Conv1D(1, 3, padding='causal', activation='linear', name='output_layer')(s)
305 |
306 | model = Model(input_sque, outs)
307 |
308 | elif model_type == "autoencoder":
309 | frame_len = self.frame_len
310 | n1 = 64
311 | n2 = 32
312 |
313 | input_sque = Input(shape=(frame_len, 1))
314 | c1 = Conv1D(n1, 3, padding='same')(input_sque)
315 | c1 = Activation(selu)(c1)
316 | x = MaxPooling1D(2)(c1)
317 |
318 | c2 = Conv1D(n2, 3, padding='same')(x)
319 | c2 = Activation(selu)(c2)
320 | encoded = MaxPooling1D(2)(c2)
321 |
322 | d1 = UpSampling1D(2)(encoded)
323 | d1 = Conv1D(n2, 3, padding='same')(d1)
324 | d1 = Activation(selu)(d1)
325 | y = Activation(selu)(d1)
326 |
327 | d2 = UpSampling1D(2)(y)
328 | d2 = Conv1D(n1, 3, padding='same')(d2)
329 | d2 = Activation(selu)(d2)
330 |
331 | decoded = Conv1D(1, 5, padding='same', activation='linear')(d2)
332 |
333 | model = Model(input_sque, decoded)
334 |
335 | elif model_type == "esrcnn":
336 | f1 = 5
337 | f2_1 = 1
338 | f2_2 = 2
339 | f2_3 = 3
340 | f3 = 5
341 |
342 | n1 = 128
343 | n2 = 64
344 |
345 | frame_len = self.frame_len
346 |
347 | input_img = Input(shape=(frame_len, 1))
348 | x = Conv1D(n1, f1, padding='same', name='level1')(input_img)
349 | x = Activation(selu)(x)
350 |
351 | x1 = Conv1D(n2, f2_1, padding='same', name='lavel1_1')(x)
352 | x1 = Activation(selu)(x1)
353 | x2 = Conv1D(n2, f2_2, padding='same', name='lavel1_2')(x)
354 | x2 = Activation(selu)(x2)
355 | x3 = Conv1D(n2, f2_3, padding='same', name='lavel1_3')(x)
356 | x3 = Activation(selu)(x3)
357 |
358 | x = Average()([x1, x2, x3])
359 |
360 | out = Conv1D(1, f3, padding='same', activation='linear', name='output_1')(x)
361 | # out = LeakyReLU(0.2)(out)
362 |
363 | model = Model(input_img, out)
364 | """
365 | elif model_type == "subpixel":
366 | frame_len = self.frame_len
367 |
368 | input_frame = Input(shape=(frame_len, 1))
369 | x = Conv1D(64, 5, padding='same', name='level1')(input_frame)
370 | x = Activation(selu)(x)
371 | x = Conv1D(32, 3, padding='same', name='level2')(x)
372 | x = Activation(selu)(x)
373 |
374 | x = self._upscale_block(x, 1)
375 |
376 | out = Conv1D(1, 5, activation='linear', padding='same', name='output_1')(x)
377 |
378 | model = Model(input_frame, out)
379 | """
380 |
381 | model.summary()
382 |
383 | learning_rate = self.learning_rate
384 | # adam = optimizers.Adam(lr=learning_rate)
385 | # model.compile(optimizer=adam, loss='mse', metrics=[SNRLoss])
386 |
387 | adam_wn = AdamWithWeightnorm(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
388 | model.compile(optimizer=adam_wn, loss='mse', metrics=[snr])
389 |
390 | return model
391 |
392 | # -------------------------------------------------------------------------------
393 | # 2. Fit the model
394 | # -------------------------------------------------------------------------------
395 | def step_decay(self, epoch):
396 | initial_lrate = self.learning_rate
397 |
398 | drop = 0.5
399 | epochs_drop = 4.0
400 | lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
401 |
402 | old_lr = K.get_value(self.model.optimizer.lr)
403 | K.set_value(self.model.optimizer.lr, lrate)
404 | lrate = K.get_value(self.model.optimizer.lr)
405 |         print('lr reduced from %f to %f' % (old_lr, lrate))
406 | return lrate
407 |
408 | def fit(self, x_train_noisy, x_train, x_train_noisy_vali, x_train_vali):
409 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
410 | print("> Training model ...")
411 |
412 | nb_epochs = self.nb_epochs
413 | batch_size = self.batch_size
414 | learning_rate = self.learning_rate
415 |
416 | # ---------------------------------------------------------
417 | # 1. define callback functions
418 | # ---------------------------------------------------------
419 |         # Stop training if val_snr has not improved for 16 epochs
420 | stop_str = EarlyStopping(monitor='val_snr', patience=16, verbose=1, mode='max')
421 |
422 |         # Reduce the learning rate (lr = lr * factor) when val_snr stops improving
423 | reduce_LR = ReduceLROnPlateau(monitor='val_snr', factor=0.6, patience=2, verbose=1, mode='max', epsilon=0.0001, cooldown=0, min_lr=0)
424 |
425 | best_weights = self.weights_dir + '/' + self.codec + '_Type' + self.type + '_CepstralQSRCNN_Weights_Best_bs' + \
426 | str(batch_size) + '_lr' + str(learning_rate) + '.h5'
427 | best_weights = os.path.normcase(best_weights)
428 | model_save = ModelCheckpoint(best_weights, monitor='val_snr', save_best_only=True, mode='max', save_weights_only=True, period=1)
429 |
430 | logger_name = self.log_dir + '/' + self.codec + '_Type' + self.type + '_CepstralQSRCNN_log_bs' + \
431 | str(batch_size) + '_lr' + str(learning_rate) + '.csv'
432 | logger_name = os.path.normcase(logger_name)
433 | logger = CSVLogger(logger_name, separator=',', append=False)
434 | tensor_board = TensorBoard(log_dir=self.log_dir, histogram_freq=1)
435 |
436 | lrate = LearningRateScheduler(self.step_decay)
437 |
438 | start = time.time()
439 |
440 | # ---------------------------------------------------------
441 | # 2. fit the model
442 | # ---------------------------------------------------------
443 | print("> Training model " + "using Batch-size: " + str(batch_size) + ", Learning_rate: " + str(learning_rate) + "...")
444 | hist = self.model.fit(x_train_noisy, x_train, epochs=nb_epochs, batch_size=batch_size, shuffle=True,
445 | validation_data=[x_train_noisy_vali, x_train_vali],
446 | callbacks=[reduce_LR, stop_str, model_save, logger])
447 |
448 |         print("> Training Completed, Time : ", time.time() - start)
449 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
450 | return hist
451 |
452 | # -------------------------------------------------------------------------------
453 | # 3. Save loss snr val_loss val_snr as .mat File
454 | # -------------------------------------------------------------------------------
455 | def save_training_curves(self, hist):
456 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
457 | print("> Saving Training and Validation loss-metric curve ...")
458 |
459 | start = time.time()
460 |
461 |         train_curve_root = "./Opting_Results"
462 |         if not(os.path.exists(train_curve_root)):
463 |             os.makedirs(train_curve_root)
464 | # ---------------------------------------------------------
465 | # 1. Saving Training Loss
466 | # ---------------------------------------------------------
467 |         TrainLossVec = train_curve_root + '/' + self.codec + '_Type' + self.type + '_CepstralDDQSRCNN_TrainLoss_bs' + \
468 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
469 | TrainLossVec = os.path.normcase(TrainLossVec)
470 |
471 | sio.savemat(TrainLossVec, {'Train_Loss_Vec': hist.history['loss']})
472 |
473 | # ---------------------------------------------------------
474 | # 2. Saving Training Metric
475 | # ---------------------------------------------------------
476 |         TrainSNRVec = train_curve_root + '/' + self.codec + '_Type' + self.type + '_CepstralDDQSRCNN_TrainMetrice_bs' + \
477 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
478 | TrainSNRVec = os.path.normcase(TrainSNRVec)
479 | sio.savemat(TrainSNRVec, {'Train_SNR_Vec': hist.history['snr']}) # snr
480 |
481 | # ---------------------------------------------------------
482 | # 3. Saving Validation Loss
483 | # ---------------------------------------------------------
484 |         ValiLossVec = train_curve_root + '/' + self.codec + '_Type' + self.type + '_CepstralDDQSRCNN_ValiLoss_bs' + \
485 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
486 | ValiLossVec = os.path.normcase(ValiLossVec)
487 | sio.savemat(ValiLossVec, {'Vali_Loss_Vec': hist.history['val_loss']})
488 |
489 | # ---------------------------------------------------------
490 | # 4. Saving Validation Metric
491 | # ---------------------------------------------------------
492 |         ValiSNRVec = train_curve_root + '/' + self.codec + '_Type' + self.type + '_CepstralDDQSRCNN_ValiMetrice_bs' + \
493 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
494 | ValiSNRVec = os.path.normcase(ValiSNRVec)
495 | sio.savemat(ValiSNRVec, {'Vali_SNR_Vec': hist.history['val_snr']}) # val_snr
496 |
497 | print("> Saving Completed, Time : ", time.time() - start)
498 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
499 |
500 | # -------------------------------------------------------------------------------
501 | # 4. Evaluate the Trained Model
502 | # -------------------------------------------------------------------------------
503 | def evaluation_model(self, x_test_noisy, detail_type="1", weights_path=""):
504 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
505 | print("> Evaluation of the Trained Model ...")
506 | # ---------------------------------------------------------
507 | # 1. Load Model Weights
508 | # ---------------------------------------------------------
509 | print('> 1. Loading the Weights of the Model ...')
510 | self.load_weights(weights_path)
511 |
512 | # ---------------------------------------------------------
513 | # 2. Evaluate the Model
514 | # ---------------------------------------------------------
515 | start = time.time()
516 | print('> 2. Evaluating the Model, Please wait for a Moment ...')
517 | predicted = self.model.predict(x_test_noisy)
518 | print('> 2. Evaluating Completed, Time : ' + str(time.time() - start))
519 |
520 | # ---------------------------------------------------------
521 | # 3. Saving the Evaluation Result
522 | # ---------------------------------------------------------
523 | print('> 3. Saving the Evaluation Result ...')
524 | start = time.time()
525 | pre_file_root = "./Test_Outputs"
526 | if not (os.path.exists(pre_file_root)):
527 | os.makedirs(pre_file_root)
528 |
529 | preOutput = pre_file_root + "/" + self.codec + '_CNN_testplan_Type' + detail_type + "_ceps_vec.mat"
530 | preOutput = os.path.normcase(preOutput)
531 |
532 | sio.savemat(preOutput, {'predictions': predicted})
533 | print('> 3. Evaluation Result Saving Completed, Time : ' + str(time.time() - start))
534 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
--------------------------------------------------------------------------------
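
A minimal usage sketch for the CepstralQSRCNN class above (hypothetical, not a repository file): random arrays stand in for the real cepstral feature sets, and the hyper-parameters mirror the g711/adpcm defaults from CepstralQSRCNN_TrainTest_GPUs.py below.
--------------------------------------------------------------------------------
import numpy as np
from CepstralQSRCNN import CepstralQSRCNN

frame_len = 32                                   # g711/adpcm cepstral frame length
n_frames = 256                                   # hypothetical number of training frames

# Random stand-ins for the .mat feature sets; shape (frames, frame_len, 1)
x_noisy = np.random.randn(n_frames, frame_len, 1).astype('float32')
x_clean = np.random.randn(n_frames, frame_len, 1).astype('float32')

qsrcnn = CepstralQSRCNN(opt_params={'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 2},
                        model_params={'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': frame_len},
                        codec_type_params={'codec': 'g711', 'type': '3',
                                           'weights_dir': "./model_weights", 'logdir': "./log"})

hist = qsrcnn.fit(x_noisy, x_clean, x_noisy[:32], x_clean[:32])  # train and validate
qsrcnn.save_weights()                            # final weights go to ./model_weights
qsrcnn.save_training_curves(hist=hist)           # loss/SNR curves go to ./Opting_Results
--------------------------------------------------------------------------------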
/CepstralCNN/CepstralQSRCNN_TrainTest_GPUs.py:
--------------------------------------------------------------------------------
1 | #######################################################################################################################
2 | # Project QSRCNN: Quantized Speech Reconstruction using Convolutional Neural Network V1.0
3 | # =====================================================================================================================
4 | # CepstralQSRCNN_TrainTest_GPUs.py: Train and Test QSRCNN for G711/ADPCM/AMR/EVS using Cepstral features
5 | #
6 | #
7 | # =====================================================================================================================
8 | # Technische Universität Braunschweig, IfN
9 | # Author: Huijun Liu M.Sc.
10 | # Date: 17.06.2017
11 | #######################################################################################################################
12 |
13 | import os
14 | import sys
15 | import time
16 |
17 | import CepstralQSRCNN as model
18 | import tensorflow as tf
19 | import DataPrepare as dp
20 | from keras.backend.tensorflow_backend import set_session
21 |
22 | #####################################################################################
23 | # 0. Settings For GPU
24 | #####################################################################################
25 | using_gpu = 0
26 | if using_gpu == 1: # Only one GPU can be used
27 |     os.environ["CUDA_VISIBLE_DEVICES"] = "2"                     # select GPU x; physical device index is 3-x on this system
28 | config = tf.ConfigProto()
29 |     config.gpu_options.per_process_gpu_memory_fraction = 0.4      # Up to 40% of the GPU memory can be used
30 | set_session(tf.Session(config=config))
31 |
32 | #####################################################################################
33 | # 1. Settings Parameters
34 | #####################################################################################
35 |
36 | train_or_test = "test" # train or test the deep model
37 | codec = "amrwb" # g711/adpcm/amrwb/evsswb codec can be used
38 | type = "3" # 1_2 or 3 for Training
39 | type_detail = "3" # 1 or 2 or 3 or 4 for Testing
40 | frame_len = "" # 256(g711/adpcm) or 512(amrwb) or 1024(evsswb)
41 |
42 | if codec == "g711" or codec == "adpcm":
43 | default_model_params = {'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32} # Parameters for model itself
44 | frame_len = "256" # 256(g711/adpcm) or 512(amrwb) or 1024(evsswb) # Frame length
45 | elif codec == "amrwb":
46 | default_model_params = {'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 64}
47 | frame_len = "512" # 256(g711/adpcm) or 512(amrwb) or 1024(evsswb)
48 | elif codec == "evsswb":
49 | default_model_params = {'n1': 64, 'n2': 128, 'n3': 64, 'frame_len': 128}
50 | frame_len = "1024" # 256(g711/adpcm) or 512(amrwb) or 1024(evsswb)
51 | else:
52 |     raise Exception("Please set the variable codec!")
53 |
54 | default_opt_params = {'lr': 5e-4, 'batch_size': 16, 'nb_epochs': 1000} # Parameters for model training
55 | codec_type_params = {'codec': codec, 'type': type,
56 | 'weights_dir': "./model_weights",
57 | 'logdir': "./log"} # Other parameters
58 |
59 | #####################################################################################
60 | # 2. Training and Testing
61 | #####################################################################################
62 | train_inputs = "" # Path of the input data for training
63 | train_targets = "" # Path of the target data for training
64 |
65 | vali_inputs = "" # Path of the input data for validation
66 | vali_targets = "" # Path of the target data for validation
67 |
68 | test_inputs = "" # Path of the input data for testing
69 |
70 | if train_or_test == "train":
71 | # -------------------------------------------------------------------------------
72 | # 1. Load Data
73 | # -------------------------------------------------------------------------------
74 | train_inputs = "./TrainValiData/Train_inputSet_" + codec + \
75 | "_defautLang_OLdata_ValiTrain_type" + type + "_Fram256_ceps.mat"
76 | train_targets = "./TrainValiData/Train_targetSet_" + codec + \
77 | "_defautLang_OLdata_ValiTrain_type" + type + "_Fram256_ceps.mat"
78 |
79 | vali_inputs = "./TrainValiData/Vali_inputSet_" + codec + \
80 | "_defautLang_OLdata_ValiTrain_smallVali_type" + type + "_Fram256_ceps.mat"
81 | vali_targets = "./TrainValiData/Vali_targetSet_" + codec + \
82 | "_defautLang_OLdata_ValiTrain_smallVali_type" + type + "_Fram256_ceps.mat"
83 |
84 | x_train_noisy, x_train, x_train_noisy_vali, x_train_vali = dp.load_train_data(train_inputs, train_targets,
85 | vali_inputs, vali_targets)
86 |
87 | # -------------------------------------------------------------------------------
88 | # 2. Init Cepstral-QSRCNN Model
89 | # -------------------------------------------------------------------------------
90 | qsrcnn = model.CepstralQSRCNN(opt_params=default_opt_params,
91 | model_params=default_model_params,
92 | codec_type_params=codec_type_params)
93 |
94 | # -------------------------------------------------------------------------------
95 |     # 3. Fit The Cepstral-QSRCNN Model
96 |     # -------------------------------------------------------------------------------
97 |     hist = qsrcnn.fit(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali)
98 |
99 | # -------------------------------------------------------------------------------
100 | # 4. Save Weights and Training Curves
101 | # -------------------------------------------------------------------------------
102 | qsrcnn.save_weights()
103 | qsrcnn.save_training_curves(hist=hist)
104 |
105 | elif train_or_test == "test":
106 | # -------------------------------------------------------------------------------
107 | # 1. Load Data
108 | # -------------------------------------------------------------------------------
109 | test_inputs = "inputTestSet_" + codec + "_concat_Type" + type_detail + "_Frame_" + frame_len + "_ceps_v73.mat"
110 | x_test_noisy = dp.load_test_data(test_inputs)
111 |
112 | # -------------------------------------------------------------------------------
113 | # 2. Init Cepstral-QSRCNN Model
114 | # -------------------------------------------------------------------------------
115 | if type_detail == "1" or type_detail == "2":
116 | type = "1_2"
117 | elif type_detail == "3" or type_detail == "4":
118 | type = "3"
119 |
120 | codec_type_params = {'codec': codec, 'type': type, 'weights_dir': "./model_weights", 'logdir': "./log"}
121 | qsrcnn = model.CepstralQSRCNN(opt_params=default_opt_params,
122 | model_params=default_model_params,
123 | codec_type_params=codec_type_params)
124 |
125 | # -------------------------------------------------------------------------------
126 |     # 3. Evaluate The Cepstral-QSRCNN Model
127 | # -------------------------------------------------------------------------------
128 | qsrcnn.evaluation_model(x_test_noisy, type_detail)
129 |
130 | else:
131 |     raise Exception("Do you want to train or test the model? Please set the variable train_or_test!")
132 | # sys.exit("Please set the codec name !")
--------------------------------------------------------------------------------
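
All callbacks in the training path above (EarlyStopping, ReduceLROnPlateau, ModelCheckpoint) monitor val_snr, the metric defined in CepstralQSRCNN.py. As a cross-check, the same dB quantity in plain NumPy on synthetic data (illustration only, not a repository file):
--------------------------------------------------------------------------------
import numpy as np

def snr_db(y_true, y_pred):
    """Signal-to-noise ratio in dB, mirroring the Keras snr metric."""
    return 10.0 * np.log10(np.sum(y_true ** 2) / np.sum((y_pred - y_true) ** 2))

y_true = np.sin(np.linspace(0.0, 2.0 * np.pi, 256))   # clean frame
y_pred = y_true + 0.01 * np.random.randn(256)         # lightly distorted copy
print('%.1f dB' % snr_db(y_true, y_pred))             # around 37 dB at this noise level
--------------------------------------------------------------------------------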
/CepstralCNN/DataPrepare.py:
--------------------------------------------------------------------------------
1 | #######################################################################################################################
2 | # Project QSRCNN: Quantized Speech Reconstruction using Convolutional Neural Network V1.0
3 | # =====================================================================================================================
4 | # DataPrepare.py: Data prepare and load data
5 | #
6 | #
7 | # =====================================================================================================================
8 | # Technische Universität Braunschweig, IfN
9 | # Author: Huijun Liu M.Sc.
10 | # Date: 20.05.2017
11 | #######################################################################################################################
12 |
13 | import os
14 | import time
15 | import h5py as h5
16 | import numpy as np
17 | import scipy.io as sio
18 |
19 | from numpy import random
20 |
21 | # -------------------------------------------------------------------------------
22 | # 1. load data
23 | # -------------------------------------------------------------------------------
24 |
25 |
26 | def load_train_data(train_inputs, train_targets, vali_inputs, vali_targets):
27 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
28 | print('> Loading data ')
29 |
30 | start = time.time()
31 | # ---------------------------------------------------------
32 | # 1. Load Input Data for Training
33 | # ---------------------------------------------------------
34 | mat_input = train_inputs
35 | mat_input = os.path.normcase(mat_input)
36 | print('> 1. Loading Training Input: ' + mat_input + '...')
37 |
38 | x_train_noisy = sio.loadmat(mat_input)
39 | x_train_noisy = x_train_noisy['inputSetNorm']
40 | x_train_noisy = np.array(x_train_noisy)
41 |
42 | # ---------------------------------------------------------
43 | # 2. Load Target Data for training
44 | # ---------------------------------------------------------
45 | mat_target = train_targets
46 | mat_target = os.path.normcase(mat_target)
47 | print('> 2. Loading Training Target: ' + mat_target + '...')
48 |
49 | x_train = sio.loadmat(mat_target)
50 | x_train = x_train['targetSet']
51 | x_train = np.array(x_train)
52 | # x_train = target_min_max_scaler.fit_transform(x_train)
53 |
54 | # ---------------------------------------------------------
55 | # 3. Load Input Data for Validation
56 | # ---------------------------------------------------------
57 | mat_input_vali = vali_inputs
58 | mat_input_vali = os.path.normcase(mat_input_vali)
59 | print('> 3. Loading Validation Input: ' + mat_input_vali + '...')
60 |
61 | x_train_noisy_vali = sio.loadmat(mat_input_vali)
62 | x_train_noisy_vali = x_train_noisy_vali['inputSetNorm']
63 | x_train_noisy_vali = np.array(x_train_noisy_vali)
64 |
65 | # ---------------------------------------------------------
66 | # 4. Load Target Data for Validation
67 | # ---------------------------------------------------------
68 | mat_target_vali = vali_targets
69 | mat_target_vali = os.path.normcase(mat_target_vali)
70 | print('> 4. Loading Validation Target: ' + mat_target_vali + '...')
71 |
72 | x_train_vali = sio.loadmat(mat_target_vali)
73 | x_train_vali = x_train_vali['targetSet']
74 | x_train_vali = np.array(x_train_vali)
75 |
76 | # ---------------------------------------------------------
77 | # 5. Randomization of Training and/or validation Pairs
78 | # ---------------------------------------------------------
79 | print('> 5. Randomization of Training Pairs ...')
80 | frame_length = x_train_noisy.shape[1]
81 |
82 | random.seed(1234)
83 | train = np.column_stack((x_train_noisy, x_train))
84 | np.random.shuffle(train)
85 | x_train_noisy = train[:, :frame_length]
86 | x_train = train[:, frame_length:]
87 |
88 | # validation = np.column_stack((x_train_noisy_vali, x_train_vali))
89 | # np.random.shuffle(validation )
90 | # x_train_noisy_vali = validation [:, :frame_length]
91 | # x_train_vali = validation [:, frame_length:]
92 |
93 | # ---------------------------------------------------------
94 | # 6. Reshape of Training and validation Pairs
95 | # ---------------------------------------------------------
96 | x_train_noisy = np.reshape(x_train_noisy, (x_train_noisy.shape[0], x_train_noisy.shape[1], 1))
97 | x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
98 |
99 | x_train_noisy_vali = np.reshape(x_train_noisy_vali, (x_train_noisy_vali.shape[0], x_train_noisy_vali.shape[1], 1))
100 | x_train_vali = np.reshape(x_train_vali, (x_train_vali.shape[0], x_train_vali.shape[1], 1))
101 |
102 |     print("> Data Loaded, Time : ", time.time() - start)
103 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
104 |
105 | return x_train_noisy, x_train, x_train_noisy_vali, x_train_vali
106 |
107 |
108 | def load_test_data(testfile_path="inputTestSet_g711concat_Type3_Frame_256_ceps_v73.mat"):
109 | print('> Loading Test data... ')
110 |
111 | test_file_root = "./TestData"
112 | if not (os.path.exists(test_file_root)):
113 | os.makedirs(test_file_root)
114 |
115 | mat_input = test_file_root + "/" + testfile_path
116 | mat_input = os.path.normcase(mat_input)
117 |
118 | x_test_noisy = h5.File(mat_input, 'r')
119 | x_test_noisy = x_test_noisy.get('inputTestNorm')
120 | x_test_noisy = np.array(x_test_noisy)
121 | x_test_noisy = np.transpose(x_test_noisy)
122 |
123 | # x_test_noisy = sio.loadmat(mat_input)
124 | # x_test_noisy = x_test_noisy['inputTestNorm']
125 | # x_test_noisy = np.array(x_test_noisy)
126 |
127 |     x_test_noisy = np.reshape(x_test_noisy, (x_test_noisy.shape[0], x_test_noisy.shape[1], 1))
128 |
129 | return x_test_noisy
--------------------------------------------------------------------------------
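
Step 5 of load_train_data above shuffles without breaking input/target alignment: both matrices are stacked column-wise, the rows are permuted once, and the halves are split apart again. A toy NumPy illustration of the same trick (not a repository file):
--------------------------------------------------------------------------------
import numpy as np

frame_len = 4
x_noisy = np.arange(12, dtype=float).reshape(3, frame_len)   # 3 input frames
x_clean = x_noisy + 100.0                                    # matching target frames

np.random.seed(1234)
pairs = np.column_stack((x_noisy, x_clean))   # shape (3, 2*frame_len); rows stay paired
np.random.shuffle(pairs)                      # permute rows in place
x_noisy, x_clean = pairs[:, :frame_len], pairs[:, frame_len:]

assert np.allclose(x_clean - x_noisy, 100.0)  # pairing survived the shuffle
--------------------------------------------------------------------------------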
/CepstralCNN/Opting_Results/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/Opting_Results/README.md
--------------------------------------------------------------------------------
/CepstralCNN/QSR-WGAN-GP_Train_GPUs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import math
4 | import numpy as np
5 | import scipy.io as sio
6 | import tensorflow as tf
7 | import keras.backend as K
8 | import matplotlib.pyplot as plt
9 | import scipy.io.wavfile as swave
10 | import keras.optimizers as optimizers
11 |
12 | from numpy import random
13 | from keras import initializers
14 | from keras.models import Model
15 | from keras.layers import Input
16 | from keras.layers.merge import Add
17 | from keras.layers.core import Dense, Flatten, Activation
18 | from keras.layers.normalization import BatchNormalization
19 | from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D
20 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard
21 |
22 | # from weightnorm import AdamWithWeightnorm
23 | from tensorflow.python.framework import ops
24 | from keras.backend.tensorflow_backend import set_session
25 |
26 |
27 | #####################################################################################
28 | # 0. Settings For GPUs
29 | #####################################################################################
30 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"                     # select GPU x; physical device index is 3-x on this system
31 | config = tf.ConfigProto()
32 | config.gpu_options.per_process_gpu_memory_fraction = 0.3 # Only 30% Memory of GPUs can be used
33 | set_session(tf.Session(config=config))
34 |
35 | #####################################################################################
36 | # 1. Define Metric and Activation Functions
37 | #####################################################################################
38 |
39 |
40 | def snr(y_true, y_pred):
41 | """
42 | SNR is Signal to Noise Ratio
43 |
44 | """
45 | return 10.0 * K.log((K.sum(K.square(y_true))) / (K.sum(K.square(y_pred - y_true)))) / K.log(10.0)
46 |
47 |
48 | def selu(x):
49 | with ops.name_scope('elu') as scope:
50 | alpha = 1.6732632423543772848170429916717
51 | scale = 1.0507009873554804934193349852946
52 |         return scale * tf.where(x >= 0.0, x, alpha * tf.nn.elu(x))
53 |
54 |
55 | #####################################################################################
56 | # 2. Define Generative model and Adversarial model
57 | #####################################################################################
58 | def create_generator(inputs_gen):
59 | n1 = 32
60 | n2 = 64
61 | n3 = 32
62 |
63 | c1 = Conv1D(n1, 3, padding='same', name='conv_1')(inputs_gen)
64 | c1 = Activation(selu, name='act_1')(c1)
65 | c1 = Conv1D(n1, 3, padding='same', name='conv_2')(c1)
66 | c1 = Activation(selu, name='act_2')(c1)
67 | x = MaxPooling1D(2, name='mpool_1')(c1)
68 |
69 | c2 = Conv1D(n2, 3, padding='same', name='conv_3')(x)
70 | c2 = Activation(selu, name='act_3')(c2)
71 | c2 = Conv1D(n2, 3, padding='same', name='conv_4')(c2)
72 | c2 = Activation(selu, name='act_4')(c2)
73 | x = MaxPooling1D(2, name='mpool_2')(c2)
74 |
75 | c3 = Conv1D(n3, 3, padding='same', name='conv_5')(x)
76 | c3 = Activation(selu, name='act_5')(c3)
77 | x = UpSampling1D(2, name='usample_1')(c3)
78 |
79 | c2_2 = Conv1D(n2, 3, padding='same', name='conv_6')(x)
80 | c2_2 = Activation(selu, name='act_6')(c2_2)
81 | c2_2 = Conv1D(n2, 3, padding='same', name='conv_7')(c2_2)
82 | c2_2 = Activation(selu, name='act_7')(c2_2)
83 |
84 | m1 = Add(name='add_1')([c2, c2_2])
85 | m1 = UpSampling1D(2, name='usample_2')(m1)
86 |
87 | c1_2 = Conv1D(n1, 3, padding='same', name='conv_8')(m1)
88 | c1_2 = Activation(selu, name='act_8')(c1_2)
89 | c1_2 = Conv1D(n1, 3, padding='same', name='conv_9')(c1_2)
90 | c1_2 = Activation(selu, name='act_9')(c1_2)
91 |
92 | m2 = Add(name='add_2')([c1, c1_2])
93 |
94 | decoded = Conv1D(1, 5, padding='same', activation='linear', name='conv_10')(m2)
95 |
96 | return decoded
97 |
98 |
99 | def create_discriminator(inputs_disc):
100 | x = Conv1D(32, 3, padding='same', name='dis_conv_1')(inputs_disc)
101 | x = Activation(selu, name='dis_act_1')(x)
102 |
103 | x = Conv1D(64, 3, padding='same', name='dis_conv_2')(x)
104 | x = BatchNormalization(name='dis_bnorm_1')(x)
105 | x1 = Activation(selu, name='dis_act_2')(x)
106 |
107 | m1 = Add(name='dis_add_1')([inputs_disc, x1])
108 |
109 | x = Conv1D(32, 3, padding='same', name='dis_conv_3')(m1)
110 | x = Activation(selu, name='dis_act_3')(x)
111 |
112 | x = Conv1D(64, 3, padding='same', name='dis_conv_4')(x)
113 | x = BatchNormalization(name='dis_bnorm_2')(x)
114 | x2 = Activation(selu, name='dis_act_4')(x)
115 | m2 = Add(name='dis_add_2')([m1, x2])
116 |
117 | discri = Conv1D(1, 5, padding='same', name='dis_conv_5')(m2)
118 |
119 | return discri
120 |
121 | #####################################################################################
122 | # 3. Define Training process of QSR_WGAN_GP
123 | #####################################################################################
124 | SEQ_LEN = 80
125 | BATCH_SIZE = 128
126 |
127 |
128 | def load_data():
129 | print('> Loading data... ')
130 | # Load Input Data
131 | mat_input = 'Train_G711_PreProc_defautLang/Train_inputSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat'
132 | mat_input = os.path.normcase(mat_input)
133 | print('> Training Input: ' + mat_input)
134 |
135 | x_train_noisy = sio.loadmat(mat_input)
136 | x_train_noisy = x_train_noisy['inputSetNorm']
137 | x_train_noisy = np.array(x_train_noisy)
138 | # x_train_noisy = input_min_max_scaler.fit_transform(x_train_noisy)
139 |
140 | # Load Input Data for Validation
141 | mat_input_vali = 'Train_G711_PreProc_defautLang/Vali_inputSet_ALaw_defautLang_OLdata_ValiTrain_smallVali.mat'
142 | mat_input_vali = os.path.normcase(mat_input_vali)
143 | print('> Validation Input: ' + mat_input_vali)
144 |
145 | x_train_noisy_vali = sio.loadmat(mat_input_vali)
146 | x_train_noisy_vali = x_train_noisy_vali['inputSetNorm']
147 | x_train_noisy_vali = np.array(x_train_noisy_vali)
148 |
149 | # Load Target Data
150 | mat_target = 'Train_G711_PreProc_defautLang/Train_targetSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat'
151 | mat_target = os.path.normcase(mat_target)
152 | print('> Training Target: ' + mat_target)
153 |
154 | x_train = sio.loadmat(mat_target)
155 | x_train = x_train['targetSet']
156 | x_train = np.array(x_train)
157 | # x_train = target_min_max_scaler.fit_transform(x_train)
158 |
159 | # Load Target Data for Validation
160 | mat_target_vali = 'Train_G711_PreProc_defautLang/Vali_targetSet_ALaw_defautLang_OLdata_ValiTrain_smallVali.mat'
161 | mat_target_vali = os.path.normcase(mat_target_vali)
162 | print('> Validation Target: ' + mat_target_vali)
163 |
164 | x_train_vali = sio.loadmat(mat_target_vali)
165 | x_train_vali = x_train_vali['targetSet']
166 | x_train_vali = np.array(x_train_vali)
167 |
168 |     # Randomization of Training Pairs (validation pairs are left unshuffled)
169 | random.seed(1331)
170 |
171 | train = np.column_stack((x_train_noisy, x_train))
172 | np.random.shuffle(train)
173 | x_train_noisy = train[:, :SEQ_LEN]
174 | x_train = train[:, SEQ_LEN:]
175 |
176 |     # Reshape of Training Pairs and Validation Pairs
177 | x_train_noisy = np.reshape(x_train_noisy, (x_train_noisy.shape[0], x_train_noisy.shape[1], 1))
178 | x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
179 |
180 | # validation = np.column_stack((x_train_noisy_vali, x_train_vali))
181 | # np.random.shuffle(validation )
182 | # x_train_noisy_vali = validation [:, :SEQ_LEN]
183 | # x_train_vali = validation [:, SEQ_LEN:]
184 |
185 | x_train_noisy_vali = np.reshape(x_train_noisy_vali, (x_train_noisy_vali.shape[0], x_train_noisy_vali.shape[1], 1))
186 | x_train_vali = np.reshape(x_train_vali, (x_train_vali.shape[0], x_train_vali.shape[1], 1))
187 |
188 | print('> Data Loaded. Model Compiling... ')
189 | return x_train_noisy, x_train, x_train_noisy_vali, x_train_vali
190 |
191 | # Batch generator over training pairs
192 | def batch_generator(x_train_noisy, x_train, batch_size=128):
193 | while True:
194 | for i in range(0, x_train_noisy.shape[0] - batch_size + 1, batch_size):
195 | yield x_train_noisy[i:i+batch_size], x_train[i:i+batch_size]
196 |
197 | default_opt_params = {'lr': 5e-5, 'clip': 1e-2, 'n_lambda': 10, 'n_critic': 10}
198 |
199 |
200 | class QSRWGAN(object):
201 | def __init__(self, opt_params=default_opt_params, frame_len=80):
202 | self.n_critic = opt_params['n_critic']
203 | self.n_lambda = opt_params['n_lambda']
204 | self.clip = opt_params['clip']
205 | self.frame_len = frame_len
206 |
207 | # ------------------------------------------------------------------
208 | # 1. create session
209 | # ------------------------------------------------------------------
210 | self.sess = tf.Session()
211 | K.set_session(self.sess) # pass the session to keras
212 |
213 | # ------------------------------------------------------------------
214 | # 2. create generator and discriminator
215 | # ------------------------------------------------------------------
216 | with tf.name_scope('generator'):
217 | gen_inputs = Input(shape=(self.frame_len, 1))
218 | gen_outputs = create_generator(gen_inputs)
219 |
220 | with tf.name_scope('discriminator'):
221 | dis_inputs = Input(shape=(self.frame_len, 1))
222 | dis_outputs = create_discriminator(dis_inputs)
223 |
224 | # ------------------------------------------------------------------
225 | # 3. instantiate networks of generator and discriminator
226 | # ------------------------------------------------------------------
227 | Generator = Model(inputs=gen_inputs, outputs=gen_outputs)
228 | Generator.summary()
229 | self.gen_model = Generator
230 | Discriminator = Model(inputs=dis_inputs, outputs=dis_outputs)
231 | Discriminator.summary()
232 |
233 | # ------------------------------------------------------------------
234 | # 4. save the inputs of generator and discriminator
235 | # ------------------------------------------------------------------
236 | quan_inputs = tf.placeholder(tf.float32, shape=(None, self.frame_len, 1), name='quan_inputs')
237 | real_inputs = tf.placeholder(tf.float32, shape=(None, self.frame_len, 1), name='real_inputs')
238 | self.inputs = quan_inputs, real_inputs
239 |
240 | # ------------------------------------------------------------------
241 | # 5. get the weights of generator and discriminator
242 | # ------------------------------------------------------------------
243 | self.gen_weights = [weights for weights in tf.global_variables() if 'generator' in weights.name]
244 | self.dis_weights = [weights for weights in tf.global_variables() if 'discriminator' in weights.name]
245 | # self.gen_weights = Generator.get_weights()
246 | # self.dis_weights = Discriminator.get_weights()
247 |
248 | # ------------------------------------------------------------------
249 | # 6. create predictions of generator and discriminator
250 | # ------------------------------------------------------------------
251 | fake_inputs = Generator(quan_inputs)
252 | disc_real = Discriminator(real_inputs)
253 | disc_fake = Discriminator(fake_inputs)
254 | self.predictions = fake_inputs
255 |
256 | # ------------------------------------------------------------------
257 | # 7. create losses and compute probabilities of discriminator
258 | # ------------------------------------------------------------------
259 | # 7.1. WGAN lipschitz-penalty
260 | alpha = tf.random_uniform(shape=[BATCH_SIZE, 1, 1], minval=-0.4, maxval=0.4)
261 | differences = fake_inputs - real_inputs
262 | interpolates = real_inputs + (alpha * differences)
263 |
264 | gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0]
265 | # slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2]))
266 | # gradient_penalty = self.n_lambda * tf.reduce_mean((slopes - 1.) ** 2)
267 | gp = K.mean(K.square(K.sqrt(K.sum(K.square(gradients), axis=1)) - 1))
268 | gradient_penalty = self.n_lambda * gp
269 |
270 | disc_loss = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
271 | disc_loss += gradient_penalty
272 |
273 | self.dis_loss = disc_loss
274 | self.gen_loss = -tf.reduce_mean(disc_fake)
275 |
276 | self.disc_real = tf.reduce_mean(disc_real)
277 | self.disc_fake = tf.reduce_mean(disc_fake)
278 | self.prob_real = tf.reduce_mean(tf.sigmoid(disc_real))
279 | self.prob_fake = tf.reduce_mean(tf.sigmoid(disc_fake))
280 |
281 | # ------------------------------------------------------------------
282 | # 8. create optimizer for generator and discriminator
283 | # ------------------------------------------------------------------
284 | learning_rate = opt_params['lr']
285 |
286 | gen_train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.gen_loss, var_list=self.gen_weights)
287 | disc_train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.dis_loss, var_list=self.dis_weights)
288 |
289 | self.gen_opt_train = gen_train_op
290 | self.dis_opt_train = disc_train_op
291 |
292 | def load_weights(self):
293 | self.gen_model.load_weights('ddsrcnn_weights_defaultLang_OL40_stopstr_bs128_lr5e-05.h5')
294 |
295 | def save_weights(self, file_path):
296 | file_path = os.path.normcase(file_path)
297 | self.gen_model.save_weights(file_path)
298 |
299 | def load_batch(self, x_train_noise, x_train, train=True):
300 | gen_inputs, dis_inputs = self.inputs
301 | return {gen_inputs: x_train_noise, dis_inputs: x_train, K.learning_phase(): train}
302 |
303 | def gen(self, x_train_noise):
304 | gen_inputs, dis_inputs = self.inputs
305 | feed_dict = {gen_inputs: x_train_noise, K.learning_phase(): False}
306 | return self.sess.run(self.predictions, feed_dict=feed_dict)
307 |
308 | def gen_train(self, feed_dict):
309 | _, gen_loss = self.sess.run([self.gen_opt_train, self.gen_loss], feed_dict=feed_dict)
310 | return gen_loss
311 |
312 | def dis_train(self, feed_dict):
313 | # take a step of adam
314 | _, dis_loss = self.sess.run([self.dis_opt_train, self.dis_loss], feed_dict=feed_dict)
315 | # return discriminator loss
316 | return dis_loss
317 |
318 | def fit(self, x_train_noise, x_train, x_train_noise_vali, x_train_vali, epochs=10, logdir='/qsrwgan_run'):
319 | # ------------------------------------------------------------------
320 | # 1. initialize log directory
321 | # ------------------------------------------------------------------
322 | if tf.gfile.Exists(logdir):
323 | tf.gfile.DeleteRecursively(logdir)
324 |
325 | tf.gfile.MakeDirs(logdir)
326 |
327 | # ------------------------------------------------------------------
328 | # 2. initialize model
329 | # ------------------------------------------------------------------
330 | init = tf.global_variables_initializer()
331 | self.sess.run(init)
332 | self.load_weights()
333 |
334 | # ------------------------------------------------------------------
335 | # 3. train the model
336 | # ------------------------------------------------------------------
337 | step, g_step, epoch = 0, 0, 0
338 | curr_epoch = 0
339 |
340 | # create data for the gan training
341 | # generator = batch_generator(x_train_noise, x_train)
342 | mat_input = 'Train_G711_PreProc_defautLang/inputTestSet_g711concat_nonOL_Frame_80.mat'
343 | mat_input = os.path.normcase(mat_input)
344 | x_train_noisy = sio.loadmat(mat_input)
345 | x_train_noisy = x_train_noisy['inputTestNorm']
346 | x_train_noisy = np.array(x_train_noisy)
347 | x_train_noisy = np.reshape(x_train_noisy, (x_train_noisy.shape[0], x_train_noisy.shape[1], 1))
348 |
349 | while curr_epoch < epochs:
350 | # create data for the gan training
351 | generator = batch_generator(x_train_noise, x_train, BATCH_SIZE)
352 | # generator_vali = batch_generator(x_train_noise_vali, x_train_vali, 1024)
353 |
354 | curr_iter = 0
355 | while curr_iter < x_train_noise.shape[0]//BATCH_SIZE:
356 | start_time = time.time()
357 | # n_critic = 100 if g_step < 25 or (g_step+1) % 500 == 0 else self.n_critic
358 |
359 | for i in range(self.n_critic):
360 | curr_iter += 1
361 | dis_losses = []
362 |
363 | # load the batch
364 | quant_batch, real_batch = generator.__next__()
365 | # quant_batch = np.random.randn(BATCH_SIZE, 80, 1)
366 | feed_dict = self.load_batch(quant_batch, real_batch)
367 |
368 | # train the discriminator
369 | dis_loss = self.dis_train(feed_dict)
370 | dis_losses.append(dis_loss)
371 |
372 | dis_loss = np.array(dis_losses).mean()
373 |
374 | # train the generator
375 | curr_iter += 1
376 | quant_batch, real_batch = generator.__next__()
377 | # quant_batch = np.random.randn(BATCH_SIZE, 80, 1)
378 | feed_dict = self.load_batch(quant_batch, real_batch)
379 | gen_loss = self.gen_train(feed_dict)
380 |
381 | g_step += 1
382 |
383 | if g_step < 1000 or g_step % 1000 == 0:
384 | tot_time = time.time() - start_time
385 |                 print('Epoch: %3d, Gen Steps: %4d (%3.1f s), Discriminator loss: %.6f, Generator loss: %.6f' % (curr_epoch, g_step, tot_time, dis_loss, gen_loss))
386 |
387 | if g_step % 50 == 0:
388 |
389 |
390 | prediction = self.gen(np.random.randn(BATCH_SIZE, 80, 1))
391 | # feed_dict = self.load_batch(x_train_noisy, real_batch_vali)
392 | # quanspeech, realspeech = self.sess.run(self.inputs, feed_dict)
393 | fname = 'recon-speech-%d_%d.wav' % (curr_iter, g_step)
394 | swave.write(fname, 8000, np.reshape(prediction, (prediction.size,)))
395 | # fname = 'real-speech-%d.wav' % g_step
396 | # swave.write(fname, 8000, np.reshape(realspeech, (realspeech.size,)))
397 |
398 | # fig = plt.figure(facecolor='white')
399 | # ax = fig.add_subplot(111)
400 | # ax.plot(np.reshape(realspeech, (realspeech.size,)), label='RealSpeech')
401 | # plt.plot(np.reshape(quanspeech, (quanspeech.size,)), label='QuanSpeech')
402 | # plt.plot(np.reshape(prediction, (prediction.size,)), label='Prediction')
403 |
404 | # plt.legend()
405 | # plt.show()
406 |
407 | curr_epoch += 1
408 |
409 | self.save_weights("qsrwgan_weights.h5")
410 |
411 | model = QSRWGAN(opt_params=default_opt_params)
412 | # train model
413 | x_train_noisy, x_train, _, _ = load_data()
414 | model.fit(x_train_noisy, x_train, _, _, epochs=10000)
415 |
--------------------------------------------------------------------------------
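
The gradient penalty built in step 7.1 above penalizes the discriminator's gradient norm at points interpolated between real and generated frames: gradient_penalty = n_lambda * mean((||g||_2 - 1)^2), with the norm taken over the time axis (axis=1), matching the K.sum call. A standalone NumPy rendering of just that formula, with a random array standing in for the tf.gradients result (illustration only, not a repository file):
--------------------------------------------------------------------------------
import numpy as np

n_lambda = 10                      # penalty weight, as in default_opt_params
batch_size, seq_len = 128, 80      # BATCH_SIZE and SEQ_LEN above

# Stand-in for tf.gradients(Discriminator(interpolates), [interpolates])[0]
grads = np.random.randn(batch_size, seq_len, 1)

slopes = np.sqrt(np.square(grads).sum(axis=1))   # per-example L2 norm, shape (128, 1)
gradient_penalty = n_lambda * np.mean(np.square(slopes - 1.0))
print('gradient penalty: %.4f' % gradient_penalty)
--------------------------------------------------------------------------------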
/CepstralCNN/TestData/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/TestData/README.md
--------------------------------------------------------------------------------
/CepstralCNN/Test_Outputs/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/Test_Outputs/README.md
--------------------------------------------------------------------------------
/CepstralCNN/TrainValiData/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/TrainValiData/README.md
--------------------------------------------------------------------------------
/CepstralCNN/WaveformQSRCNN.py:
--------------------------------------------------------------------------------
1 | #########################################################################################
2 | # WaveformQSRCNN.py: QSRCNN for G711/ADPCM/AMR/EVS using Waveform features
3 | # Author: Huijun Liu
4 | # Time: 10.05.2017
5 | # Location: TU Braunschweig IfN
6 | #########################################################################################
7 |
8 | import os
9 | import time
10 | import math
11 | import scipy.io as sio
12 | import tensorflow as tf
13 |
14 | from keras.models import Model
15 | from keras import backend as K
16 | from keras.layers import Input, Add, Activation
17 | from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D
18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard, LearningRateScheduler
19 |
20 | from weightnorm import AdamWithWeightnorm
21 | from tensorflow.python.framework import ops
22 |
23 | # -------------------------------------------------------------------------------
24 | # 0. define metric and activation function
25 | # -------------------------------------------------------------------------------
26 |
27 |
28 | def snr(y_true, y_pred):
29 | """
30 |     Signal-to-noise ratio in dB: 10 * log10(sum(y_true^2) / sum((y_pred - y_true)^2))
31 |
32 | """
33 | return 10.0 * K.log((K.sum(K.square(y_true))) / (K.sum(K.square(y_pred - y_true)))) / K.log(10.0)
34 |
35 |
36 | def selu(x):
37 |     with ops.name_scope('selu') as scope:
38 | alpha = 1.6732632423543772848170429916717
39 | scale = 1.0507009873554804934193349852946
40 | return scale * tf.where(x >= 0.0, x, alpha * tf.nn.elu(x))
41 |
42 | """
43 | def step_decay(epoch):
44 | initial_lrate = 0.001
45 |
46 | drop = 0.25
47 | epochs_drop = 3.0
48 | lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
49 |
50 | return lrate
51 | """
52 |
53 | # -------------------------------------------------------------------------------
54 | # 1. define Waveform-QSRCNN Model
55 | # -------------------------------------------------------------------------------
56 |
57 |
58 | class WaveformQSRCNN(object):
59 | def __init__(self, opt_params={'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 100},
60 | model_params={'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32},
61 | codec_type_params={'weights_dir': "./model_weights", 'logdir': "./log"}):
62 | self.learning_rate = opt_params['lr']
63 | self.batch_size = opt_params['batch_size']
64 | self.nb_epochs = opt_params['nb_epochs']
65 |
66 | self.log_dir = codec_type_params['logdir']
67 | if not (os.path.exists(self.log_dir)):
68 | os.makedirs(self.log_dir)
69 |
70 | self.weights_dir = codec_type_params['weights_dir']
71 | if not (os.path.exists(self.weights_dir)):
72 | os.makedirs(self.weights_dir)
73 |
74 | self.frame_len = model_params['frame_len']
75 | self.model = self.create_model(model_params)
76 |
77 | # -------------------------------------------------------------------------------
78 | # Load the Weights of the Model
79 | # -------------------------------------------------------------------------------
80 | def load_weights(self, file_path=""):
81 | if file_path == "":
82 | file_path = self.weights_dir + '/' + 'G711_WaveformQSRCNN_Weights_Best_bs' + \
83 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.h5'
84 |
85 | file_path = os.path.normcase(file_path)
86 | self.model.load_weights(file_path)
87 |
88 | # -------------------------------------------------------------------------------
89 | # Save the Weights of the Model
90 | # -------------------------------------------------------------------------------
91 | def save_weights(self):
92 | file_path = self.weights_dir + '/' + 'G711_WaveformQSRCNN_Weights_Final_bs' + \
93 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.h5'
94 | file_path = os.path.normcase(file_path)
95 | self.model.save_weights(file_path)
96 |
97 | # -------------------------------------------------------------------------------
98 | # 1. define model
99 | # -------------------------------------------------------------------------------
100 | def create_model(self, model_params={'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 80}):
101 | frame_len = self.frame_len
102 | n1 = model_params['n1']
103 | n2 = model_params['n2']
104 | n3 = model_params['n3']
105 |
106 | input_sque = Input(shape=(frame_len, 1))
107 | c1 = Conv1D(n1, 3, padding='same')(input_sque)
108 | c1 = Activation(selu)(c1)
109 | c1 = Conv1D(n1, 3, padding='same')(c1)
110 | c1 = Activation(selu)(c1)
111 | x = MaxPooling1D(2)(c1)
112 |
113 | c2 = Conv1D(n2, 3, padding='same')(x)
114 | c2 = Activation(selu)(c2)
115 | c2 = Conv1D(n2, 3, padding='same')(c2)
116 | c2 = Activation(selu)(c2)
117 | x = MaxPooling1D(2)(c2)
118 |
119 | c3 = Conv1D(n3, 3, padding='same')(x)
120 | c3 = Activation(selu)(c3)
121 | x = UpSampling1D(2)(c3)
122 |
123 | c2_2 = Conv1D(n2, 3, padding='same')(x)
124 | c2_2 = Activation(selu)(c2_2)
125 | c2_2 = Conv1D(n2, 3, padding='same')(c2_2)
126 | c2_2 = Activation(selu)(c2_2)
127 |
128 | m1 = Add()([c2, c2_2])
129 | m1 = UpSampling1D(2)(m1)
130 |
131 | c1_2 = Conv1D(n1, 3, padding='same')(m1)
132 | c1_2 = Activation(selu)(c1_2)
133 | c1_2 = Conv1D(n1, 3, padding='same')(c1_2)
134 | c1_2 = Activation(selu)(c1_2)
135 |
136 | m2 = Add()([c1, c1_2])
137 |
138 | decoded = Conv1D(1, 5, padding='same', activation='linear')(m2)
139 |
140 | model = Model(input_sque, decoded)
141 | model.summary()
142 |
143 | learning_rate = self.learning_rate
144 | # adam = optimizers.Adam(lr=learning_rate)
145 | # model.compile(optimizer=adam, loss='mse', metrics=[SNRLoss])
146 |
147 | adam_wn = AdamWithWeightnorm(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
148 | model.compile(optimizer=adam_wn, loss='mse', metrics=[snr])
149 |
150 | return model
151 |
152 | # -------------------------------------------------------------------------------
153 | # 2. Fit the model
154 | # -------------------------------------------------------------------------------
155 | def step_decay(self, epoch):
156 | initial_lrate = self.learning_rate
157 |
158 | drop = 0.25
159 | epochs_drop = 4.0
160 | lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
161 |
162 | old_lr = K.get_value(self.model.optimizer.lr)
163 | K.set_value(self.model.optimizer.lr, lrate)
164 | lrate = K.get_value(self.model.optimizer.lr)
165 |         print("> lr reduced from %f to %f" % (old_lr, lrate))
166 | return lrate
167 |
168 | def fit(self, x_train_noisy, x_train, x_train_noisy_vali, x_train_vali):
169 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
170 | print("> Training model ...")
171 |
172 | nb_epochs = self.nb_epochs
173 | batch_size = self.batch_size
174 | learning_rate = self.learning_rate
175 |
176 | # ---------------------------------------------------------
177 | # 1. define callback functions
178 | # ---------------------------------------------------------
179 |         # Stop training early if val_snr has not improved for 16 epochs
180 | stop_str = EarlyStopping(monitor='val_snr', patience=16, verbose=1, mode='max')
181 |
182 |         # Reduce the learning rate (lr = lr * factor) when val_snr stops improving
183 | reduce_LR = ReduceLROnPlateau(monitor='val_snr', factor=0.5, patience=2, verbose=1, mode='max', epsilon=0.0001, cooldown=0, min_lr=0)
184 |
185 | best_weights = self.weights_dir + '/' + 'G711_WaveformQSRCNN_Weights_Best_bs' + \
186 | str(batch_size) + '_lr' + str(learning_rate) + '.h5'
187 | best_weights = os.path.normcase(best_weights)
188 | model_save = ModelCheckpoint(best_weights, monitor='val_snr', save_best_only=True, mode='max', save_weights_only=True, period=1)
189 |
190 | logger_name = self.log_dir + '/' + 'G711_WaveformQSRCNN_log_bs' + \
191 | str(batch_size) + '_lr' + str(learning_rate) + '.csv'
192 | logger_name = os.path.normcase(logger_name)
193 | logger = CSVLogger(logger_name, separator=',', append=False)
194 |         tensor_board = TensorBoard(log_dir=self.log_dir, histogram_freq=1)  # note: not passed to the callbacks list below
195 |
196 | lrate = LearningRateScheduler(self.step_decay)
197 |
198 | start = time.time()
199 |
200 | # ---------------------------------------------------------
201 | # 2. fit the model
202 | # ---------------------------------------------------------
203 | print("> Training model " + "using Batch-size: " + str(batch_size) + ", Learning_rate: " + str(learning_rate) + "...")
204 | hist = self.model.fit(x_train_noisy, x_train, epochs=nb_epochs, batch_size=batch_size, shuffle=True,
205 |                               validation_data=(x_train_noisy_vali, x_train_vali),
206 | callbacks=[lrate, reduce_LR, stop_str, model_save, logger])
207 |
208 | print("> Saving Completed, Time : ", time.time() - start)
209 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
210 | return hist
211 |
212 | # -------------------------------------------------------------------------------
213 | # 3. Save loss snr val_loss val_snr as .mat File
214 | # -------------------------------------------------------------------------------
215 | def save_training_curves(self, hist):
216 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
217 | print("> Saving Training and Validation loss-metric curve ...")
218 |
219 | start = time.time()
220 |
221 |         train_curve_root = "./Opting_Results"
222 |         if not(os.path.exists(train_curve_root)):
223 |             os.makedirs(train_curve_root)
224 | # ---------------------------------------------------------
225 | # 1. Saving Training Loss
226 | # ---------------------------------------------------------
227 |         TrainLossVec = train_curve_root + '/' + 'G711_WaveformQSRCNN_TrainLoss_bs' + \
228 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
229 | TrainLossVec = os.path.normcase(TrainLossVec)
230 |
231 | sio.savemat(TrainLossVec, {'Train_Loss_Vec': hist.history['loss']})
232 |
233 | # ---------------------------------------------------------
234 | # 2. Saving Training Metric
235 | # ---------------------------------------------------------
236 |         TrainSNRVec = train_curve_root + '/' + 'G711_WaveformQSRCNN_TrainMetrice_bs' + \
237 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
238 | TrainSNRVec = os.path.normcase(TrainSNRVec)
239 | sio.savemat(TrainSNRVec, {'Train_SNR_Vec': hist.history['snr']}) # snr
240 |
241 | # ---------------------------------------------------------
242 | # 3. Saving Validation Loss
243 | # ---------------------------------------------------------
244 |         ValiLossVec = train_curve_root + '/' + 'G711_WaveformQSRCNN_ValiLoss_bs' + \
245 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
246 | ValiLossVec = os.path.normcase(ValiLossVec)
247 | sio.savemat(ValiLossVec, {'Vali_Loss_Vec': hist.history['val_loss']})
248 |
249 | # ---------------------------------------------------------
250 | # 4. Saving Validation Metric
251 | # ---------------------------------------------------------
252 |         ValiSNRVec = train_curve_root + '/' + 'G711_WaveformQSRCNN_ValiMetrice_bs' + \
253 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
254 | ValiSNRVec = os.path.normcase(ValiSNRVec)
255 | sio.savemat(ValiSNRVec, {'Vali_SNR_Vec': hist.history['val_snr']}) # val_snr
256 |
257 | print("> Saving Completed, Time : ", time.time() - start)
258 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
259 |
260 | # -------------------------------------------------------------------------------
261 | # 4. Evaluate the Trained Model
262 | # -------------------------------------------------------------------------------
263 | def evaluation_model(self, x_test_noisy, weights_path=""):
264 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
265 | print("> Evaluation of the Trained Model ...")
266 | # ---------------------------------------------------------
267 | # 1. Load Model Weights
268 | # ---------------------------------------------------------
269 | print('> 1. Loading the Weights of the Model ...')
270 | self.load_weights(weights_path)
271 |
272 | # ---------------------------------------------------------
273 | # 2. Evaluate the Model
274 | # ---------------------------------------------------------
275 | start = time.time()
276 | print('> 2. Evaluating the Model, Please wait for a Moment ...')
277 | predicted = self.model.predict(x_test_noisy)
278 | print('> 2. Evaluating Completed, Time : ' + str(time.time() - start))
279 |
280 | # ---------------------------------------------------------
281 | # 3. Saving the Evaluation Result
282 | # ---------------------------------------------------------
283 | print('> 3. Saving the Evaluation Result ...')
284 | start = time.time()
285 | pre_file_root = "./Test_Outputs"
286 | if not (os.path.exists(pre_file_root)):
287 | os.makedirs(pre_file_root)
288 |
289 | preOutput = pre_file_root + "/" + "G711_CNN_testplan_vec.mat"
290 | preOutput = os.path.normcase(preOutput)
291 |
292 | sio.savemat(preOutput, {'predictions': predicted})
293 | print('> 3. Evaluation Result Saving Completed, Time : ' + str(time.time() - start))
294 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
295 |
--------------------------------------------------------------------------------
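The `snr` metric in WaveformQSRCNN.py is the classic signal-to-noise ratio in dB, and `step_decay` quarters the learning rate every four epochs (drop = 0.25, epochs_drop = 4). A quick NumPy sanity check of both formulas, with illustrative values only:

import numpy as np

y_true = np.array([1.0, -1.0, 0.5, -0.5])
y_pred = y_true + 0.01 * np.random.randn(4)

# Same quantity as the Keras snr() metric, in plain NumPy.
snr_db = 10.0 * np.log10(np.sum(y_true ** 2) / np.sum((y_pred - y_true) ** 2))
print("SNR: %.2f dB" % snr_db)

# step_decay schedule with the default lr of 5e-4:
for epoch in (0, 3, 7):
    lr = 5e-4 * 0.25 ** np.floor((1 + epoch) / 4.0)
    print("epoch %d -> lr %g" % (epoch, lr))  # 0.0005, 0.000125, 3.125e-05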
/CepstralCNN/WaveformQSRCNN_TrainTest_GPUs.py:
--------------------------------------------------------------------------------
1 | #########################################################################################################
2 | # WaveformQSRCNN_TrainTest_GPUs.py: Train and Test QSRCNN for G711/ADPCM/AMR/EVS using Waveform features
3 | # Author: Huijun Liu
4 | # Time: 10.05.2017
5 | # Location: TU Braunschweig IfN
6 | #########################################################################################################
7 |
8 | import os
9 | import sys
10 | import time
11 |
12 | import WaveformQSRCNN as model
13 | import tensorflow as tf
14 | import DataPrepare as dp
15 | from keras.backend.tensorflow_backend import set_session
16 |
17 | #####################################################################################
18 | # 0. Settings for GPUs and Parameters
19 | #####################################################################################
20 | using_gpu = 0
21 | if using_gpu == 1:
22 |     os.environ["CUDA_VISIBLE_DEVICES"] = "3"  # device index x here maps to physical GPU 3-x on this machine
23 |     config = tf.ConfigProto()
24 |     config.gpu_options.per_process_gpu_memory_fraction = 0.5  # at most 50% of GPU memory may be used
25 | set_session(tf.Session(config=config))
26 |
27 | train_or_test = "train" # train or test
28 |
29 | default_opt_params = {'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 1000}
30 | default_model_params = {'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 80}
31 | codec_type_params = {'weights_dir': "./model_weights", 'logdir': "./log"}
32 |
33 | train_inputs = ""
34 | train_targets = ""
35 |
36 | vali_inputs = ""
37 | vali_targets = ""
38 |
39 | test_inputs = ""
40 |
41 | if train_or_test == "train":
42 | # -------------------------------------------------------------------------------
43 | # 1. Load Data
44 | # -------------------------------------------------------------------------------
45 | train_inputs = "./TrainValiData/Train_inputSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat"
46 | train_targets = "./TrainValiData/Train_targetSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat"
47 |
48 | vali_inputs = "./TrainValiData/Vali_inputSet_ALaw_defautLang_OLdata_ValiTrain_smallVali_Reverse_Extended.mat"
49 | vali_targets = "./TrainValiData/Vali_targetSet_ALaw_defautLang_OLdata_ValiTrain_smallVali_Reverse_Extended.mat"
50 |
51 | x_train_noisy, x_train, x_train_noisy_vali, x_train_vali = dp.load_train_data(train_inputs, train_targets,
52 | vali_inputs, vali_targets)
53 |
54 | # -------------------------------------------------------------------------------
55 |     # 2. Init Waveform-QSRCNN Model
56 | # -------------------------------------------------------------------------------
57 | qsrcnn = model.WaveformQSRCNN(opt_params=default_opt_params,
58 | model_params=default_model_params,
59 | codec_type_params=codec_type_params)
60 |
61 | # -------------------------------------------------------------------------------
62 |     # 3. Fit the Waveform-QSRCNN Model
63 | # -------------------------------------------------------------------------------
64 |     hist = qsrcnn.fit(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali)
65 |
66 | # -------------------------------------------------------------------------------
67 |     # 4. Save Weights and Training Curves
68 | # -------------------------------------------------------------------------------
69 | qsrcnn.save_weights()
70 | qsrcnn.save_training_curves(hist=hist)
71 |
72 | elif train_or_test == "test":
73 | # -------------------------------------------------------------------------------
74 | # 1. Load Data
75 | # -------------------------------------------------------------------------------
76 | test_inputs = "inputTestSet_g711concat_PDandOLAI_Frame_80v73.mat"
77 | x_test_noisy = dp.load_test_data(test_inputs)
78 |
79 | # -------------------------------------------------------------------------------
80 |     # 2. Init Waveform-QSRCNN Model
81 | # -------------------------------------------------------------------------------
82 |
83 | codec_type_params = {'weights_dir': "./model_weights", 'logdir': "./log"}
84 | qsrcnn = model.WaveformQSRCNN(opt_params=default_opt_params,
85 | model_params=default_model_params,
86 | codec_type_params=codec_type_params)
87 |
88 | # -------------------------------------------------------------------------------
89 |     # 3. Evaluate the Waveform-QSRCNN Model
90 | # -------------------------------------------------------------------------------
91 | qsrcnn.evaluation_model(x_test_noisy)
92 |
93 | else:
94 |     raise Exception("Do you want to train or test the model? Please set the variable train_or_test!")
95 | # sys.exit("Please set the codec name !")
96 |
--------------------------------------------------------------------------------
/CepstralCNN/log/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/log/README.md
--------------------------------------------------------------------------------
/CepstralCNN/model_weights/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/model_weights/README.md
--------------------------------------------------------------------------------
/CepstralCNN/weightnorm.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from keras.optimizers import SGD,Adam
3 | import tensorflow as tf
4 |
5 | # adapted from keras.optimizers.SGD
6 | class SGDWithWeightnorm(SGD):
7 | def get_updates(self, params, constraints, loss):
8 | grads = self.get_gradients(loss, params)
9 | self.updates = []
10 |
11 | lr = self.lr
12 | if self.initial_decay > 0:
13 | lr *= (1. / (1. + self.decay * self.iterations))
14 |             self.updates.append(K.update_add(self.iterations, 1))
15 |
16 | # momentum
17 | shapes = [K.get_variable_shape(p) for p in params]
18 | moments = [K.zeros(shape) for shape in shapes]
19 | self.weights = [self.iterations] + moments
20 | for p, g, m in zip(params, grads, moments):
21 |
22 | # if a weight tensor (len > 1) use weight normalized parameterization
23 | ps = K.get_variable_shape(p)
24 | if len(ps) > 1:
25 |
26 | # get weight normalization parameters
27 | V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(p, g)
28 |
29 | # momentum container for the 'g' parameter
30 | V_scaler_shape = K.get_variable_shape(V_scaler)
31 | m_g = K.zeros(V_scaler_shape)
32 |
33 | # update g parameters
34 | v_g = self.momentum * m_g - lr * grad_g # velocity
35 | self.updates.append(K.update(m_g, v_g))
36 | if self.nesterov:
37 | new_g_param = g_param + self.momentum * v_g - lr * grad_g
38 | else:
39 | new_g_param = g_param + v_g
40 |
41 | # update V parameters
42 | v_v = self.momentum * m - lr * grad_V # velocity
43 | self.updates.append(K.update(m, v_v))
44 | if self.nesterov:
45 | new_V_param = V + self.momentum * v_v - lr * grad_V
46 | else:
47 | new_V_param = V + v_v
48 |
49 | # if there are constraints we apply them to V, not W
50 | if p in constraints:
51 | c = constraints[p]
52 | new_V_param = c(new_V_param)
53 |
54 | # wn param updates --> W updates
55 | add_weightnorm_param_updates(self.updates, new_V_param, new_g_param, p, V_scaler)
56 |
57 | else: # normal SGD with momentum
58 | v = self.momentum * m - lr * g # velocity
59 | self.updates.append(K.update(m, v))
60 |
61 | if self.nesterov:
62 | new_p = p + self.momentum * v - lr * g
63 | else:
64 | new_p = p + v
65 |
66 | # apply constraints
67 | if p in constraints:
68 | c = constraints[p]
69 | new_p = c(new_p)
70 |
71 | self.updates.append(K.update(p, new_p))
72 | return self.updates
73 |
74 | # adapted from keras.optimizers.Adam
75 | class AdamWithWeightnorm(Adam):
76 | def get_updates(self, params, constraints, loss):
77 | grads = self.get_gradients(loss, params)
78 | self.updates = [K.update_add(self.iterations, 1)]
79 |
80 | lr = self.lr
81 | if self.initial_decay > 0:
82 | lr *= (1. / (1. + self.decay * self.iterations))
83 |
84 | t = self.iterations + 1
85 | lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))
86 |
87 | shapes = [K.get_variable_shape(p) for p in params]
88 | ms = [K.zeros(shape) for shape in shapes]
89 | vs = [K.zeros(shape) for shape in shapes]
90 | self.weights = [self.iterations] + ms + vs
91 |
92 | for p, g, m, v in zip(params, grads, ms, vs):
93 |
94 | # if a weight tensor (len > 1) use weight normalized parameterization
95 | # this is the only part changed w.r.t. keras.optimizers.Adam
96 | ps = K.get_variable_shape(p)
97 |             if len(ps) > 1:
98 |
99 | # get weight normalization parameters
100 | V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(p, g)
101 |
102 | # Adam containers for the 'g' parameter
103 | V_scaler_shape = K.get_variable_shape(V_scaler)
104 | m_g = K.zeros(V_scaler_shape)
105 | v_g = K.zeros(V_scaler_shape)
106 |
107 | # update g parameters
108 | m_g_t = (self.beta_1 * m_g) + (1. - self.beta_1) * grad_g
109 | v_g_t = (self.beta_2 * v_g) + (1. - self.beta_2) * K.square(grad_g)
110 | new_g_param = g_param - lr_t * m_g_t / (K.sqrt(v_g_t) + self.epsilon)
111 | self.updates.append(K.update(m_g, m_g_t))
112 | self.updates.append(K.update(v_g, v_g_t))
113 |
114 | # update V parameters
115 | m_t = (self.beta_1 * m) + (1. - self.beta_1) * grad_V
116 | v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(grad_V)
117 | new_V_param = V - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
118 | self.updates.append(K.update(m, m_t))
119 | self.updates.append(K.update(v, v_t))
120 |
121 | # if there are constraints we apply them to V, not W
122 | if p in constraints:
123 | c = constraints[p]
124 | new_V_param = c(new_V_param)
125 |
126 | # wn param updates --> W updates
127 | add_weightnorm_param_updates(self.updates, new_V_param, new_g_param, p, V_scaler)
128 |
129 | else: # do optimization normally
130 | m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
131 | v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
132 | p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
133 |
134 | self.updates.append(K.update(m, m_t))
135 | self.updates.append(K.update(v, v_t))
136 |
137 | new_p = p_t
138 | # apply constraints
139 | if p in constraints:
140 | c = constraints[p]
141 | new_p = c(new_p)
142 | self.updates.append(K.update(p, new_p))
143 | return self.updates
144 |
145 |
146 | def get_weightnorm_params_and_grads(p, g):
147 | ps = K.get_variable_shape(p)
148 |
149 | # construct weight scaler: V_scaler = g/||V||
150 | V_scaler_shape = (ps[-1],) # assumes we're using tensorflow!
151 | V_scaler = K.ones(V_scaler_shape) # init to ones, so effective parameters don't change
152 |
153 | # get V parameters = ||V||/g * W
154 | norm_axes = [i for i in range(len(ps) - 1)]
155 | V = p / tf.reshape(V_scaler, [1] * len(norm_axes) + [-1])
156 |
157 | # split V_scaler into ||V|| and g parameters
158 | V_norm = tf.sqrt(tf.reduce_sum(tf.square(V), norm_axes))
159 | g_param = V_scaler * V_norm
160 |
161 | # get grad in V,g parameters
162 | grad_g = tf.reduce_sum(g * V, norm_axes) / V_norm
163 | grad_V = tf.reshape(V_scaler, [1] * len(norm_axes) + [-1]) * \
164 | (g - tf.reshape(grad_g / V_norm, [1] * len(norm_axes) + [-1]) * V)
165 |
166 | return V, V_norm, V_scaler, g_param, grad_g, grad_V
167 |
168 |
169 | def add_weightnorm_param_updates(updates, new_V_param, new_g_param, W, V_scaler):
170 | ps = K.get_variable_shape(new_V_param)
171 | norm_axes = [i for i in range(len(ps) - 1)]
172 |
173 | # update W and V_scaler
174 | new_V_norm = tf.sqrt(tf.reduce_sum(tf.square(new_V_param), norm_axes))
175 | new_V_scaler = new_g_param / new_V_norm
176 | new_W = tf.reshape(new_V_scaler, [1] * len(norm_axes) + [-1]) * new_V_param
177 | updates.append(K.update(W, new_W))
178 | updates.append(K.update(V_scaler, new_V_scaler))
179 |
180 |
181 | # data based initialization for a given Keras model
182 | def data_based_init(model, input):
183 |
184 | # input can be dict, numpy array, or list of numpy arrays
185 | if type(input) is dict:
186 | feed_dict = input
187 | elif type(input) is list:
188 | feed_dict = {tf_inp: np_inp for tf_inp,np_inp in zip(model.inputs,input)}
189 | else:
190 | feed_dict = {model.inputs[0]: input}
191 |
192 | # add learning phase if required
193 | if model.uses_learning_phase and K.learning_phase() not in feed_dict:
194 | feed_dict.update({K.learning_phase(): 1})
195 |
196 | # get all layer name, output, weight, bias tuples
197 | layer_output_weight_bias = []
198 | for l in model.layers:
199 | if hasattr(l, 'W') and hasattr(l, 'b'):
200 | assert(l.built)
201 | layer_output_weight_bias.append( (l.name,l.get_output_at(0),l.W,l.b) ) # if more than one node, only use the first
202 |
203 | # iterate over our list and do data dependent init
204 | sess = K.get_session()
205 | for l,o,W,b in layer_output_weight_bias:
206 | print('Performing data dependent initialization for layer ' + l)
207 | m,v = tf.nn.moments(o, [i for i in range(len(o.get_shape())-1)])
208 | s = tf.sqrt(v + 1e-10)
209 | updates = tf.group(W.assign(W/tf.reshape(s,[1]*(len(W.get_shape())-1)+[-1])), b.assign((b-m)/s))
210 | sess.run(updates, feed_dict)
--------------------------------------------------------------------------------
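Both optimizer classes above are drop-in replacements for their Keras counterparts, and `data_based_init` expects a built model plus a representative input batch. A minimal usage sketch with a toy model (illustrative only; it assumes the older Keras API this file targets, where Dense layers expose `.W`/`.b`):

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from weightnorm import AdamWithWeightnorm, data_based_init

# Toy regression model; any built Keras model with weight tensors works.
model = Sequential([Dense(16, activation='relu', input_shape=(8,)),
                    Dense(1)])
model.compile(optimizer=AdamWithWeightnorm(lr=1e-3), loss='mse')

# Data-dependent initialization on a representative batch, then normal training.
x = np.random.randn(256, 8).astype('float32')
y = np.random.randn(256, 1).astype('float32')
data_based_init(model, x[:64])
model.fit(x, y, epochs=2, batch_size=32, verbose=0)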
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2018, Huijun Liu, Ziyue Zhao, Tim Fingscheidt
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/QSR-WGAN-GP/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | push
3 |
4 | *.pyc
5 | *.pkl
6 | *.jpg
7 | *.png
8 |
--------------------------------------------------------------------------------
/QSR-WGAN-GP/.idea/QSR-WGAN-GP.iml:
--------------------------------------------------------------------------------