├── .gitignore ├── CNN2EnhancedSpeech.PNG ├── CepstralCNN ├── CepstralQSRCNN.py ├── CepstralQSRCNN_TrainTest_GPUs.py ├── DataPrepare.py ├── Opting_Results │ └── README.md ├── QSR-WGAN-GP_Train_GPUs.py ├── TestData │ └── README.md ├── Test_Outputs │ └── README.md ├── TrainValiData │ └── README.md ├── WaveformQSRCNN.py ├── WaveformQSRCNN_TrainTest_GPUs.py ├── log │ └── README.md ├── model_weights │ └── README.md └── weightnorm.py ├── LICENSE ├── QSR-WGAN-GP ├── .gitignore ├── .idea │ ├── QSR-WGAN-GP.iml │ ├── markdown-navigator.xml │ ├── markdown-navigator │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── QSR-GANGP_Train_GPUs.py ├── QSR-WGAN-GP_Train_GPUs.py └── tflib │ ├── __init__.py │ └── ops │ ├── __init__.py │ ├── batchnorm.py │ ├── cond_batchnorm.py │ ├── conv1d.py │ ├── conv2d.py │ ├── deconv2d.py │ ├── layernorm.py │ └── linear.py ├── README.md └── WaveformCNN ├── DataPrepare.py ├── Opting_Results └── README.md ├── TestData └── README.md ├── TrainValiData └── README.md ├── WaveformQSRCNN.py ├── WaveformQSRCNN_TrainTest_GPUs.py ├── log └── README.md ├── model_weights ├── README.md └── g711_waveformqsrcnn_weights_best_bs32_lr0.0005.h5 └── weightnorm.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /CNN2EnhancedSpeech.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CNN2EnhancedSpeech.PNG -------------------------------------------------------------------------------- /CepstralCNN/CepstralQSRCNN.py: -------------------------------------------------------------------------------- 1 | ####################################################################################################################### 2 | # Project QSRCNN: Quantized Speech Reconstruction using Convolutional Neural Network V1.0 3 | # ===================================================================================================================== 4 | # CepstralQSRCNN.py: QSRCNN for 
def snr(y_true, y_pred):
    """Signal-to-noise ratio in dB between a target and a prediction.

    # Arguments
        y_true: target tensor.
        y_pred: predicted tensor; `y_pred - y_true` is treated as the noise.

    # Returns
        10 * log10(sum(y_true ** 2) / sum((y_pred - y_true) ** 2)), with the
        sums taken over every element of the tensors.
    """
    signal_energy = K.sum(K.square(y_true))
    noise_energy = K.sum(K.square(y_pred - y_true))
    # The backend only offers the natural logarithm, hence the division by log(10).
    return 10.0 * K.log(signal_energy / noise_energy) / K.log(10.0)


def selu(x):
    """Scaled Exponential Linear Unit. (Klambauer et al., 2017)

    # Arguments
        x: A tensor or variable to compute the activation function for.

    # Returns
        `scale * x` where `x >= 0`, `scale * alpha * elu(x)` elsewhere.

    # References
        - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
    """
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    with ops.name_scope('elu'):
        non_negative = x >= 0.0
        negative_branch = alpha * tf.nn.elu(x)
        return scale * tf.where(non_negative, x, negative_branch)
class CepstralQSRCNN(object):
    """Build, train and evaluate the Cepstral-QSRCNN Keras model.

    Creating an instance stores the optimisation settings, creates the log and
    weights directories when they are missing, and builds and compiles the
    "qsrcnn" network through `create_model`.
    """

    _SEPARATOR = '> +++++++++++++++++++++++++++++++++++++++++++++++++++++ '

    def __init__(self, opt_params=None, model_params=None, codec_type_params=None):
        """
        # Arguments
            opt_params: dict with the keys 'lr', 'batch_size' and 'nb_epochs'.
                Defaults to {'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 100}.
            model_params: dict with the keys 'n1', 'n2', 'n3' (filter counts
                of the "qsrcnn" network) and 'frame_len'.
                Defaults to {'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32}.
            codec_type_params: dict with the keys 'codec', 'type',
                'weights_dir' and 'logdir'. Defaults to codec 'ADPCM',
                type '3', "./model_weights" and "./log".
        """
        # The defaults are created per call: dict literals in the signature
        # would be shared (and mutable) across all instances.
        if opt_params is None:
            opt_params = {'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 100}
        if model_params is None:
            model_params = {'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32}
        if codec_type_params is None:
            codec_type_params = {'codec': 'ADPCM', 'type': '3',
                                 'weights_dir': "./model_weights", 'logdir': "./log"}

        self.learning_rate = opt_params['lr']            # Learning rate
        self.batch_size = opt_params['batch_size']       # Batch size
        self.nb_epochs = opt_params['nb_epochs']         # Number of epochs

        self.codec = codec_type_params['codec']          # Codec type
        self.type = codec_type_params['type']            # Methods type

        self.log_dir = codec_type_params['logdir']       # Directory of the log files
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)

        self.weights_dir = codec_type_params['weights_dir']  # Directory of the weights files
        if not os.path.exists(self.weights_dir):
            os.makedirs(self.weights_dir)

        self.frame_len = model_params['frame_len']       # Frame length
        self.model_params = model_params
        self.model = self.create_model("qsrcnn")

    def _result_path(self, root, label, extension):
        """Return `root/<codec>_Type<type>_<label>_bs<batch>_lr<lr><extension>`.

        Every weights, log and curve file of this class follows this pattern;
        the result is passed through `os.path.normcase`.
        """
        file_name = (self.codec + '_Type' + self.type + '_' + label + '_bs' +
                     str(self.batch_size) + '_lr' + str(self.learning_rate) + extension)
        return os.path.normcase(root + '/' + file_name)

    # -------------------------------------------------------------------------------
    # Load the Weights of the Model
    # -------------------------------------------------------------------------------
    def load_weights(self, file_path=""):
        """Load model weights from `file_path`.

        # Arguments
            file_path: weights file; an empty string selects the "Best"
                checkpoint file name that `fit` writes into `weights_dir`.
        """
        if file_path == "":
            file_path = self._result_path(self.weights_dir, 'CepstralQSRCNN_Weights_Best', '.h5')

        file_path = os.path.normcase(file_path)
        self.model.load_weights(file_path)

    # -------------------------------------------------------------------------------
    # Save the Weights of the Model
    # -------------------------------------------------------------------------------
    def save_weights(self):
        """Save the current model weights as the "Final" weights file in `weights_dir`."""
        file_path = self._result_path(self.weights_dir, 'CepstralQSRCNN_Weights_Final', '.h5')
        self.model.save_weights(file_path)

    # -------------------------------------------------------------------------------
    # 1. define model
    # -------------------------------------------------------------------------------
    def create_model(self, model_type="qsrcnn"):
        """Build, summarise and compile one of the supported networks.

        # Arguments
            model_type: one of "qsrcnn", "wavenet", "autoencoder", "esrcnn".

        # Returns
            The Keras model, compiled with AdamWithWeightnorm, 'mse' loss and
            the `snr` metric.

        # Raises
            ValueError: if `model_type` is not one of the supported names
                (previously this surfaced as an UnboundLocalError).
        """
        builders = {
            "qsrcnn": self._build_qsrcnn,
            "wavenet": self._build_wavenet,
            "autoencoder": self._build_autoencoder,
            "esrcnn": self._build_esrcnn,
        }
        if model_type not in builders:
            raise ValueError("Unknown model_type '%s'; expected one of %s"
                             % (model_type, sorted(builders)))

        model = builders[model_type]()
        model.summary()

        adam_wn = AdamWithWeightnorm(lr=self.learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        model.compile(optimizer=adam_wn, loss='mse', metrics=[snr])

        return model

    def _build_qsrcnn(self):
        """Conv encoder/decoder with two pooling stages and two additive skip connections."""
        frame_len = self.frame_len
        n1 = self.model_params['n1']
        n2 = self.model_params['n2']
        n3 = self.model_params['n3']

        input_sque = Input(shape=(frame_len, 1))
        c1 = Conv1D(n1, 3, padding='same')(input_sque)
        c1 = Activation(selu)(c1)
        c1 = Conv1D(n1, 3, padding='same')(c1)
        c1 = Activation(selu)(c1)
        x = MaxPooling1D(2)(c1)

        c2 = Conv1D(n2, 3, padding='same')(x)
        c2 = Activation(selu)(c2)
        c2 = Conv1D(n2, 3, padding='same')(c2)
        c2 = Activation(selu)(c2)
        x = MaxPooling1D(2)(c2)

        c3 = Conv1D(n3, 3, padding='same')(x)
        c3 = Activation(selu)(c3)
        x = UpSampling1D(2)(c3)

        c2_2 = Conv1D(n2, 3, padding='same')(x)
        c2_2 = Activation(selu)(c2_2)
        c2_2 = Conv1D(n2, 3, padding='same')(c2_2)
        c2_2 = Activation(selu)(c2_2)

        # Skip connection at half resolution.
        m1 = Add()([c2, c2_2])
        m1 = UpSampling1D(2)(m1)

        c1_2 = Conv1D(n1, 3, padding='same')(m1)
        c1_2 = Activation(selu)(c1_2)
        c1_2 = Conv1D(n1, 3, padding='same')(c1_2)
        c1_2 = Activation(selu)(c1_2)

        # Skip connection at full resolution.
        m2 = Add()([c1, c1_2])

        decoded = Conv1D(1, 5, padding='same', activation='linear')(m2)

        return Model(input_sque, decoded)

    def _build_wavenet(self):
        """Non-causal dilated encoder whose pooled output conditions a causal gated decoder."""
        frame_len = self.frame_len

        ae_width = 16
        ae_filter_length = 3

        ae_num_stages = 2
        ae_num_layers = 6

        num_stages = 2
        num_layers = 6

        width = 16
        skip_width = 16
        filter_length = 3

        input_sque = Input(shape=(frame_len, 1), name='input_layer')

        # ---------------------------------------
        # The Non-Causal Temporal Encoder.
        # ---------------------------------------
        en = Conv1D(ae_width, ae_filter_length, padding='same', name='ae_startconv')(input_sque)

        for num_layer in range(ae_num_layers):
            # dilation: 2 ** (num_layer % ae_num_stages), i.e. it alternates 1, 2
            dilation = 2 ** (num_layer % ae_num_stages)
            d = Activation(selu)(en)
            d = Conv1D(ae_width, 3, padding='same', dilation_rate=dilation,
                       name='ae_dilatedconv_%d' % (num_layer + 1))(d)
            d = Activation(selu)(d)

            en2 = Conv1D(ae_width, 1, padding='same', dilation_rate=dilation,
                         name='ae_res_%d' % (num_layer + 1))(d)
            en = Add()([en2, en])

        en = Activation(selu)(en)
        en = Conv1D(16, 1, padding='causal', dilation_rate=1, name='ae_bottleneck')(en)
        en = Activation(selu)(en)
        # Halves the time axis; every use of `en` below up-samples by 2 again.
        en = AveragePooling1D(2, name='ae_pool')(en)

        # ---------------------------------------
        # The WaveNet Decoder.
        # ---------------------------------------
        residual = Conv1D(width, filter_length, padding='causal', dilation_rate=1,
                          name='startconv')(input_sque)
        residual = Activation(selu)(residual)
        # Set up skip connections.
        s = Conv1D(skip_width, 1, padding='causal', dilation_rate=1, name='skip_start')(residual)
        s = Activation(selu)(s)

        # Residual blocks with skip connections.
        for i in range(num_layers):
            d = Conv1D(2 * width, filter_length, padding='causal', dilation_rate=2 ** (i % num_stages),
                       name='dilatedconv_%d' % (i + 1))(residual)
            d = Activation(selu)(d)

            # Condition the block on the encoder output.
            en3 = Conv1D(2 * width, 1, padding='causal', dilation_rate=1, name='cond_map_%d' % (i + 1))(en)
            en3 = Activation(selu)(en3)
            en3 = UpSampling1D(2, name='up_sampling_%d' % (i + 1))(en3)
            d = Add()([d, en3])

            # Gated activation: sigmoid(d) * tanh(d).
            d_sigmoid = Activation('sigmoid')(d)
            d_tanh = Activation('tanh')(d)
            d = Multiply()([d_sigmoid, d_tanh])

            l2 = Conv1D(width, 1, padding='causal', dilation_rate=1, name='res_%d' % (i + 1))(d)
            l2 = Activation(selu)(l2)
            residual = Add()([l2, residual])

            s2 = Conv1D(skip_width, 1, padding='causal', dilation_rate=1, name='skip_%d' % (i + 1))(d)
            s = Add()([s2, s])

        s = Activation(selu)(s)

        s = Conv1D(skip_width, 3, padding='causal', activation='linear', name='output_layer1')(s)
        s = Activation(selu)(s)
        en4 = Conv1D(skip_width, 1, padding='causal', activation='linear', name='cond_map_out1')(en)
        en4 = Activation(selu)(en4)
        en4 = UpSampling1D(2, name='up_sampling')(en4)
        s = Add()([en4, s])
        s = Activation(selu)(s)

        outs = Conv1D(1, 3, padding='causal', activation='linear', name='output_layer')(s)

        return Model(input_sque, outs)

    def _build_autoencoder(self):
        """Plain convolutional auto-encoder with two pooling / up-sampling stages."""
        frame_len = self.frame_len
        n1 = 64
        n2 = 32

        input_sque = Input(shape=(frame_len, 1))
        c1 = Conv1D(n1, 3, padding='same')(input_sque)
        c1 = Activation(selu)(c1)
        x = MaxPooling1D(2)(c1)

        c2 = Conv1D(n2, 3, padding='same')(x)
        c2 = Activation(selu)(c2)
        encoded = MaxPooling1D(2)(c2)

        d1 = UpSampling1D(2)(encoded)
        d1 = Conv1D(n2, 3, padding='same')(d1)
        d1 = Activation(selu)(d1)
        # NOTE(review): selu is applied a second time here; kept as in the
        # original network definition - confirm whether this is intended.
        y = Activation(selu)(d1)

        d2 = UpSampling1D(2)(y)
        d2 = Conv1D(n1, 3, padding='same')(d2)
        d2 = Activation(selu)(d2)

        decoded = Conv1D(1, 5, padding='same', activation='linear')(d2)

        return Model(input_sque, decoded)

    def _build_esrcnn(self):
        """One wide conv layer, three parallel convs (kernel 1/2/3) averaged, linear output conv."""
        f1 = 5
        f2_1 = 1
        f2_2 = 2
        f2_3 = 3
        f3 = 5

        n1 = 128
        n2 = 64

        frame_len = self.frame_len

        input_img = Input(shape=(frame_len, 1))
        x = Conv1D(n1, f1, padding='same', name='level1')(input_img)
        x = Activation(selu)(x)

        # The 'lavel1_*' spelling is kept on purpose: layer names are runtime identifiers.
        x1 = Conv1D(n2, f2_1, padding='same', name='lavel1_1')(x)
        x1 = Activation(selu)(x1)
        x2 = Conv1D(n2, f2_2, padding='same', name='lavel1_2')(x)
        x2 = Activation(selu)(x2)
        x3 = Conv1D(n2, f2_3, padding='same', name='lavel1_3')(x)
        x3 = Activation(selu)(x3)

        x = Average()([x1, x2, x3])

        out = Conv1D(1, f3, padding='same', activation='linear', name='output_1')(x)

        return Model(input_img, out)

    # -------------------------------------------------------------------------------
    # 2. Fit the model
    # -------------------------------------------------------------------------------
    def step_decay(self, epoch):
        """Step-decay schedule: halve the initial learning rate every 4 epochs.

        Computes `learning_rate * 0.5 ** floor((1 + epoch) / 4)`, writes it to
        the optimizer of `self.model`, prints the change and returns the value
        read back from the optimizer. `fit` does not install this schedule.

        # Arguments
            epoch: epoch index.
        """
        initial_lrate = self.learning_rate

        drop = 0.5
        epochs_drop = 4.0
        lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))

        old_lr = K.get_value(self.model.optimizer.lr)
        K.set_value(self.model.optimizer.lr, lrate)
        lrate = K.get_value(self.model.optimizer.lr)
        print('lr reduced from %f to %f' % (old_lr, lrate))
        return lrate

    def fit(self, x_train_noisy, x_train, x_train_noisy_vali, x_train_vali):
        """Train the model and return the Keras history object.

        # Arguments
            x_train_noisy: training inputs.
            x_train: training targets.
            x_train_noisy_vali: validation inputs.
            x_train_vali: validation targets.

        # Returns
            The object returned by `self.model.fit`.
        """
        print(self._SEPARATOR)
        print("> Training model ...")

        nb_epochs = self.nb_epochs
        batch_size = self.batch_size
        learning_rate = self.learning_rate

        # ---------------------------------------------------------
        # 1. define callback functions
        # ---------------------------------------------------------
        # Stop training when val_snr has not improved for 16 epochs
        stop_str = EarlyStopping(monitor='val_snr', patience=16, verbose=1, mode='max')

        # Reduce learning rate when val_snr stops improving: lr = lr * factor
        reduce_LR = ReduceLROnPlateau(monitor='val_snr', factor=0.6, patience=2, verbose=1, mode='max',
                                      epsilon=0.0001, cooldown=0, min_lr=0)

        # Keep only the weights of the epoch with the highest val_snr
        best_weights = self._result_path(self.weights_dir, 'CepstralQSRCNN_Weights_Best', '.h5')
        model_save = ModelCheckpoint(best_weights, monitor='val_snr', save_best_only=True, mode='max',
                                     save_weights_only=True, period=1)

        logger_name = self._result_path(self.log_dir, 'CepstralQSRCNN_log', '.csv')
        logger = CSVLogger(logger_name, separator=',', append=False)

        start = time.time()

        # ---------------------------------------------------------
        # 2. fit the model
        # ---------------------------------------------------------
        print("> Training model " + "using Batch-size: " + str(batch_size) +
              ", Learning_rate: " + str(learning_rate) + "...")
        hist = self.model.fit(x_train_noisy, x_train, epochs=nb_epochs, batch_size=batch_size, shuffle=True,
                              validation_data=(x_train_noisy_vali, x_train_vali),
                              callbacks=[reduce_LR, stop_str, model_save, logger])

        print("> Training Completed, Time : ", time.time() - start)
        print(self._SEPARATOR)
        return hist

    # -------------------------------------------------------------------------------
    # 3. Save loss snr val_loss val_snr as .mat File
    # -------------------------------------------------------------------------------
    def save_training_curves(self, hist):
        """Write the training/validation loss and SNR curves to "./Opting_Results" as .mat files.

        # Arguments
            hist: history object returned by `fit`; its `history` dict must
                contain the keys 'loss', 'snr', 'val_loss' and 'val_snr'.
        """
        print(self._SEPARATOR)
        print("> Saving Training and Validation loss-metric curve ...")

        start = time.time()

        curve_root = "./Opting_Results"
        if not os.path.exists(curve_root):
            os.makedirs(curve_root)

        # (file label, variable name inside the .mat file, key in hist.history)
        curves = (
            ('CepstralDDQSRCNN_TrainLoss', 'Train_Loss_Vec', 'loss'),
            ('CepstralDDQSRCNN_TrainMetrice', 'Train_SNR_Vec', 'snr'),
            ('CepstralDDQSRCNN_ValiLoss', 'Vali_Loss_Vec', 'val_loss'),
            ('CepstralDDQSRCNN_ValiMetrice', 'Vali_SNR_Vec', 'val_snr'),
        )
        for label, mat_key, history_key in curves:
            curve_file = self._result_path(curve_root, label, '.mat')
            sio.savemat(curve_file, {mat_key: hist.history[history_key]})

        print("> Saving Completed, Time : ", time.time() - start)
        print(self._SEPARATOR)

    # -------------------------------------------------------------------------------
    # 4. Evaluate the Trained Model
    # -------------------------------------------------------------------------------
    def evaluation_model(self, x_test_noisy, detail_type="1", weights_path=""):
        """Load weights, predict on `x_test_noisy` and save the result as a .mat file.

        # Arguments
            x_test_noisy: test inputs passed to `self.model.predict`.
            detail_type: string inserted into the output file name.
            weights_path: weights file; an empty string selects the default
                "Best" weights file (see `load_weights`).

        The predictions are stored under the key 'predictions' in
        "./Test_Outputs/<codec>_CNN_testplan_Type<detail_type>_ceps_vec.mat".
        """
        print(self._SEPARATOR)
        print("> Evaluation of the Trained Model ...")
        # ---------------------------------------------------------
        # 1. Load Model Weights
        # ---------------------------------------------------------
        print('> 1. Loading the Weights of the Model ...')
        self.load_weights(weights_path)

        # ---------------------------------------------------------
        # 2. Evaluate the Model
        # ---------------------------------------------------------
        start = time.time()
        print('> 2. Evaluating the Model, Please wait for a Moment ...')
        predicted = self.model.predict(x_test_noisy)
        print('> 2. Evaluating Completed, Time : ' + str(time.time() - start))

        # ---------------------------------------------------------
        # 3. Saving the Evaluation Result
        # ---------------------------------------------------------
        print('> 3. Saving the Evaluation Result ...')
        start = time.time()
        pre_file_root = "./Test_Outputs"
        if not os.path.exists(pre_file_root):
            os.makedirs(pre_file_root)

        pre_output = pre_file_root + "/" + self.codec + '_CNN_testplan_Type' + detail_type + "_ceps_vec.mat"
        pre_output = os.path.normcase(pre_output)

        sio.savemat(pre_output, {'predictions': predicted})
        print('> 3. Evaluation Result Saving Completed, Time : ' + str(time.time() - start))
        print(self._SEPARATOR)
28 | config = tf.ConfigProto() 29 | config.gpu_options.per_process_gpu_memory_fraction = 0.4 # Up to 80% Memory of GPUs can be used 30 | set_session(tf.Session(config=config)) 31 | 32 | ##################################################################################### 33 | # 1. Settings Parameters 34 | ##################################################################################### 35 | 36 | train_or_test = "test" # train or test the deep model 37 | codec = "amrwb" # g711/adpcm/amrwb/evsswb codec can be used 38 | type = "3" # 1_2 or 3 for Training 39 | type_detail = "3" # 1 or 2 or 3 or 4 for Testing 40 | frame_len = "" # 256(g711/adpcm) or 512(amrwb) or 1024(evsswb) 41 | 42 | if codec == "g711" or codec == "adpcm": 43 | default_model_params = {'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32} # Parameters for model itself 44 | frame_len = "256" # 256(g711/adpcm) or 512(amrwb) or 1024(evsswb) # Frame length 45 | elif codec == "amrwb": 46 | default_model_params = {'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 64} 47 | frame_len = "512" # 256(g711/adpcm) or 512(amrwb) or 1024(evsswb) 48 | elif codec == "evsswb": 49 | default_model_params = {'n1': 64, 'n2': 128, 'n3': 64, 'frame_len': 128} 50 | frame_len = "1024" # 256(g711/adpcm) or 512(amrwb) or 1024(evsswb) 51 | else: 52 | raise Exception("Please set the variable codec !") 53 | 54 | default_opt_params = {'lr': 5e-4, 'batch_size': 16, 'nb_epochs': 1000} # Parameters for model training 55 | codec_type_params = {'codec': codec, 'type': type, 56 | 'weights_dir': "./model_weights", 57 | 'logdir': "./log"} # Other parameters 58 | 59 | ##################################################################################### 60 | # 2. 
Training and Testing 61 | ##################################################################################### 62 | train_inputs = "" # Path of the input data for training 63 | train_targets = "" # Path of the target data for training 64 | 65 | vali_inputs = "" # Path of the input data for validation 66 | vali_targets = "" # Path of the target data for validation 67 | 68 | test_inputs = "" # Path of the input data for testing 69 | 70 | if train_or_test == "train": 71 | # ------------------------------------------------------------------------------- 72 | # 1. Load Data 73 | # ------------------------------------------------------------------------------- 74 | train_inputs = "./TrainValiData/Train_inputSet_" + codec + \ 75 | "_defautLang_OLdata_ValiTrain_type" + type + "_Fram256_ceps.mat" 76 | train_targets = "./TrainValiData/Train_targetSet_" + codec + \ 77 | "_defautLang_OLdata_ValiTrain_type" + type + "_Fram256_ceps.mat" 78 | 79 | vali_inputs = "./TrainValiData/Vali_inputSet_" + codec + \ 80 | "_defautLang_OLdata_ValiTrain_smallVali_type" + type + "_Fram256_ceps.mat" 81 | vali_targets = "./TrainValiData/Vali_targetSet_" + codec + \ 82 | "_defautLang_OLdata_ValiTrain_smallVali_type" + type + "_Fram256_ceps.mat" 83 | 84 | x_train_noisy, x_train, x_train_noisy_vali, x_train_vali = dp.load_train_data(train_inputs, train_targets, 85 | vali_inputs, vali_targets) 86 | 87 | # ------------------------------------------------------------------------------- 88 | # 2. Init Cepstral-QSRCNN Model 89 | # ------------------------------------------------------------------------------- 90 | qsrcnn = model.CepstralQSRCNN(opt_params=default_opt_params, 91 | model_params=default_model_params, 92 | codec_type_params=codec_type_params) 93 | 94 | # ------------------------------------------------------------------------------- 95 | # 3. 
Fit The Cepstral-QSRCNNe Model 96 | # ------------------------------------------------------------------------------- 97 | hist =qsrcnn.fit(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali) 98 | 99 | # ------------------------------------------------------------------------------- 100 | # 4. Save Weights and Training Curves 101 | # ------------------------------------------------------------------------------- 102 | qsrcnn.save_weights() 103 | qsrcnn.save_training_curves(hist=hist) 104 | 105 | elif train_or_test == "test": 106 | # ------------------------------------------------------------------------------- 107 | # 1. Load Data 108 | # ------------------------------------------------------------------------------- 109 | test_inputs = "inputTestSet_" + codec + "_concat_Type" + type_detail + "_Frame_" + frame_len + "_ceps_v73.mat" 110 | x_test_noisy = dp.load_test_data(test_inputs) 111 | 112 | # ------------------------------------------------------------------------------- 113 | # 2. Init Cepstral-QSRCNN Model 114 | # ------------------------------------------------------------------------------- 115 | if type_detail == "1" or type_detail == "2": 116 | type = "1_2" 117 | elif type_detail == "3" or type_detail == "4": 118 | type = "3" 119 | 120 | codec_type_params = {'codec': codec, 'type': type, 'weights_dir': "./model_weights", 'logdir': "./log"} 121 | qsrcnn = model.CepstralQSRCNN(opt_params=default_opt_params, 122 | model_params=default_model_params, 123 | codec_type_params=codec_type_params) 124 | 125 | # ------------------------------------------------------------------------------- 126 | # 3. Evaluate The Cepstral-QSRCNNe Model 127 | # ------------------------------------------------------------------------------- 128 | qsrcnn.evaluation_model(x_test_noisy, type_detail) 129 | 130 | else: 131 | raise Exception("Do you want to train or test the model ? 
# -------------------------------------------------------------------------------
# 1. load data
# -------------------------------------------------------------------------------


def _load_mat_array(mat_path, key, label):
    """Read variable *key* from a MATLAB file and return it as a NumPy array.

    The path is normalised with ``os.path.normcase`` and announced on stdout
    (prefixed by *label*) before the file is read with ``scipy.io.loadmat``.
    """
    mat_path = os.path.normcase(mat_path)
    print(label + mat_path + '...')
    return np.array(sio.loadmat(mat_path)[key])


def _add_channel_axis(frames):
    """Reshape a 2-D ``(rows, cols)`` array to ``(rows, cols, 1)``."""
    return np.reshape(frames, (frames.shape[0], frames.shape[1], 1))


def load_train_data(train_inputs, train_targets, vali_inputs, vali_targets):
    """Load the training and validation pairs and shuffle the training pairs.

    Inputs are read from the ``inputSetNorm`` variable and targets from the
    ``targetSet`` variable of the given ``.mat`` files.  Training rows are
    shuffled jointly (input and target stay paired); validation rows keep
    their file order.

    Side effect: seeds NumPy's global random generator with 1234 so the
    shuffle is reproducible.

    :param train_inputs: path of the training input ``.mat`` file
    :param train_targets: path of the training target ``.mat`` file
    :param vali_inputs: path of the validation input ``.mat`` file
    :param vali_targets: path of the validation target ``.mat`` file
    :return: ``(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali)``,
             each reshaped to ``(rows, cols, 1)``
    """
    print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
    print('> Loading data ')

    start = time.time()

    x_train_noisy = _load_mat_array(train_inputs, 'inputSetNorm',
                                    '> 1. Loading Training Input: ')
    x_train = _load_mat_array(train_targets, 'targetSet',
                              '> 2. Loading Training Target: ')
    x_train_noisy_vali = _load_mat_array(vali_inputs, 'inputSetNorm',
                                         '> 3. Loading Validation Input: ')
    x_train_vali = _load_mat_array(vali_targets, 'targetSet',
                                   '> 4. Loading Validation Target: ')

    # ---------------------------------------------------------
    # 5. Randomization of Training Pairs
    # ---------------------------------------------------------
    print('> 5. Randomization of Training Pairs ...')
    frame_length = x_train_noisy.shape[1]

    # Inputs and targets are stacked side by side so that a single row
    # shuffle keeps every (input, target) pair together.
    random.seed(1234)
    train = np.column_stack((x_train_noisy, x_train))
    np.random.shuffle(train)
    x_train_noisy = train[:, :frame_length]
    x_train = train[:, frame_length:]

    # ---------------------------------------------------------
    # 6. Reshape of Training and validation Pairs
    # ---------------------------------------------------------
    x_train_noisy = _add_channel_axis(x_train_noisy)
    x_train = _add_channel_axis(x_train)
    x_train_noisy_vali = _add_channel_axis(x_train_noisy_vali)
    x_train_vali = _add_channel_axis(x_train_vali)

    print("> Data Loaded, , Time : ", time.time() - start)
    print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')

    return x_train_noisy, x_train, x_train_noisy_vali, x_train_vali


def load_test_data(testfile_path="inputTestSet_g711concat_Type3_Frame_256_ceps_v73.mat"):
    """Load the normalised test input from ``./TestData/<testfile_path>``.

    The file is opened with h5py and the ``inputTestNorm`` dataset is read,
    transposed and reshaped to ``(rows, cols, 1)``.  The ``./TestData``
    directory is created when it does not exist yet.

    :param testfile_path: file name inside ``./TestData``
    :return: the test input as an array of shape ``(rows, cols, 1)``
    :raises KeyError: if the file has no ``inputTestNorm`` dataset
    """
    print('> Loading Test data... ')

    test_file_root = "./TestData"
    if not (os.path.exists(test_file_root)):
        os.makedirs(test_file_root)

    mat_input = test_file_root + "/" + testfile_path
    mat_input = os.path.normcase(mat_input)

    # Copy the dataset into memory inside the ``with`` block so the HDF5
    # handle is always released (the file used to stay open).
    with h5.File(mat_input, 'r') as mat_file:
        dataset = mat_file.get('inputTestNorm')
        if dataset is None:
            raise KeyError("'inputTestNorm' not found in " + mat_input)
        x_test_noisy = np.array(dataset)

    # NOTE(review): the transpose presumably undoes MATLAB's column-major
    # storage as seen through HDF5 -- confirm against the data export.
    x_test_noisy = np.transpose(x_test_noisy)

    return _add_channel_axis(x_test_noisy)
def snr(y_true, y_pred):
    """Keras metric: signal-to-noise ratio in dB.

    Computes ``10 * log10(sum(y_true^2) / sum((y_pred - y_true)^2))`` with
    backend ops, summing over all elements of the tensors.
    """
    signal_energy = K.sum(K.square(y_true))
    noise_energy = K.sum(K.square(y_pred - y_true))
    # log10 is obtained from the natural log via the change-of-base rule.
    return 10.0 * K.log(signal_energy / noise_energy) / K.log(10.0)


def selu(x):
    """Element-wise activation ``scale * (x if x >= 0 else alpha * elu(x))``."""
    selu_alpha = 1.6732632423543772848170429916717
    selu_scale = 1.0507009873554804934193349852946
    with ops.name_scope('elu'):
        is_non_negative = x >= 0.0
        negative_branch = selu_alpha * tf.nn.elu(x)
        return selu_scale * tf.where(is_non_negative, x, negative_branch)
#####################################################################################
# 2. Define Generative model and Adversarial model
#####################################################################################
def create_generator(inputs_gen):
    """Build the generator graph on top of *inputs_gen* and return its output.

    Two conv/conv/max-pool stages go down, a single conv sits at the bottom,
    and two upsampling stages come back up; the outputs of the down stages
    are added to the matching up stages (``add_1``, ``add_2``).  A final
    linear 1-filter convolution produces the output tensor.

    :param inputs_gen: Keras input tensor fed to the first convolution
    :return: output tensor of layer ``conv_10``
    """
    n1 = 32
    n2 = 64
    n3 = 32

    c1 = Conv1D(n1, 3, padding='same', name='conv_1')(inputs_gen)
    c1 = Activation(selu, name='act_1')(c1)
    c1 = Conv1D(n1, 3, padding='same', name='conv_2')(c1)
    c1 = Activation(selu, name='act_2')(c1)
    x = MaxPooling1D(2, name='mpool_1')(c1)

    c2 = Conv1D(n2, 3, padding='same', name='conv_3')(x)
    c2 = Activation(selu, name='act_3')(c2)
    c2 = Conv1D(n2, 3, padding='same', name='conv_4')(c2)
    c2 = Activation(selu, name='act_4')(c2)
    x = MaxPooling1D(2, name='mpool_2')(c2)

    c3 = Conv1D(n3, 3, padding='same', name='conv_5')(x)
    c3 = Activation(selu, name='act_5')(c3)
    x = UpSampling1D(2, name='usample_1')(c3)

    c2_2 = Conv1D(n2, 3, padding='same', name='conv_6')(x)
    c2_2 = Activation(selu, name='act_6')(c2_2)
    c2_2 = Conv1D(n2, 3, padding='same', name='conv_7')(c2_2)
    c2_2 = Activation(selu, name='act_7')(c2_2)

    # Skip connection from the second down stage.
    m1 = Add(name='add_1')([c2, c2_2])
    m1 = UpSampling1D(2, name='usample_2')(m1)

    c1_2 = Conv1D(n1, 3, padding='same', name='conv_8')(m1)
    c1_2 = Activation(selu, name='act_8')(c1_2)
    c1_2 = Conv1D(n1, 3, padding='same', name='conv_9')(c1_2)
    c1_2 = Activation(selu, name='act_9')(c1_2)

    # Skip connection from the first down stage.
    m2 = Add(name='add_2')([c1, c1_2])

    decoded = Conv1D(1, 5, padding='same', activation='linear', name='conv_10')(m2)

    return decoded


def create_discriminator(inputs_disc):
    """Build the discriminator (critic) graph and return its output tensor.

    Two conv/conv/batch-norm stages, each added back onto its own input
    (``dis_add_1``, ``dis_add_2``), followed by a 1-filter convolution with
    no activation.

    :param inputs_disc: Keras input tensor fed to the first convolution
    :return: output tensor of layer ``dis_conv_5``
    """
    x = Conv1D(32, 3, padding='same', name='dis_conv_1')(inputs_disc)
    x = Activation(selu, name='dis_act_1')(x)

    x = Conv1D(64, 3, padding='same', name='dis_conv_2')(x)
    x = BatchNormalization(name='dis_bnorm_1')(x)
    x1 = Activation(selu, name='dis_act_2')(x)

    # NOTE(review): the input has 1 channel and x1 has 64, so this add
    # relies on broadcasting over the channel axis -- confirm intended.
    m1 = Add(name='dis_add_1')([inputs_disc, x1])

    x = Conv1D(32, 3, padding='same', name='dis_conv_3')(m1)
    x = Activation(selu, name='dis_act_3')(x)

    x = Conv1D(64, 3, padding='same', name='dis_conv_4')(x)
    x = BatchNormalization(name='dis_bnorm_2')(x)
    x2 = Activation(selu, name='dis_act_4')(x)
    m2 = Add(name='dis_add_2')([m1, x2])

    discri = Conv1D(1, 5, padding='same', name='dis_conv_5')(m2)

    return discri

#####################################################################################
# 3. Define Training process of QSR_WGAN_GP
#####################################################################################
SEQ_LEN = 80
BATCH_SIZE = 128


def _read_mat_variable(mat_path, key, label):
    """Announce *mat_path* on stdout and return variable *key* as an array."""
    mat_path = os.path.normcase(mat_path)
    print(label + mat_path)
    return np.array(sio.loadmat(mat_path)[key])


def load_data():
    """Load the G.711 training/validation pairs from their fixed locations.

    Inputs come from the ``inputSetNorm`` variable and targets from the
    ``targetSet`` variable of the ``.mat`` files below.  Training rows are
    shuffled jointly after seeding NumPy's global generator with 1331;
    validation rows keep their file order.

    :return: ``(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali)``,
             each reshaped to ``(rows, cols, 1)``
    """
    print('> Loading data... ')
    data_root = 'Train_G711_PreProc_defautLang/'

    x_train_noisy = _read_mat_variable(
        data_root + 'Train_inputSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat',
        'inputSetNorm', '> Training Input: ')
    x_train_noisy_vali = _read_mat_variable(
        data_root + 'Vali_inputSet_ALaw_defautLang_OLdata_ValiTrain_smallVali.mat',
        'inputSetNorm', '> Validation Input: ')
    x_train = _read_mat_variable(
        data_root + 'Train_targetSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat',
        'targetSet', '> Training Target: ')
    x_train_vali = _read_mat_variable(
        data_root + 'Vali_targetSet_ALaw_defautLang_OLdata_ValiTrain_smallVali.mat',
        'targetSet', '> Validation Target: ')

    # Randomization of Training Pairs (validation pairs are not shuffled).
    random.seed(1331)

    # Split at the actual input width instead of the SEQ_LEN constant, so
    # input and target columns cannot be mixed up when the stored frame
    # length differs from 80.
    frame_len = x_train_noisy.shape[1]
    train = np.column_stack((x_train_noisy, x_train))
    np.random.shuffle(train)
    x_train_noisy = train[:, :frame_len]
    x_train = train[:, frame_len:]

    # Reshape of training pairs and validation pairs to (rows, cols, 1).
    x_train_noisy = np.reshape(x_train_noisy, (x_train_noisy.shape[0], x_train_noisy.shape[1], 1))
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

    x_train_noisy_vali = np.reshape(x_train_noisy_vali, (x_train_noisy_vali.shape[0], x_train_noisy_vali.shape[1], 1))
    x_train_vali = np.reshape(x_train_vali, (x_train_vali.shape[0], x_train_vali.shape[1], 1))

    print('> Data Loaded. Model Compiling... ')
    return x_train_noisy, x_train, x_train_noisy_vali, x_train_vali
# 2. dataset generator
def batch_generator(x_train_noisy, x_train, batch_size=128):
    """Yield ``(noisy_batch, clean_batch)`` slices forever.

    The data is walked in order in steps of *batch_size*; a trailing
    partial batch is dropped and iteration restarts from the first row.

    :param x_train_noisy: input array, sliced along its first axis
    :param x_train: target array, sliced with the same indices
    :param batch_size: number of rows per yielded batch
    :raises ValueError: on first use, if *batch_size* is not positive or
        exceeds the number of rows (the generator would otherwise spin
        forever without yielding anything)
    """
    n_rows = x_train_noisy.shape[0]
    if batch_size < 1 or n_rows < batch_size:
        raise ValueError("batch_size must be in [1, %d], got %d" % (n_rows, batch_size))

    while True:
        for i in range(0, n_rows - batch_size + 1, batch_size):
            yield x_train_noisy[i:i+batch_size], x_train[i:i+batch_size]


default_opt_params = {'lr': 5e-5, 'clip': 1e-2, 'n_lambda': 10, 'n_critic': 10}


class QSRWGAN(object):
    """WGAN with gradient penalty built from the generator/discriminator above.

    The constructor builds the whole TensorFlow graph (models, losses and
    Adam train ops) in its own session; ``fit`` runs the alternating
    critic/generator training loop.
    """

    def __init__(self, opt_params=default_opt_params, frame_len=80):
        """Build models, losses and optimizers.

        :param opt_params: dict with keys ``lr``, ``clip``, ``n_lambda``
            (gradient-penalty weight) and ``n_critic`` (critic steps per
            generator step); it is only read, never modified
        :param frame_len: number of samples per frame fed to both networks
        """
        self.n_critic = opt_params['n_critic']
        self.n_lambda = opt_params['n_lambda']
        self.clip = opt_params['clip']
        self.frame_len = frame_len

        # ------------------------------------------------------------------
        # 1. create session
        # ------------------------------------------------------------------
        self.sess = tf.Session()
        K.set_session(self.sess)  # pass the session to keras

        # ------------------------------------------------------------------
        # 2. create generator and discriminator
        # ------------------------------------------------------------------
        with tf.name_scope('generator'):
            gen_inputs = Input(shape=(self.frame_len, 1))
            gen_outputs = create_generator(gen_inputs)

        with tf.name_scope('discriminator'):
            dis_inputs = Input(shape=(self.frame_len, 1))
            dis_outputs = create_discriminator(dis_inputs)

        # ------------------------------------------------------------------
        # 3. instantiate networks of generator and discriminator
        # ------------------------------------------------------------------
        Generator = Model(inputs=gen_inputs, outputs=gen_outputs)
        Generator.summary()
        self.gen_model = Generator
        Discriminator = Model(inputs=dis_inputs, outputs=dis_outputs)
        Discriminator.summary()

        # ------------------------------------------------------------------
        # 4. save the inputs of generator and discriminator
        # ------------------------------------------------------------------
        quan_inputs = tf.placeholder(tf.float32, shape=(None, self.frame_len, 1), name='quan_inputs')
        real_inputs = tf.placeholder(tf.float32, shape=(None, self.frame_len, 1), name='real_inputs')
        self.inputs = quan_inputs, real_inputs

        # ------------------------------------------------------------------
        # 5. get the weights of generator and discriminator
        # ------------------------------------------------------------------
        # Variables are selected by the name scope they were created in.
        self.gen_weights = [weights for weights in tf.global_variables() if 'generator' in weights.name]
        self.dis_weights = [weights for weights in tf.global_variables() if 'discriminator' in weights.name]

        # ------------------------------------------------------------------
        # 6. create predictions of generator and discriminator
        # ------------------------------------------------------------------
        fake_inputs = Generator(quan_inputs)
        disc_real = Discriminator(real_inputs)
        disc_fake = Discriminator(fake_inputs)
        self.predictions = fake_inputs

        # ------------------------------------------------------------------
        # 7. create losses and compute probabilities of discriminator
        # ------------------------------------------------------------------
        # 7.1. WGAN lipschitz-penalty
        # NOTE(review): alpha is drawn from [-0.4, 0.4] with a fixed
        # BATCH_SIZE leading dimension; the reference WGAN-GP formulation
        # samples from [0, 1] -- kept as in the original, confirm intent.
        alpha = tf.random_uniform(shape=[BATCH_SIZE, 1, 1], minval=-0.4, maxval=0.4)
        differences = fake_inputs - real_inputs
        interpolates = real_inputs + (alpha * differences)

        gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0]
        # The squared gradients are summed over the frame axis only; the
        # channel axis has size 1 (see the placeholders above).
        gp = K.mean(K.square(K.sqrt(K.sum(K.square(gradients), axis=1)) - 1))
        gradient_penalty = self.n_lambda * gp

        disc_loss = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
        disc_loss += gradient_penalty

        self.dis_loss = disc_loss
        self.gen_loss = -tf.reduce_mean(disc_fake)

        self.disc_real = tf.reduce_mean(disc_real)
        self.disc_fake = tf.reduce_mean(disc_fake)
        self.prob_real = tf.reduce_mean(tf.sigmoid(disc_real))
        self.prob_fake = tf.reduce_mean(tf.sigmoid(disc_fake))

        # ------------------------------------------------------------------
        # 8. create optimizer for generator and discriminator
        # ------------------------------------------------------------------
        learning_rate = opt_params['lr']

        gen_train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.gen_loss, var_list=self.gen_weights)
        disc_train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.dis_loss, var_list=self.dis_weights)

        self.gen_opt_train = gen_train_op
        self.dis_opt_train = disc_train_op

    def load_weights(self):
        """Load pre-trained generator weights from a fixed ``.h5`` file."""
        self.gen_model.load_weights('ddsrcnn_weights_defaultLang_OL40_stopstr_bs128_lr5e-05.h5')

    def save_weights(self, file_path):
        """Save the generator weights to *file_path*."""
        file_path = os.path.normcase(file_path)
        self.gen_model.save_weights(file_path)

    def load_batch(self, x_train_noise, x_train, train=True):
        """Return the feed dict for one training step.

        :param x_train_noise: batch fed to the generator placeholder
        :param x_train: batch fed to the real-data placeholder
        :param train: value fed to the Keras learning-phase flag
        """
        gen_inputs, dis_inputs = self.inputs
        return {gen_inputs: x_train_noise, dis_inputs: x_train, K.learning_phase(): train}

    def gen(self, x_train_noise):
        """Run the generator on *x_train_noise* in inference mode."""
        gen_inputs, _ = self.inputs
        feed_dict = {gen_inputs: x_train_noise, K.learning_phase(): False}
        return self.sess.run(self.predictions, feed_dict=feed_dict)

    def gen_train(self, feed_dict):
        """Take one optimizer step on the generator; return its loss."""
        _, gen_loss = self.sess.run([self.gen_opt_train, self.gen_loss], feed_dict=feed_dict)
        return gen_loss

    def dis_train(self, feed_dict):
        """Take one optimizer step on the discriminator; return its loss."""
        _, dis_loss = self.sess.run([self.dis_opt_train, self.dis_loss], feed_dict=feed_dict)
        return dis_loss

    def fit(self, x_train_noise, x_train, x_train_noise_vali, x_train_vali, epochs=10, logdir='/qsrwgan_run'):
        """Train the GAN.

        Each generator step is preceded by ``n_critic`` discriminator steps.
        Every 50 generator steps a preview ``.wav`` is written, generated
        from Gaussian noise.

        :param x_train_noise: training inputs, shape ``(rows, frame_len, 1)``
        :param x_train: training targets, same shape
        :param x_train_noise_vali: accepted for interface compatibility; unused
        :param x_train_vali: accepted for interface compatibility; unused
        :param epochs: number of passes over the training data
        :param logdir: directory that is deleted (if present) and recreated
        """
        # ------------------------------------------------------------------
        # 1. initialize log directory
        # ------------------------------------------------------------------
        if tf.gfile.Exists(logdir):
            tf.gfile.DeleteRecursively(logdir)

        tf.gfile.MakeDirs(logdir)

        # ------------------------------------------------------------------
        # 2. initialize model
        # ------------------------------------------------------------------
        init = tf.global_variables_initializer()
        self.sess.run(init)
        self.load_weights()

        # ------------------------------------------------------------------
        # 3. train the model
        # ------------------------------------------------------------------
        # The former load of an extra test-set .mat file was removed here:
        # its result was never used, yet the file had to exist to train.
        g_step = 0
        curr_epoch = 0
        batches_per_epoch = x_train_noise.shape[0] // BATCH_SIZE

        while curr_epoch < epochs:
            # create data for the gan training
            generator = batch_generator(x_train_noise, x_train, BATCH_SIZE)

            curr_iter = 0
            while curr_iter < batches_per_epoch:
                start_time = time.time()

                # Collect the losses of all critic steps.  The list used to
                # be reset inside the loop, so the reported "mean" was only
                # the last critic loss.
                dis_losses = []
                for _ in range(self.n_critic):
                    curr_iter += 1

                    # load the batch
                    quant_batch, real_batch = next(generator)
                    feed_dict = self.load_batch(quant_batch, real_batch)

                    # train the discriminator
                    dis_losses.append(self.dis_train(feed_dict))

                dis_loss = np.array(dis_losses).mean()

                # train the generator
                curr_iter += 1
                quant_batch, real_batch = next(generator)
                feed_dict = self.load_batch(quant_batch, real_batch)
                gen_loss = self.gen_train(feed_dict)

                g_step += 1

                if g_step < 1000 or g_step % 1000 == 0:
                    tot_time = time.time() - start_time
                    print('Epoch: %3d, Gen Steps: %4d (%3.lf s), Discriminator loss: %.6f, Generator loss: %.6f' % (curr_epoch, g_step, tot_time, dis_loss, gen_loss))

                if g_step % 50 == 0:
                    # Use the configured frame length rather than a literal
                    # 80 so the feed matches the placeholder shape.
                    prediction = self.gen(np.random.randn(BATCH_SIZE, self.frame_len, 1))
                    fname = 'recon-speech-%d_%d.wav' % (curr_iter, g_step)
                    swave.write(fname, 8000, np.reshape(prediction, (prediction.size,)))

            curr_epoch += 1

            # NOTE(review): the nesting of this call could not be recovered
            # from the source; saving after every epoch overwrites the same
            # file and leaves the same final result as saving once at the end.
            self.save_weights("qsrwgan_weights.h5")


# Guarded so that the definitions above can be loaded without starting a
# training run; executing the file as a script behaves as before.
if __name__ == "__main__":
    model = QSRWGAN(opt_params=default_opt_params)
    # train model
    x_train_noisy, x_train, _, x_train_vali = load_data()
    # The original call passed the validation target for both validation
    # arguments; fit() ignores them.
    model.fit(x_train_noisy, x_train, x_train_vali, x_train_vali, epochs=10000)
-------------------------------------------------------------------------------- /CepstralCNN/TrainValiData/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/CepstralCNN/TrainValiData/README.md -------------------------------------------------------------------------------- /CepstralCNN/WaveformQSRCNN.py: -------------------------------------------------------------------------------- 1 | ######################################################################################### 2 | # WaveformQSRCNN.py: QSRCNN for G711/ADPCM/AMR/EVS using using Waveform features 3 | # Author: Huijun Liu 4 | # Time: 10.05.2017 5 | # Location: TU Braunschweig IfN 6 | ######################################################################################### 7 | 8 | import os 9 | import time 10 | import math 11 | import scipy.io as sio 12 | import tensorflow as tf 13 | 14 | from keras.models import Model 15 | from keras import backend as K 16 | from keras.layers import Input, Add, Activation 17 | from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard, LearningRateScheduler 19 | 20 | from weightnorm import AdamWithWeightnorm 21 | from tensorflow.python.framework import ops 22 | 23 | # ------------------------------------------------------------------------------- 24 | # 0. 
# -------------------------------------------------------------------------------
# 0. define metric and activation function
# -------------------------------------------------------------------------------


def snr(y_true, y_pred):
    """Keras metric: signal-to-noise ratio in dB.

    Computes ``10 * log10(sum(y_true^2) / sum((y_pred - y_true)^2))`` with
    backend ops, summing over all elements of the tensors.
    """
    signal_energy = K.sum(K.square(y_true))
    noise_energy = K.sum(K.square(y_pred - y_true))
    # log10 is obtained from the natural log via the change-of-base rule.
    return 10.0 * K.log(signal_energy / noise_energy) / K.log(10.0)


def selu(x):
    """Element-wise activation ``scale * (x if x >= 0 else alpha * elu(x))``."""
    selu_alpha = 1.6732632423543772848170429916717
    selu_scale = 1.0507009873554804934193349852946
    with ops.name_scope('elu'):
        is_non_negative = x >= 0.0
        negative_branch = selu_alpha * tf.nn.elu(x)
        return selu_scale * tf.where(is_non_negative, x, negative_branch)


# -------------------------------------------------------------------------------
# 1. define Waveform-QSRCNN Model
# -------------------------------------------------------------------------------


class WaveformQSRCNN(object):
    def __init__(self, opt_params={'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 100},
                 model_params={'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32},
                 codec_type_params={'weights_dir': "./model_weights", 'logdir': "./log"}):
        """Store the hyper-parameters, create output folders, build the model.

        :param opt_params: dict with ``lr``, ``batch_size`` and ``nb_epochs``
        :param model_params: dict passed on to ``create_model``; its
            ``frame_len`` entry is also stored on the instance
        :param codec_type_params: dict with ``logdir`` and ``weights_dir``;
            both directories are created when missing
        """
        self.learning_rate = opt_params['lr']
        self.batch_size = opt_params['batch_size']
        self.nb_epochs = opt_params['nb_epochs']

        # Log directory first, then weights directory; each one is looked
        # up and created before the next key is read.
        for attribute, key in (('log_dir', 'logdir'), ('weights_dir', 'weights_dir')):
            directory = codec_type_params[key]
            setattr(self, attribute, directory)
            if not os.path.exists(directory):
                os.makedirs(directory)

        self.frame_len = model_params['frame_len']
        self.model = self.create_model(model_params)
------------------------------------------------------------------------------- 80 | def load_weights(self, file_path=""): 81 | if file_path == "": 82 | file_path = self.weights_dir + '/' + 'G711_WaveformQSRCNN_Weights_Best_bs' + \ 83 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.h5' 84 | 85 | file_path = os.path.normcase(file_path) 86 | self.model.load_weights(file_path) 87 | 88 | # ------------------------------------------------------------------------------- 89 | # Save the Weights of the Model 90 | # ------------------------------------------------------------------------------- 91 | def save_weights(self): 92 | file_path = self.weights_dir + '/' + 'G711_WaveformQSRCNN_Weights_Final_bs' + \ 93 | str(self.batch_size) + '_lr' + str(self.learning_rate) + '.h5' 94 | file_path = os.path.normcase(file_path) 95 | self.model.save_weights(file_path) 96 | 97 | # ------------------------------------------------------------------------------- 98 | # 1. define model 99 | # ------------------------------------------------------------------------------- 100 | def create_model(self, model_params={'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 80}): 101 | frame_len = self.frame_len 102 | n1 = model_params['n1'] 103 | n2 = model_params['n2'] 104 | n3 = model_params['n3'] 105 | 106 | input_sque = Input(shape=(frame_len, 1)) 107 | c1 = Conv1D(n1, 3, padding='same')(input_sque) 108 | c1 = Activation(selu)(c1) 109 | c1 = Conv1D(n1, 3, padding='same')(c1) 110 | c1 = Activation(selu)(c1) 111 | x = MaxPooling1D(2)(c1) 112 | 113 | c2 = Conv1D(n2, 3, padding='same')(x) 114 | c2 = Activation(selu)(c2) 115 | c2 = Conv1D(n2, 3, padding='same')(c2) 116 | c2 = Activation(selu)(c2) 117 | x = MaxPooling1D(2)(c2) 118 | 119 | c3 = Conv1D(n3, 3, padding='same')(x) 120 | c3 = Activation(selu)(c3) 121 | x = UpSampling1D(2)(c3) 122 | 123 | c2_2 = Conv1D(n2, 3, padding='same')(x) 124 | c2_2 = Activation(selu)(c2_2) 125 | c2_2 = Conv1D(n2, 3, padding='same')(c2_2) 126 | c2_2 = 
Activation(selu)(c2_2) 127 | 128 | m1 = Add()([c2, c2_2]) 129 | m1 = UpSampling1D(2)(m1) 130 | 131 | c1_2 = Conv1D(n1, 3, padding='same')(m1) 132 | c1_2 = Activation(selu)(c1_2) 133 | c1_2 = Conv1D(n1, 3, padding='same')(c1_2) 134 | c1_2 = Activation(selu)(c1_2) 135 | 136 | m2 = Add()([c1, c1_2]) 137 | 138 | decoded = Conv1D(1, 5, padding='same', activation='linear')(m2) 139 | 140 | model = Model(input_sque, decoded) 141 | model.summary() 142 | 143 | learning_rate = self.learning_rate 144 | # adam = optimizers.Adam(lr=learning_rate) 145 | # model.compile(optimizer=adam, loss='mse', metrics=[SNRLoss]) 146 | 147 | adam_wn = AdamWithWeightnorm(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08) 148 | model.compile(optimizer=adam_wn, loss='mse', metrics=[snr]) 149 | 150 | return model 151 | 152 | # ------------------------------------------------------------------------------- 153 | # 2. Fit the model 154 | # ------------------------------------------------------------------------------- 155 | def step_decay(self, epoch): 156 | initial_lrate = self.learning_rate 157 | 158 | drop = 0.25 159 | epochs_drop = 4.0 160 | lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop)) 161 | 162 | old_lr = K.get_value(self.model.optimizer.lr) 163 | K.set_value(self.model.optimizer.lr, lrate) 164 | lrate = K.get_value(self.model.optimizer.lr) 165 | print("> Ir reduced from %f to %f" % (old_lr, lrate)) 166 | return lrate 167 | 168 | def fit(self, x_train_noisy, x_train, x_train_noisy_vali, x_train_vali): 169 | print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ') 170 | print("> Training model ...") 171 | 172 | nb_epochs = self.nb_epochs 173 | batch_size = self.batch_size 174 | learning_rate = self.learning_rate 175 | 176 | # --------------------------------------------------------- 177 | # 1. 
def fit(self, x_train_noisy, x_train, x_train_noisy_vali, x_train_vali):
    """Train ``self.model`` and return the Keras ``History`` object.

    :param x_train_noisy: training inputs.
    :param x_train: training targets.
    :param x_train_noisy_vali: validation inputs.
    :param x_train_vali: validation targets.
    :return: the object returned by ``self.model.fit``.

    The best weights (highest ``val_snr``) are checkpointed into
    ``self.weights_dir`` and a per-epoch CSV log is written into ``self.log_dir``.
    """
    print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
    print("> Training model ...")

    nb_epochs = self.nb_epochs
    batch_size = self.batch_size
    learning_rate = self.learning_rate

    # The checkpoint and CSV callbacks open files in these directories; create
    # them up front instead of failing after the first epoch.
    for out_dir in (self.weights_dir, self.log_dir):
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # ---------------------------------------------------------
    # 1. define callback functions
    # ---------------------------------------------------------
    # Stop training when val_snr has not improved for 16 epochs
    stop_str = EarlyStopping(monitor='val_snr', patience=16, verbose=1, mode='max')

    # Halve the learning rate when val_snr stops improving for 2 epochs.
    # NOTE(review): step_decay (scheduled below) recomputes the rate from
    # self.learning_rate every epoch, so a reduction made here appears to be
    # overwritten at the start of the next epoch -- confirm this is intended.
    reduce_LR = ReduceLROnPlateau(monitor='val_snr', factor=0.5, patience=2, verbose=1, mode='max',
                                  epsilon=0.0001, cooldown=0, min_lr=0)

    run_tag = '_bs' + str(batch_size) + '_lr' + str(learning_rate)

    best_weights = os.path.normcase(
        os.path.join(self.weights_dir, 'G711_WaveformQSRCNN_Weights_Best' + run_tag + '.h5'))
    model_save = ModelCheckpoint(best_weights, monitor='val_snr', save_best_only=True, mode='max',
                                 save_weights_only=True, period=1)

    logger_name = os.path.normcase(
        os.path.join(self.log_dir, 'G711_WaveformQSRCNN_log' + run_tag + '.csv'))
    logger = CSVLogger(logger_name, separator=',', append=False)

    lrate = LearningRateScheduler(self.step_decay)

    start = time.time()

    # ---------------------------------------------------------
    # 2. fit the model
    # ---------------------------------------------------------
    print("> Training model " + "using Batch-size: " + str(batch_size) + ", Learning_rate: " + str(learning_rate) + "...")
    # validation_data is documented as a tuple (x_val, y_val).
    hist = self.model.fit(x_train_noisy, x_train, epochs=nb_epochs, batch_size=batch_size, shuffle=True,
                          validation_data=(x_train_noisy_vali, x_train_vali),
                          callbacks=[lrate, reduce_LR, stop_str, model_save, logger])

    print("> Training Completed, Time : ", time.time() - start)
    print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
    return hist
# -------------------------------------------------------------------------------
# 3. Save loss snr val_loss val_snr as .mat File
# -------------------------------------------------------------------------------
def save_training_curves(self, hist):
    """Dump the training/validation loss and SNR curves as MATLAB ``.mat`` files.

    :param hist: object with a ``history`` dict holding the keys ``'loss'``,
        ``'snr'``, ``'val_loss'`` and ``'val_snr'`` (as returned by ``fit``).

    One file per curve is written into ``./Opting_Results``; the file names
    encode ``self.batch_size`` and ``self.learning_rate``.
    """
    print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
    print("> Saving Training and Validation loss-metric curve ...")

    start = time.time()

    trian_curve_root = "./Opting_Results"
    os.makedirs(trian_curve_root, exist_ok=True)

    run_tag = '_bs' + str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'

    # (file-name stem, variable name inside the .mat file, key in hist.history)
    # NOTE(review): the validation-loss stem says "WaveformDDQSRCNN" while the
    # other three say "WaveformQSRCNN". Kept as is because downstream scripts
    # may load the file by this name -- confirm before renaming.
    curves = (
        ('G711_WaveformQSRCNN_TrainLoss', 'Train_Loss_Vec', 'loss'),
        ('G711_WaveformQSRCNN_TrainMetrice', 'Train_SNR_Vec', 'snr'),
        ('G711_WaveformDDQSRCNN_ValiLoss', 'Vali_Loss_Vec', 'val_loss'),
        ('G711_WaveformQSRCNN_ValiMetrice', 'Vali_SNR_Vec', 'val_snr'),
    )
    for stem, mat_key, hist_key in curves:
        mat_path = os.path.normcase(os.path.join(trian_curve_root, stem + run_tag))
        sio.savemat(mat_path, {mat_key: hist.history[hist_key]})

    print("> Saving Completed, Time : ", time.time() - start)
    print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')


# -------------------------------------------------------------------------------
# 4. Evaluate the Trained Model
# -------------------------------------------------------------------------------
def evaluation_model(self, x_test_noisy, weights_path=""):
    """Run the trained model on test inputs and save the predictions.

    :param x_test_noisy: test inputs handed to ``self.model.predict``.
    :param weights_path: weights file to load first; an empty string makes
        ``load_weights`` fall back to its default file.

    The predictions are stored under the key ``'predictions'`` in
    ``./Test_Outputs/G711_CNN_testplan_vec.mat``. Nothing is returned.
    """
    print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
    print("> Evaluation of the Trained Model ...")
    # ---------------------------------------------------------
    # 1. Load Model Weights
    # ---------------------------------------------------------
    print('> 1. Loading the Weights of the Model ...')
    self.load_weights(weights_path)

    # ---------------------------------------------------------
    # 2. Evaluate the Model
    # ---------------------------------------------------------
    start = time.time()
    print('> 2. Evaluating the Model, Please wait for a Moment ...')
    predicted = self.model.predict(x_test_noisy)
    print('> 2. Evaluating Completed, Time : ' + str(time.time() - start))

    # ---------------------------------------------------------
    # 3. Saving the Evaluation Result
    # ---------------------------------------------------------
    print('> 3. Saving the Evaluation Result ...')
    start = time.time()
    pre_file_root = "./Test_Outputs"
    os.makedirs(pre_file_root, exist_ok=True)

    preOutput = os.path.normcase(os.path.join(pre_file_root, "G711_CNN_testplan_vec.mat"))

    sio.savemat(preOutput, {'predictions': predicted})
    print('> 3. Evaluation Result Saving Completed, Time : ' + str(time.time() - start))
    print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
#########################################################################################################
# WaveformQSRCNN_TrainTest_GPUs.py: Train and Test QSRCNN for G711/ADPCM/AMR/EVS using Waveform features
# Author: Huijun Liu
# Time: 10.05.2017
# Location: TU Braunschweig IfN
#########################################################################################################

import os
import sys
import time

import WaveformQSRCNN as model
import tensorflow as tf
import DataPrepare as dp
from keras.backend.tensorflow_backend import set_session

#####################################################################################
# 0. Settings For GPUs and Parameters
#####################################################################################
using_gpu = 0
if using_gpu == 1:
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"  # index of the GPU made visible to this process
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5  # use at most 50% of the GPU memory
    set_session(tf.Session(config=config))

train_or_test = "train"  # train or test

default_opt_params = {'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 1000}
default_model_params = {'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 80}
codec_type_params = {'weights_dir': "./model_weights", 'logdir': "./log"}

train_inputs = ""
train_targets = ""

vali_inputs = ""
vali_targets = ""

test_inputs = ""

if train_or_test == "train":
    # -------------------------------------------------------------------------------
    # 1. Load Data
    # -------------------------------------------------------------------------------
    train_inputs = "./TrainValiData/Train_inputSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat"
    train_targets = "./TrainValiData/Train_targetSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat"

    vali_inputs = "./TrainValiData/Vali_inputSet_ALaw_defautLang_OLdata_ValiTrain_smallVali_Reverse_Extended.mat"
    vali_targets = "./TrainValiData/Vali_targetSet_ALaw_defautLang_OLdata_ValiTrain_smallVali_Reverse_Extended.mat"

    x_train_noisy, x_train, x_train_noisy_vali, x_train_vali = dp.load_train_data(train_inputs, train_targets,
                                                                                  vali_inputs, vali_targets)

    # -------------------------------------------------------------------------------
    # 2. Init Waveform-QSRCNN Model
    # -------------------------------------------------------------------------------
    qsrcnn = model.WaveformQSRCNN(opt_params=default_opt_params,
                                  model_params=default_model_params,
                                  codec_type_params=codec_type_params)

    # -------------------------------------------------------------------------------
    # 3. Fit The Waveform-QSRCNN Model
    # -------------------------------------------------------------------------------
    hist = qsrcnn.fit(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali)

    # -------------------------------------------------------------------------------
    # 4. Save Weights and Training Curves
    # -------------------------------------------------------------------------------
    qsrcnn.save_weights()
    qsrcnn.save_training_curves(hist=hist)

elif train_or_test == "test":
    # -------------------------------------------------------------------------------
    # 1. Load Data
    # -------------------------------------------------------------------------------
    # NOTE(review): unlike the training files this path has no directory prefix,
    # so it is resolved against the current working directory -- confirm.
    test_inputs = "inputTestSet_g711concat_PDandOLAI_Frame_80v73.mat"
    x_test_noisy = dp.load_test_data(test_inputs)

    # -------------------------------------------------------------------------------
    # 2. Init Waveform-QSRCNN Model (same parameter dicts as for training)
    # -------------------------------------------------------------------------------
    qsrcnn = model.WaveformQSRCNN(opt_params=default_opt_params,
                                  model_params=default_model_params,
                                  codec_type_params=codec_type_params)

    # -------------------------------------------------------------------------------
    # 3. Evaluate The Waveform-QSRCNN Model
    # -------------------------------------------------------------------------------
    qsrcnn.evaluation_model(x_test_noisy)

else:
    # ValueError is still caught by callers handling Exception.
    raise ValueError("Do you want to train or test the model ? Please set the variable train_or_test !")
from keras import backend as K
from keras.optimizers import SGD,Adam
import tensorflow as tf

# adapted from keras.optimizers.SGD
class SGDWithWeightnorm(SGD):
    """SGD (optionally with momentum / Nesterov) using weight normalization.

    Parameters whose shape has more than one dimension are updated in the
    weight-normalized (V, g) parameterization; all other parameters get the
    plain SGD-with-momentum update.
    """

    def get_updates(self, params, constraints, loss):
        """Build and return the list of backend update ops for one training step.

        :param params: variables to optimize.
        :param constraints: mapping from a parameter to a constraint callable;
            only looked up with ``p in constraints`` / ``constraints[p]``.
        :param loss: loss tensor the gradients are taken of.
        :return: ``self.updates``.
        """
        grads = self.get_gradients(loss, params)
        self.updates = []

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))
            self.updates .append(K.update_add(self.iterations, 1))

        # momentum
        shapes = [K.get_variable_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        # Only the per-parameter moments are registered as optimizer weights;
        # the m_g accumulators created in the loop below are not added here.
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):

            # if a weight tensor (len > 1) use weight normalized parameterization
            ps = K.get_variable_shape(p)
            if len(ps) > 1:

                # get weight normalization parameters
                V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(p, g)

                # momentum container for the 'g' parameter
                V_scaler_shape = K.get_variable_shape(V_scaler)
                m_g = K.zeros(V_scaler_shape)

                # update g parameters
                v_g = self.momentum * m_g - lr * grad_g  # velocity
                self.updates.append(K.update(m_g, v_g))
                if self.nesterov:
                    new_g_param = g_param + self.momentum * v_g - lr * grad_g
                else:
                    new_g_param = g_param + v_g

                # update V parameters
                v_v = self.momentum * m - lr * grad_V  # velocity
                self.updates.append(K.update(m, v_v))
                if self.nesterov:
                    new_V_param = V + self.momentum * v_v - lr * grad_V
                else:
                    new_V_param = V + v_v

                # if there are constraints we apply them to V, not W
                if p in constraints:
                    c = constraints[p]
                    new_V_param = c(new_V_param)

                # wn param updates --> W updates
                add_weightnorm_param_updates(self.updates, new_V_param, new_g_param, p, V_scaler)

            else: # normal SGD with momentum
                v = self.momentum * m - lr * g  # velocity
                self.updates.append(K.update(m, v))

                if self.nesterov:
                    new_p = p + self.momentum * v - lr * g
                else:
                    new_p = p + v

                # apply constraints
                if p in constraints:
                    c = constraints[p]
                    new_p = c(new_p)

                self.updates.append(K.update(p, new_p))
        return self.updates
# adapted from keras.optimizers.Adam
class AdamWithWeightnorm(Adam):
    """Adam optimizer using weight normalization for multi-dimensional weights.

    Parameters whose shape has more than one dimension are updated in the
    weight-normalized (V, g) parameterization; all other parameters get the
    regular Adam update.
    """

    def get_updates(self, params, constraints, loss):
        """Build and return the list of backend update ops for one training step.

        :param params: variables to optimize.
        :param constraints: mapping from a parameter to a constraint callable;
            only looked up with ``p in constraints`` / ``constraints[p]``.
        :param loss: loss tensor the gradients are taken of.
        :return: ``self.updates``.
        """
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        # bias-corrected step size, computed for step t = iterations + 1
        t = self.iterations + 1
        lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        # The m_g / v_g accumulators created in the loop below are not part of
        # this list.
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):

            # if a weight tensor (len > 1) use weight normalized parameterization
            # this is the only part changed w.r.t. keras.optimizers.Adam
            ps = K.get_variable_shape(p)
            if len(ps)>1:

                # get weight normalization parameters
                V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(p, g)

                # Adam containers for the 'g' parameter
                V_scaler_shape = K.get_variable_shape(V_scaler)
                m_g = K.zeros(V_scaler_shape)
                v_g = K.zeros(V_scaler_shape)

                # update g parameters
                m_g_t = (self.beta_1 * m_g) + (1. - self.beta_1) * grad_g
                v_g_t = (self.beta_2 * v_g) + (1. - self.beta_2) * K.square(grad_g)
                new_g_param = g_param - lr_t * m_g_t / (K.sqrt(v_g_t) + self.epsilon)
                self.updates.append(K.update(m_g, m_g_t))
                self.updates.append(K.update(v_g, v_g_t))

                # update V parameters
                m_t = (self.beta_1 * m) + (1. - self.beta_1) * grad_V
                v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(grad_V)
                new_V_param = V - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
                self.updates.append(K.update(m, m_t))
                self.updates.append(K.update(v, v_t))

                # if there are constraints we apply them to V, not W
                if p in constraints:
                    c = constraints[p]
                    new_V_param = c(new_V_param)

                # wn param updates --> W updates
                add_weightnorm_param_updates(self.updates, new_V_param, new_g_param, p, V_scaler)

            else: # do optimization normally
                m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
                v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

                self.updates.append(K.update(m, m_t))
                self.updates.append(K.update(v, v_t))

                new_p = p_t
                # apply constraints
                if p in constraints:
                    c = constraints[p]
                    new_p = c(new_p)
                self.updates.append(K.update(p, new_p))
        return self.updates
def get_weightnorm_params_and_grads(p, g):
    """Express weight ``p`` and its gradient ``g`` in the (V, g) parameterization.

    :param p: weight variable; its last axis indexes the output units and the
        norm is taken over all other axes.
    :param g: gradient of the loss with respect to ``p``.
    :return: tuple ``(V, V_norm, V_scaler, g_param, grad_g, grad_V)``.
    """
    ps = K.get_variable_shape(p)

    # construct weight scaler: V_scaler = g/||V||
    V_scaler_shape = (ps[-1],)  # assumes we're using tensorflow!
    V_scaler = K.ones(V_scaler_shape)  # init to ones, so effective parameters don't change

    # get V parameters = ||V||/g * W
    norm_axes = [i for i in range(len(ps) - 1)]
    V = p / tf.reshape(V_scaler, [1] * len(norm_axes) + [-1])

    # split V_scaler into ||V|| and g parameters
    V_norm = tf.sqrt(tf.reduce_sum(tf.square(V), norm_axes))
    g_param = V_scaler * V_norm

    # get grad in V,g parameters
    grad_g = tf.reduce_sum(g * V, norm_axes) / V_norm
    grad_V = tf.reshape(V_scaler, [1] * len(norm_axes) + [-1]) * \
             (g - tf.reshape(grad_g / V_norm, [1] * len(norm_axes) + [-1]) * V)

    return V, V_norm, V_scaler, g_param, grad_g, grad_V


def add_weightnorm_param_updates(updates, new_V_param, new_g_param, W, V_scaler):
    """Append the ops that write updated (V, g) values back to ``W`` and ``V_scaler``.

    :param updates: list of update ops, extended in place with two entries.
    :param new_V_param: updated V tensor.
    :param new_g_param: updated g tensor.
    :param W: weight variable that receives ``new_V_scaler * new_V_param``.
    :param V_scaler: variable that receives ``new_g_param / ||new_V_param||``.
    """
    ps = K.get_variable_shape(new_V_param)
    norm_axes = [i for i in range(len(ps) - 1)]

    # update W and V_scaler
    new_V_norm = tf.sqrt(tf.reduce_sum(tf.square(new_V_param), norm_axes))
    new_V_scaler = new_g_param / new_V_norm
    new_W = tf.reshape(new_V_scaler, [1] * len(norm_axes) + [-1]) * new_V_param
    updates.append(K.update(W, new_W))
    updates.append(K.update(V_scaler, new_V_scaler))


# data based initialization for a given Keras model
def data_based_init(model, input):
    """Rescale layer weights and biases from the statistics of one data batch.

    For every layer exposing both ``W`` and ``b`` attributes, the layer output
    is evaluated on ``input`` and ``W`` / ``b`` are reassigned so that the
    output moments computed here are normalized.

    :param model: Keras model to initialize.
    :param input: a feed dict, a list of arrays (one per model input), or a
        single array for the first model input.

    NOTE(review): only layers with attributes named ``W`` and ``b`` are
    touched; layers storing their weights under other attribute names are
    silently skipped -- verify against the Keras version in use.
    """

    # input can be dict, numpy array, or list of numpy arrays
    if type(input) is dict:
        feed_dict = input
    elif type(input) is list:
        feed_dict = {tf_inp: np_inp for tf_inp,np_inp in zip(model.inputs,input)}
    else:
        feed_dict = {model.inputs[0]: input}

    # add learning phase if required
    if model.uses_learning_phase and K.learning_phase() not in feed_dict:
        feed_dict.update({K.learning_phase(): 1})

    # get all layer name, output, weight, bias tuples
    layer_output_weight_bias = []
    for l in model.layers:
        if hasattr(l, 'W') and hasattr(l, 'b'):
            assert(l.built)
            layer_output_weight_bias.append( (l.name,l.get_output_at(0),l.W,l.b) ) # if more than one node, only use the first

    # iterate over our list and do data dependent init
    sess = K.get_session()
    for l,o,W,b in layer_output_weight_bias:
        print('Performing data dependent initialization for layer ' + l)
        # moments over every axis except the last one
        m,v = tf.nn.moments(o, [i for i in range(len(o.get_shape())-1)])
        s = tf.sqrt(v + 1e-10)
        updates = tf.group(W.assign(W/tf.reshape(s,[1]*(len(W.get_shape())-1)+[-1])), b.assign((b-m)/s))
        sess.run(updates, feed_dict)
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /QSR-WGAN-GP/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | push 3 | 4 | *.pyc 5 | *.pkl 6 | *.jpg 7 | *.png 8 | -------------------------------------------------------------------------------- /QSR-WGAN-GP/.idea/QSR-WGAN-GP.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /QSR-WGAN-GP/.idea/markdown-navigator.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 35 | 36 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /QSR-WGAN-GP/.idea/markdown-navigator/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /QSR-WGAN-GP/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- 
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
# QSR-GANGP_Train_GPUs.py: adversarial training script (generator + critic with
# gradient penalty) for reconstructing quantized speech frames.

import os, sys
sys.path.append(os.getcwd())  # make the local `tflib` package importable

import time
import numpy as np
import scipy.io as sio
import scipy.io.wavfile as swave
from numpy import random

import tensorflow as tf
import tflib as lib
import tflib as lib
import tflib.ops.linear
import tflib.ops.conv1d
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
from keras.backend.tensorflow_backend import set_session
# Optional GPU selection / memory cap (disabled):
#os.environ["CUDA_VISIBLE_DEVICES"] = "3"  # index of the GPU made visible to this process
#config = tf.ConfigProto()
#config.gpu_options.per_process_gpu_memory_fraction = 0.6  # use at most 60% of the GPU memory
#set_session(tf.Session(config=config))


#####################################################################################
# 0. Hyperparameter Init
#####################################################################################
BATCH_SIZE = 32  # Batch size
ITERS = 200000  # How many iterations to train for
SEQ_LEN = 80  # Number of columns per training frame (the input/target split point below)
DIM = 128  # Model dimensionality (channel count of the residual blocks)
CRITIC_ITERS = 5  # How many critic iterations per generator iteration
LAMBDA = 10  # Gradient penalty lambda hyperparameter.
MAX_N_EXAMPLES = 100000  # NOTE(review): not referenced anywhere in the visible script


#####################################################################################
# 1. load data
#####################################################################################
print('> Loading data... ')
# Load Input Data
mat_input = 'Train_G711_PreProc_defautLang/Train_inputSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat'
mat_input = os.path.normcase(mat_input)
print('> Training Input: ' + mat_input)

x_train_noisy = sio.loadmat(mat_input)
x_train_noisy = x_train_noisy['inputSetNorm']
x_train_noisy = np.array(x_train_noisy)
# x_train_noisy = input_min_max_scaler.fit_transform(x_train_noisy)

# (Loading of a separate validation input set is disabled in this script.)

# Load Target Data
mat_target = 'Train_G711_PreProc_defautLang/Train_targetSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat'
mat_target = os.path.normcase(mat_target)
print('> Training Target: ' + mat_target)

x_train = sio.loadmat(mat_target)
x_train = x_train['targetSet']
x_train = np.array(x_train)
# x_train = target_min_max_scaler.fit_transform(x_train)

# (Loading of a separate validation target set is disabled in this script.)
# Shuffle the training pairs: inputs and targets are stacked column-wise so
# that one row shuffle keeps every input aligned with its target.
# `random` is numpy.random here, so this seeds the generator used by
# np.random.shuffle below.
random.seed(1331)

train = np.column_stack((x_train_noisy, x_train))
np.random.shuffle(train)
x_train_noisy = train[:, :SEQ_LEN]
x_train = train[:, SEQ_LEN:]

# Reshape the training pairs: append a trailing axis of size 1
x_train_noisy = np.reshape(x_train_noisy, (x_train_noisy.shape[0], x_train_noisy.shape[1], 1))
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# (Shuffling / reshaping of a validation set is disabled in this script.)

print('> Data Loaded. Model Compiling... ')

def selu(x):
    """Scaled exponential linear unit: ``scale * (x if x >= 0 else alpha * elu(x))``."""
    with ops.name_scope('elu') as scope:
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale*tf.where(x>=0.0, x, alpha*tf.nn.elu(x))

def ResBlock(name, inputs):
    """Residual block: two (ReLU -> width-5 Conv1D, DIM -> DIM) stages.

    :param name: prefix for the parameter names of the two convolutions
        (``name + '.1'`` and ``name + '.2'``).
    :param inputs: tensor with DIM channels -- layout as expected by
        ``lib.ops.conv1d.Conv1D``; TODO confirm.
    :return: ``inputs + 0.3 * residual``.
    """
    output = inputs
    output = tf.nn.relu(output)
    output = lib.ops.conv1d.Conv1D(name + '.1', DIM, DIM, 5, output, biases=False)
    output = tf.nn.relu(output)
    output = lib.ops.conv1d.Conv1D(name + '.2', DIM, DIM, 5, output, biases=False)
    return inputs + (0.3 * output)
####################################################
# 1. Define Generator Model
####################################################
def Generator(inputs):
    """Map quantized-speech frames to reconstructed frames.

    :param inputs: tensor fed from ``quan_inputs``, declared as
        ``[BATCH_SIZE, SEQ_LEN, 1]``.
    :return: tensor squashed by ``tanh``; the last two axes are swapped on the
        way in and swapped back on the way out.
    """
    output = tf.transpose(inputs, [0, 2, 1])
    output = lib.ops.conv1d.Conv1D('Generator.Input', SEQ_LEN, DIM, 1, output, biases=False)
    output = ResBlock('Generator.1', output)
    output = ResBlock('Generator.2', output)
    output = ResBlock('Generator.3', output)
    output = ResBlock('Generator.4', output)
    output = ResBlock('Generator.5', output)
    output = lib.ops.conv1d.Conv1D('Generator.Output', DIM, SEQ_LEN, 1, output, biases=False)
    output = tf.transpose(output, [0, 2, 1])
    output = tf.tanh(output)
    return output

####################################################
# 2. Define Discriminator Model
####################################################
def Discriminator(inputs):
    """Critic network scoring real or generated frames.

    :param inputs: tensor with the same layout as the generator output.
    :return: output of the final bias-free linear layer.

    NOTE(review): the flatten below uses BATCH_SIZE * DIM as feature size, so
    the row count of the result depends on the batch size instead of being one
    score per example -- presumably SEQ_LEN * DIM or DIM was intended; confirm
    against the tensor layout used by tflib's Conv1D before changing.
    """
    output = tf.transpose(inputs, [0, 2, 1])
    output = lib.ops.conv1d.Conv1D('Discriminator.Input', SEQ_LEN, DIM, 1, output, biases=False)
    output = ResBlock('Discriminator.1', output)
    output = ResBlock('Discriminator.2', output)
    output = ResBlock('Discriminator.3', output)
    output = ResBlock('Discriminator.4', output)
    output = ResBlock('Discriminator.5', output)
    output = tf.reshape(output, [-1, BATCH_SIZE * DIM])
    output = lib.ops.linear.Linear('Discriminator.Output', BATCH_SIZE * DIM, 1, output, biases=False)
    return output
####################################################
# 3. Define inputs of all Models
####################################################
# Both placeholders have a fixed batch dimension, so every fed batch must
# contain exactly BATCH_SIZE frames.
real_inputs = tf.placeholder(tf.float32, shape=[BATCH_SIZE, SEQ_LEN, 1])
quan_inputs = tf.placeholder(tf.float32, shape=[BATCH_SIZE, SEQ_LEN, 1])

fake_inputs = Generator(quan_inputs)

disc_real = Discriminator(real_inputs)
disc_fake = Discriminator(fake_inputs)

####################################################
# 4. WGAN lipschitz-penalty
####################################################
# Random points on the segments between real and generated frames
# (one interpolation factor per example).
alpha = tf.random_uniform(shape=[BATCH_SIZE, 1, 1], minval=0., maxval=1.)
differences = fake_inputs - real_inputs
interpolates = real_inputs + (alpha * differences)

# 1. loss
# Penalize deviation of the critic's gradient norm (taken over axes 1 and 2)
# from 1 at the interpolated points.
gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2]))
gradient_penalty = LAMBDA * tf.reduce_mean((slopes - 1.) ** 2)

disc_loss = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
disc_loss += gradient_penalty
gen_loss = -tf.reduce_mean(disc_fake)

# 2. optimizer: one Adam instance per network, each restricted to the
# parameters whose name matches that network
gen_params = lib.params_with_name('Generator')
disc_params = lib.params_with_name('Discriminator')
gen_train_op = tf.train.AdamOptimizer(learning_rate=5e-6, beta1=0.5, beta2=0.9).minimize(gen_loss, var_list=gen_params)
disc_train_op = tf.train.AdamOptimizer(learning_rate=5e-6, beta1=0.5, beta2=0.9).minimize(disc_loss, var_list=disc_params)

# 3. dataset generator
def batch_generator():
    """Yield ``(quantized_batch, real_batch)`` pairs forever.

    Walks the module-level ``x_train_noisy`` / ``x_train`` arrays in steps of
    BATCH_SIZE and restarts from the beginning after each pass; trailing rows
    that do not fill a complete batch are never yielded.
    """
    while True:
        for i in range(0, x_train_noisy.shape[0] - BATCH_SIZE + 1, BATCH_SIZE):
            yield x_train_noisy[i:i+BATCH_SIZE], x_train[i:i+BATCH_SIZE]

# NOTE(review): nothing in the visible script writes into 'out/'.
if not os.path.exists('out/'):
    os.makedirs('out/')
Training Loop 199 | ################################################################# 200 | with tf.Session() as session: 201 | session.run(tf.global_variables_initializer()) # variables init 202 | 203 | # create data for the gan training 204 | generator = batch_generator() 205 | 206 | for iteration in range(ITERS): 207 | start_time = time.time() 208 | 209 | # Train critic 210 | for i in range(CRITIC_ITERS): 211 | quant_batch, real_batch = generator.__next__() 212 | _, _gen_loss = session.run([gen_train_op, gen_loss], feed_dict={quan_inputs: quant_batch}) 213 | _, _disc_loss = session.run([disc_train_op, disc_loss], feed_dict={real_inputs:real_batch, quan_inputs:quant_batch}) 214 | 215 | # Train generator 216 | quant_batch, real_batch = generator.__next__() 217 | _, _gen_loss = session.run([gen_train_op, gen_loss], feed_dict={quan_inputs: quant_batch}) 218 | 219 | if iteration % 10 == 0: 220 | print('epoch %s, disc_loss: %s, gen_loss: %s' % (iteration, _disc_loss, _gen_loss)) 221 | 222 | realspeech, quanspeech, prediction = session.run([real_inputs, quan_inputs, fake_inputs], feed_dict={real_inputs: real_batch, quan_inputs: quant_batch}) 223 | 224 | fig = plt.figure(facecolor='white') 225 | ax = fig.add_subplot(111) 226 | ax.plot(np.reshape(realspeech, (realspeech.size,)), label='RealSpeech') 227 | plt.plot(np.reshape(quanspeech, (quanspeech.size,)), label='QuanSpeech') 228 | plt.plot(np.reshape(prediction, (prediction.size,)), label='Prediction') 229 | 230 | plt.legend() 231 | plt.show() 232 | 233 | -------------------------------------------------------------------------------- /QSR-WGAN-GP/QSR-WGAN-GP_Train_GPUs.py: -------------------------------------------------------------------------------- 1 | ####################################################################################################################### 2 | # Project QSRCNN: Quantized Speech Reconstruction using Convolutional Neural Network V1.0 3 | # 
===================================================================================================================== 4 | # QSR-WGAN-GP_Train_GPUs.py: Trying to use WGAN-GP to do Quantized Speech Reconstruction 5 | # 6 | # 7 | # ===================================================================================================================== 8 | # Abteilung Signalverarbeitung, IfN, Technische Universitaet Braunschweig, Deutschland 9 | # Author: Huijun Liu M.Sc. 10 | # Date: 16.07.2017 11 | ####################################################################################################################### 12 | import os 13 | import time 14 | import math 15 | import numpy as np 16 | import scipy.io as sio 17 | import tensorflow as tf 18 | import keras.backend as K 19 | import matplotlib.pyplot as plt 20 | import scipy.io.wavfile as swave 21 | import keras.optimizers as optimizers 22 | 23 | from numpy import random 24 | from keras import initializers 25 | from keras.models import Model 26 | from keras.layers import Input 27 | from keras.layers.merge import Add 28 | from keras.layers.core import Dense, Flatten, Activation 29 | from keras.layers.normalization import BatchNormalization 30 | from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D 31 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard 32 | 33 | # from weightnorm import AdamWithWeightnorm 34 | from tensorflow.python.framework import ops 35 | from keras.backend.tensorflow_backend import set_session 36 | 37 | 38 | ##################################################################################### 39 | # 0. Setteings For GPUs 40 | ##################################################################################### 41 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" # x stand for GPU index: 3-x!! 
42 | config = tf.ConfigProto() 43 | config.gpu_options.per_process_gpu_memory_fraction = 0.3 # Only 30% Memory of GPUs can be used 44 | set_session(tf.Session(config=config)) 45 | 46 | ##################################################################################### 47 | # 1. Define new Metric, Activation function and Loss function 48 | ##################################################################################### 49 | 50 | 51 | def snr(y_true, y_pred): 52 | """ 53 | SNR is Signal to Noise Ratio 54 | 55 | """ 56 | return 10.0 * K.log((K.sum(K.square(y_true))) / (K.sum(K.square(y_pred - y_true)))) / K.log(10.0) 57 | 58 | 59 | def selu(x): 60 | with ops.name_scope('elu') as scope: 61 | alpha = 1.6732632423543772848170429916717 62 | scale = 1.0507009873554804934193349852946 63 | return scale*tf.where(x>=0.0, x, alpha*tf.nn.elu(x)) 64 | 65 | 66 | ##################################################################################### 67 | # 2. Define Generative model and Adversarial model 68 | ##################################################################################### 69 | def create_generator(inputs_gen): 70 | """ 71 | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 72 | """ 73 | return decoded 74 | 75 | 76 | def create_discriminator(inputs_disc): 77 | x = Conv1D(32, 3, padding='same', name='dis_conv_1')(inputs_disc) 78 | x = Activation(selu, name='dis_act_1')(x) 79 | 80 | x = Conv1D(64, 3, padding='same', name='dis_conv_2')(x) 81 | x = BatchNormalization(name='dis_bnorm_1')(x) 82 | x1 = Activation(selu, name='dis_act_2')(x) 83 | 84 | m1 = Add(name='dis_add_1')([inputs_disc, x1]) 85 | 86 | x = Conv1D(32, 3, padding='same', name='dis_conv_3')(m1) 87 | x = Activation(selu, name='dis_act_3')(x) 88 | 89 | x = Conv1D(64, 3, padding='same', name='dis_conv_4')(x) 90 | x = BatchNormalization(name='dis_bnorm_2')(x) 91 | x2 = Activation(selu, name='dis_act_4')(x) 92 | m2 = Add(name='dis_add_2')([m1, x2]) 93 | 94 | discri = Conv1D(1, 5, padding='same', 
name='dis_conv_5')(m2) 95 | 96 | return discri 97 | 98 | ##################################################################################### 99 | # 3. Define Training process of QSR_WGAN_GP 100 | ##################################################################################### 101 | SEQ_LEN = 80 102 | BATCH_SIZE = 128 103 | 104 | 105 | def load_data(): 106 | print('> Loading data... ') 107 | # Load Input Data 108 | mat_input = 'Train_G711_PreProc_defautLang/Train_inputSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat' 109 | mat_input = os.path.normcase(mat_input) 110 | print('> Training Input: ' + mat_input) 111 | 112 | x_train_noisy = sio.loadmat(mat_input) 113 | x_train_noisy = x_train_noisy['inputSetNorm'] 114 | x_train_noisy = np.array(x_train_noisy) 115 | # x_train_noisy = input_min_max_scaler.fit_transform(x_train_noisy) 116 | 117 | # Load Input Data for Validation 118 | mat_input_vali = 'Train_G711_PreProc_defautLang/Vali_inputSet_ALaw_defautLang_OLdata_ValiTrain_smallVali.mat' 119 | mat_input_vali = os.path.normcase(mat_input_vali) 120 | print('> Validation Input: ' + mat_input_vali) 121 | 122 | x_train_noisy_vali = sio.loadmat(mat_input_vali) 123 | x_train_noisy_vali = x_train_noisy_vali['inputSetNorm'] 124 | x_train_noisy_vali = np.array(x_train_noisy_vali) 125 | 126 | # Load Target Data 127 | mat_target = 'Train_G711_PreProc_defautLang/Train_targetSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat' 128 | mat_target = os.path.normcase(mat_target) 129 | print('> Training Target: ' + mat_target) 130 | 131 | x_train = sio.loadmat(mat_target) 132 | x_train = x_train['targetSet'] 133 | x_train = np.array(x_train) 134 | # x_train = target_min_max_scaler.fit_transform(x_train) 135 | 136 | # Load Target Data for Validation 137 | mat_target_vali = 'Train_G711_PreProc_defautLang/Vali_targetSet_ALaw_defautLang_OLdata_ValiTrain_smallVali.mat' 138 | mat_target_vali = os.path.normcase(mat_target_vali) 139 | print('> Validation 
Target: ' + mat_target_vali) 140 | 141 | x_train_vali = sio.loadmat(mat_target_vali) 142 | x_train_vali = x_train_vali['targetSet'] 143 | x_train_vali = np.array(x_train_vali) 144 | 145 | # Randomization of Training Pairs (Currently NO Shuffle) 146 | random.seed(1331) 147 | 148 | train = np.column_stack((x_train_noisy, x_train)) 149 | np.random.shuffle(train) 150 | x_train_noisy = train[:, :SEQ_LEN] 151 | x_train = train[:, SEQ_LEN:] 152 | 153 | # Reshape of Traing Pairs and validation Pairs 154 | x_train_noisy = np.reshape(x_train_noisy, (x_train_noisy.shape[0], x_train_noisy.shape[1], 1)) 155 | x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1)) 156 | 157 | # validation = np.column_stack((x_train_noisy_vali, x_train_vali)) 158 | # np.random.shuffle(validation ) 159 | # x_train_noisy_vali = validation [:, :SEQ_LEN] 160 | # x_train_vali = validation [:, SEQ_LEN:] 161 | 162 | x_train_noisy_vali = np.reshape(x_train_noisy_vali, (x_train_noisy_vali.shape[0], x_train_noisy_vali.shape[1], 1)) 163 | x_train_vali = np.reshape(x_train_vali, (x_train_vali.shape[0], x_train_vali.shape[1], 1)) 164 | 165 | print('> Data Loaded. Model Compiling... ') 166 | return x_train_noisy, x_train, x_train_noisy_vali, x_train_vali 167 | 168 | # 2. dataset generator 169 | def batch_generator(x_train_noisy, x_train, batch_size=128): 170 | while True: 171 | for i in range(0, x_train_noisy.shape[0] - batch_size + 1, batch_size): 172 | yield x_train_noisy[i:i+batch_size], x_train[i:i+batch_size] 173 | 174 | default_opt_params = {'lr': 5e-5, 'clip': 1e-2, 'n_lambda': 10, 'n_critic': 10} 175 | 176 | 177 | class QSRWGAN(object): 178 | def __init__(self, opt_params=default_opt_params, frame_len=80): 179 | self.n_critic = opt_params['n_critic'] 180 | self.n_lambda = opt_params['n_lambda'] 181 | self.clip = opt_params['clip'] 182 | self.frame_len = frame_len 183 | 184 | # ------------------------------------------------------------------ 185 | # 1. 
create session 186 | # ------------------------------------------------------------------ 187 | self.sess = tf.Session() 188 | K.set_session(self.sess) # pass the session to keras 189 | 190 | # ------------------------------------------------------------------ 191 | # 2. create generator and discriminator 192 | # ------------------------------------------------------------------ 193 | with tf.name_scope('generator'): 194 | gen_inputs = Input(shape=(self.frame_len, 1)) 195 | gen_outputs = create_generator(gen_inputs) 196 | 197 | with tf.name_scope('discriminator'): 198 | dis_inputs = Input(shape=(self.frame_len, 1)) 199 | dis_outputs = create_discriminator(dis_inputs) 200 | 201 | # ------------------------------------------------------------------ 202 | # 3. instantiate networks of generator and discriminator 203 | # ------------------------------------------------------------------ 204 | Generator = Model(inputs=gen_inputs, outputs=gen_outputs) 205 | Generator.summary() 206 | self.gen_model = Generator 207 | Discriminator = Model(inputs=dis_inputs, outputs=dis_outputs) 208 | Discriminator.summary() 209 | 210 | # ------------------------------------------------------------------ 211 | # 4. save the inputs of generator and discriminator 212 | # ------------------------------------------------------------------ 213 | quan_inputs = tf.placeholder(tf.float32, shape=(None, self.frame_len, 1), name='quan_inputs') 214 | real_inputs = tf.placeholder(tf.float32, shape=(None, self.frame_len, 1), name='real_inputs') 215 | self.inputs = quan_inputs, real_inputs 216 | 217 | # ------------------------------------------------------------------ 218 | # 5. 
get the weights of generator and discriminator 219 | # ------------------------------------------------------------------ 220 | self.gen_weights = [weights for weights in tf.global_variables() if 'generator' in weights.name] 221 | self.dis_weights = [weights for weights in tf.global_variables() if 'discriminator' in weights.name] 222 | # self.gen_weights = Generator.get_weights() 223 | # self.dis_weights = Discriminator.get_weights() 224 | 225 | # ------------------------------------------------------------------ 226 | # 6. create predictions of generator and discriminator 227 | # ------------------------------------------------------------------ 228 | fake_inputs = Generator(quan_inputs) 229 | disc_real = Discriminator(real_inputs) 230 | disc_fake = Discriminator(fake_inputs) 231 | self.predictions = fake_inputs 232 | 233 | # ------------------------------------------------------------------ 234 | # 7. create losses and compute probabilities of discriminator 235 | # ------------------------------------------------------------------ 236 | # 7.1. WGAN lipschitz-penalty 237 | alpha = tf.random_uniform(shape=[BATCH_SIZE, 1, 1], minval=-0.4, maxval=0.4) 238 | differences = fake_inputs - real_inputs 239 | interpolates = real_inputs + (alpha * differences) 240 | 241 | gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0] 242 | # slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2])) 243 | # gradient_penalty = self.n_lambda * tf.reduce_mean((slopes - 1.) 
** 2) 244 | gp = K.mean(K.square(K.sqrt(K.sum(K.square(gradients), axis=1)) - 1)) 245 | gradient_penalty = self.n_lambda * gp 246 | 247 | disc_loss = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real) 248 | disc_loss += gradient_penalty 249 | 250 | self.dis_loss = disc_loss 251 | self.gen_loss = -tf.reduce_mean(disc_fake) 252 | 253 | self.disc_real = tf.reduce_mean(disc_real) 254 | self.disc_fake = tf.reduce_mean(disc_fake) 255 | self.prob_real = tf.reduce_mean(tf.sigmoid(disc_real)) 256 | self.prob_fake = tf.reduce_mean(tf.sigmoid(disc_fake)) 257 | 258 | # ------------------------------------------------------------------ 259 | # 8. create optimizer for generator and discriminator 260 | # ------------------------------------------------------------------ 261 | learning_rate = opt_params['lr'] 262 | 263 | gen_train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.gen_loss, var_list=self.gen_weights) 264 | disc_train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.dis_loss, var_list=self.dis_weights) 265 | 266 | self.gen_opt_train = gen_train_op 267 | self.dis_opt_train = disc_train_op 268 | 269 | def load_weights(self): 270 | self.gen_model.load_weights('ddsrcnn_weights_defaultLang_OL40_stopstr_bs128_lr5e-05.h5') 271 | 272 | def save_weights(self, file_path): 273 | file_path = os.path.normcase(file_path) 274 | self.gen_model.save_weights(file_path) 275 | 276 | def load_batch(self, x_train_noise, x_train, train=True): 277 | gen_inputs, dis_inputs = self.inputs 278 | return {gen_inputs: x_train_noise, dis_inputs: x_train, K.learning_phase(): train} 279 | 280 | def gen(self, x_train_noise): 281 | gen_inputs, dis_inputs = self.inputs 282 | feed_dict = {gen_inputs: x_train_noise, K.learning_phase(): False} 283 | return self.sess.run(self.predictions, feed_dict=feed_dict) 284 | 285 | def gen_train(self, feed_dict): 286 | _, gen_loss = self.sess.run([self.gen_opt_train, self.gen_loss], feed_dict=feed_dict) 287 | return 
gen_loss 288 | 289 | def dis_train(self, feed_dict): 290 | # take a step of adam 291 | _, dis_loss = self.sess.run([self.dis_opt_train, self.dis_loss], feed_dict=feed_dict) 292 | # return discriminator loss 293 | return dis_loss 294 | 295 | def fit(self, x_train_noise, x_train, x_train_noise_vali, x_train_vali, epochs=10, logdir='/qsrwgan_run'): 296 | # ------------------------------------------------------------------ 297 | # 1. initialize log directory 298 | # ------------------------------------------------------------------ 299 | if tf.gfile.Exists(logdir): 300 | tf.gfile.DeleteRecursively(logdir) 301 | 302 | tf.gfile.MakeDirs(logdir) 303 | 304 | # ------------------------------------------------------------------ 305 | # 2. initialize model 306 | # ------------------------------------------------------------------ 307 | init = tf.global_variables_initializer() 308 | self.sess.run(init) 309 | self.load_weights() 310 | 311 | # ------------------------------------------------------------------ 312 | # 3. 
train the model 313 | # ------------------------------------------------------------------ 314 | step, g_step, epoch = 0, 0, 0 315 | curr_epoch = 0 316 | 317 | # create data for the gan training 318 | # generator = batch_generator(x_train_noise, x_train) 319 | mat_input = 'Train_G711_PreProc_defautLang/inputTestSet_g711concat_nonOL_Frame_80.mat' 320 | mat_input = os.path.normcase(mat_input) 321 | x_train_noisy = sio.loadmat(mat_input) 322 | x_train_noisy = x_train_noisy['inputTestNorm'] 323 | x_train_noisy = np.array(x_train_noisy) 324 | x_train_noisy = np.reshape(x_train_noisy, (x_train_noisy.shape[0], x_train_noisy.shape[1], 1)) 325 | 326 | while curr_epoch < epochs: 327 | # create data for the gan training 328 | generator = batch_generator(x_train_noise, x_train, BATCH_SIZE) 329 | # generator_vali = batch_generator(x_train_noise_vali, x_train_vali, 1024) 330 | 331 | curr_iter = 0 332 | while curr_iter < x_train_noise.shape[0]//BATCH_SIZE: 333 | start_time = time.time() 334 | # n_critic = 100 if g_step < 25 or (g_step+1) % 500 == 0 else self.n_critic 335 | 336 | for i in range(self.n_critic): 337 | curr_iter += 1 338 | dis_losses = [] 339 | 340 | # load the batch 341 | quant_batch, real_batch = generator.__next__() 342 | # quant_batch = np.random.randn(BATCH_SIZE, 80, 1) 343 | feed_dict = self.load_batch(quant_batch, real_batch) 344 | 345 | # train the discriminator 346 | dis_loss = self.dis_train(feed_dict) 347 | dis_losses.append(dis_loss) 348 | 349 | dis_loss = np.array(dis_losses).mean() 350 | 351 | # train the generator 352 | curr_iter += 1 353 | quant_batch, real_batch = generator.__next__() 354 | # quant_batch = np.random.randn(BATCH_SIZE, 80, 1) 355 | feed_dict = self.load_batch(quant_batch, real_batch) 356 | gen_loss = self.gen_train(feed_dict) 357 | 358 | g_step += 1 359 | 360 | if g_step < 1000 or g_step % 1000 == 0: 361 | tot_time = time.time() - start_time 362 | print('Epoch: %3d, Gen Steps: %4d (%3.lf s), Discriminator loss: %.6f, Generator loss: 
%.6f' % (curr_epoch, g_step, tot_time, dis_loss, gen_loss)) 363 | 364 | if g_step % 50 == 0: 365 | 366 | 367 | prediction = self.gen(np.random.randn(BATCH_SIZE, 80, 1)) 368 | # feed_dict = self.load_batch(x_train_noisy, real_batch_vali) 369 | # quanspeech, realspeech = self.sess.run(self.inputs, feed_dict) 370 | fname = 'recon-speech-%d_%d.wav' % (curr_iter, g_step) 371 | swave.write(fname, 8000, np.reshape(prediction, (prediction.size,))) 372 | # fname = 'real-speech-%d.wav' % g_step 373 | # swave.write(fname, 8000, np.reshape(realspeech, (realspeech.size,))) 374 | 375 | # fig = plt.figure(facecolor='white') 376 | # ax = fig.add_subplot(111) 377 | # ax.plot(np.reshape(realspeech, (realspeech.size,)), label='RealSpeech') 378 | # plt.plot(np.reshape(quanspeech, (quanspeech.size,)), label='QuanSpeech') 379 | # plt.plot(np.reshape(prediction, (prediction.size,)), label='Prediction') 380 | 381 | # plt.legend() 382 | # plt.show() 383 | 384 | curr_epoch += 1 385 | 386 | self.save_weights("qsrwgan_weights.h5") 387 | 388 | 389 | ##################################################################################### 390 | # 4. QSR_WGAN_GP Training 391 | ##################################################################################### 392 | model = QSRWGAN(opt_params=default_opt_params) 393 | # train model 394 | x_train_noisy, x_train, _, _ = load_data() 395 | model.fit(x_train_noisy, x_train, _, _, epochs=10000) 396 | -------------------------------------------------------------------------------- /QSR-WGAN-GP/tflib/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | import locale 5 | 6 | locale.setlocale(locale.LC_ALL, '') 7 | 8 | _params = {} 9 | _param_aliases = {} 10 | def param(name, *args, **kwargs): 11 | """ 12 | A wrapper for `tf.Variable` which enables parameter sharing in models. 
13 | 14 | Creates and returns theano shared variables similarly to `tf.Variable`, 15 | except if you try to create a param with the same name as a 16 | previously-created one, `param(...)` will just return the old one instead of 17 | making a new one. 18 | 19 | This constructor also adds a `param` attribute to the shared variables it 20 | creates, so that you can easily search a graph for all params. 21 | """ 22 | 23 | if name not in _params: 24 | kwargs['name'] = name 25 | param = tf.Variable(*args, **kwargs) 26 | param.param = True 27 | _params[name] = param 28 | result = _params[name] 29 | i = 0 30 | while result in _param_aliases: 31 | # print 'following alias {}: {} to {}'.format(i, result, _param_aliases[result]) 32 | i += 1 33 | result = _param_aliases[result] 34 | return result 35 | 36 | def params_with_name(name): 37 | return [p for n,p in _params.items() if name in n] 38 | 39 | def delete_all_params(): 40 | _params.clear() 41 | 42 | def alias_params(replace_dict): 43 | for old,new in replace_dict.items(): 44 | # print "aliasing {} to {}".format(old,new) 45 | _param_aliases[old] = new 46 | 47 | def delete_param_aliases(): 48 | _param_aliases.clear() 49 | 50 | # def search(node, critereon): 51 | # """ 52 | # Traverse the Theano graph starting at `node` and return a list of all nodes 53 | # which match the `critereon` function. 
When optimizing a cost function, you 54 | # can use this to get a list of all of the trainable params in the graph, like 55 | # so: 56 | 57 | # `lib.search(cost, lambda x: hasattr(x, "param"))` 58 | # """ 59 | 60 | # def _search(node, critereon, visited): 61 | # if node in visited: 62 | # return [] 63 | # visited.add(node) 64 | 65 | # results = [] 66 | # if isinstance(node, T.Apply): 67 | # for inp in node.inputs: 68 | # results += _search(inp, critereon, visited) 69 | # else: # Variable node 70 | # if critereon(node): 71 | # results.append(node) 72 | # if node.owner is not None: 73 | # results += _search(node.owner, critereon, visited) 74 | # return results 75 | 76 | # return _search(node, critereon, set()) 77 | 78 | # def print_params_info(params): 79 | # """Print information about the parameters in the given param set.""" 80 | 81 | # params = sorted(params, key=lambda p: p.name) 82 | # values = [p.get_value(borrow=True) for p in params] 83 | # shapes = [p.shape for p in values] 84 | # print "Params for cost:" 85 | # for param, value, shape in zip(params, values, shapes): 86 | # print "\t{0} ({1})".format( 87 | # param.name, 88 | # ",".join([str(x) for x in shape]) 89 | # ) 90 | 91 | # total_param_count = 0 92 | # for shape in shapes: 93 | # param_count = 1 94 | # for dim in shape: 95 | # param_count *= dim 96 | # total_param_count += param_count 97 | # print "Total parameter count: {0}".format( 98 | # locale.format("%d", total_param_count, grouping=True) 99 | # ) 100 | 101 | def print_model_settings(locals_): 102 | print("Uppercase local vars:") 103 | all_vars = [(k,v) for (k,v) in locals_.items() if (k.isupper() and k!='T' and k!='SETTINGS' and k!='ALL_SETTINGS')] 104 | all_vars = sorted(all_vars, key=lambda x: x[0]) 105 | for var_name, var_value in all_vars: 106 | print("\t{}: {}".format(var_name, var_value)) 107 | 108 | 109 | def print_model_settings_dict(settings): 110 | print("Settings dict:") 111 | all_vars = [(k,v) for (k,v) in settings.items()] 112 | 
all_vars = sorted(all_vars, key=lambda x: x[0]) 113 | for var_name, var_value in all_vars: 114 | print("\t{}: {}".format(var_name, var_value)) -------------------------------------------------------------------------------- /QSR-WGAN-GP/tflib/ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/QSR-WGAN-GP/tflib/ops/__init__.py -------------------------------------------------------------------------------- /QSR-WGAN-GP/tflib/ops/batchnorm.py: -------------------------------------------------------------------------------- 1 | import tflib as lib 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | def Batchnorm(name, axes, inputs, is_training=None, stats_iter=None, update_moving_stats=True, fused=True): 7 | if ((axes == [0,2,3]) or (axes == [0,2])) and fused==True: 8 | if axes==[0,2]: 9 | inputs = tf.expand_dims(inputs, 3) 10 | # Old (working but pretty slow) implementation: 11 | ########## 12 | 13 | # inputs = tf.transpose(inputs, [0,2,3,1]) 14 | 15 | # mean, var = tf.nn.moments(inputs, [0,1,2], keep_dims=False) 16 | # offset = lib.param(name+'.offset', np.zeros(mean.get_shape()[-1], dtype='float32')) 17 | # scale = lib.param(name+'.scale', np.ones(var.get_shape()[-1], dtype='float32')) 18 | # result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-4) 19 | 20 | # return tf.transpose(result, [0,3,1,2]) 21 | 22 | # New (super fast but untested) implementation: 23 | offset = lib.param(name+'.offset', np.zeros(inputs.get_shape()[1], dtype='float32')) 24 | scale = lib.param(name+'.scale', np.ones(inputs.get_shape()[1], dtype='float32')) 25 | 26 | moving_mean = lib.param(name+'.moving_mean', np.zeros(inputs.get_shape()[1], dtype='float32'), trainable=False) 27 | moving_variance = lib.param(name+'.moving_variance', np.ones(inputs.get_shape()[1], dtype='float32'), 
trainable=False) 28 | 29 | def _fused_batch_norm_training(): 30 | return tf.nn.fused_batch_norm(inputs, scale, offset, epsilon=1e-5, data_format='NCHW') 31 | def _fused_batch_norm_inference(): 32 | # Version which blends in the current item's statistics 33 | batch_size = tf.cast(tf.shape(inputs)[0], 'float32') 34 | mean, var = tf.nn.moments(inputs, [2,3], keep_dims=True) 35 | mean = ((1./batch_size)*mean) + (((batch_size-1.)/batch_size)*moving_mean)[None,:,None,None] 36 | var = ((1./batch_size)*var) + (((batch_size-1.)/batch_size)*moving_variance)[None,:,None,None] 37 | return tf.nn.batch_normalization(inputs, mean, var, offset[None,:,None,None], scale[None,:,None,None], 1e-5), mean, var 38 | 39 | # Standard version 40 | # return tf.nn.fused_batch_norm( 41 | # inputs, 42 | # scale, 43 | # offset, 44 | # epsilon=1e-2, 45 | # mean=moving_mean, 46 | # variance=moving_variance, 47 | # is_training=False, 48 | # data_format='NCHW' 49 | # ) 50 | 51 | if is_training is None: 52 | outputs, batch_mean, batch_var = _fused_batch_norm_training() 53 | else: 54 | outputs, batch_mean, batch_var = tf.cond(is_training, 55 | _fused_batch_norm_training, 56 | _fused_batch_norm_inference) 57 | if update_moving_stats: 58 | no_updates = lambda: outputs 59 | def _force_updates(): 60 | """Internal function forces updates moving_vars if is_training.""" 61 | float_stats_iter = tf.cast(stats_iter, tf.float32) 62 | 63 | update_moving_mean = tf.assign(moving_mean, ((float_stats_iter/(float_stats_iter+1))*moving_mean) + ((1/(float_stats_iter+1))*batch_mean)) 64 | update_moving_variance = tf.assign(moving_variance, ((float_stats_iter/(float_stats_iter+1))*moving_variance) + ((1/(float_stats_iter+1))*batch_var)) 65 | 66 | with tf.control_dependencies([update_moving_mean, update_moving_variance]): 67 | return tf.identity(outputs) 68 | outputs = tf.cond(is_training, _force_updates, no_updates) 69 | 70 | if axes == [0,2]: 71 | return outputs[:,:,:,0] # collapse last dim 72 | else: 73 | return outputs 
74 | else: 75 | # raise Exception('old BN') 76 | # TODO we can probably use nn.fused_batch_norm here too for speedup 77 | mean, var = tf.nn.moments(inputs, axes, keep_dims=True) 78 | shape = mean.get_shape().as_list() 79 | if 0 not in axes: 80 | print("WARNING ({}): didn't find 0 in axes, but not using separate BN params for each item in batch".format(name)) 81 | shape[0] = 1 82 | offset = lib.param(name+'.offset', np.zeros(shape, dtype='float32')) 83 | scale = lib.param(name+'.scale', np.ones(shape, dtype='float32')) 84 | result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-5) 85 | 86 | 87 | return result 88 | -------------------------------------------------------------------------------- /QSR-WGAN-GP/tflib/ops/cond_batchnorm.py: -------------------------------------------------------------------------------- 1 | import tflib as lib 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | def Batchnorm(name, axes, inputs, is_training=None, stats_iter=None, update_moving_stats=True, fused=True, labels=None, n_labels=None): 7 | """conditional batchnorm (dumoulin et al 2016) for BCHW conv filtermaps""" 8 | if axes != [0,2,3]: 9 | raise Exception('unsupported') 10 | mean, var = tf.nn.moments(inputs, axes, keep_dims=True) 11 | shape = mean.get_shape().as_list() # shape is [1,n,1,1] 12 | offset_m = lib.param(name+'.offset', np.zeros([n_labels,shape[1]], dtype='float32')) 13 | scale_m = lib.param(name+'.scale', np.ones([n_labels,shape[1]], dtype='float32')) 14 | offset = tf.nn.embedding_lookup(offset_m, labels) 15 | scale = tf.nn.embedding_lookup(scale_m, labels) 16 | result = tf.nn.batch_normalization(inputs, mean, var, offset[:,:,None,None], scale[:,:,None,None], 1e-5) 17 | return result -------------------------------------------------------------------------------- /QSR-WGAN-GP/tflib/ops/conv1d.py: -------------------------------------------------------------------------------- 1 | import tflib as lib 2 | 3 | import numpy as np 4 | import 
tensorflow as tf 5 | 6 | _default_weightnorm = False 7 | def enable_default_weightnorm(): 8 | global _default_weightnorm 9 | _default_weightnorm = True 10 | 11 | def Conv1D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True, gain=1.): 12 | """ 13 | inputs: tensor of shape (batch size, num channels, width) 14 | mask_type: one of None, 'a', 'b' 15 | 16 | returns: tensor of shape (batch size, num channels, width) 17 | """ 18 | with tf.name_scope(name) as scope: 19 | 20 | if mask_type is not None: 21 | mask_type, mask_n_channels = mask_type 22 | 23 | mask = np.ones( 24 | (filter_size, input_dim, output_dim), 25 | dtype='float32' 26 | ) 27 | center = filter_size // 2 28 | 29 | # Mask out future locations 30 | # filter shape is (width, input channels, output channels) 31 | mask[center+1:, :, :] = 0. 32 | 33 | # Mask out future channels 34 | for i in range(mask_n_channels): 35 | for j in range(mask_n_channels): 36 | if (mask_type=='a' and i >= j) or (mask_type=='b' and i > j): 37 | mask[ 38 | center, 39 | i::mask_n_channels, 40 | j::mask_n_channels 41 | ] = 0. 42 | 43 | 44 | def uniform(stdev, size): 45 | return np.random.uniform( 46 | low=-stdev * np.sqrt(3), 47 | high=stdev * np.sqrt(3), 48 | size=size 49 | ).astype('float32') 50 | 51 | fan_in = input_dim * filter_size 52 | fan_out = output_dim * filter_size / stride 53 | 54 | if mask_type is not None: # only approximately correct 55 | fan_in /= 2. 56 | fan_out /= 2. 
57 | 58 | if he_init: 59 | filters_stdev = np.sqrt(4./(fan_in+fan_out)) 60 | else: # Normalized init (Glorot & Bengio) 61 | filters_stdev = np.sqrt(2./(fan_in+fan_out)) 62 | 63 | filter_values = uniform( 64 | filters_stdev, 65 | (filter_size, input_dim, output_dim) 66 | ) 67 | # print "WARNING IGNORING GAIN" 68 | filter_values *= gain 69 | 70 | filters = lib.param(name+'.Filters', filter_values) 71 | 72 | if weightnorm==None: 73 | weightnorm = _default_weightnorm 74 | if weightnorm: 75 | norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1))) 76 | target_norms = lib.param( 77 | name + '.g', 78 | norm_values 79 | ) 80 | with tf.name_scope('weightnorm') as scope: 81 | norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1])) 82 | filters = filters * (target_norms / norms) 83 | 84 | if mask_type is not None: 85 | with tf.name_scope('filter_mask'): 86 | filters = filters * mask 87 | 88 | result = tf.nn.conv1d( 89 | value=inputs, 90 | filters=filters, 91 | stride=stride, 92 | padding='SAME', 93 | data_format='NHWC' 94 | ) 95 | 96 | if biases: 97 | _biases = lib.param( 98 | name+'.Biases', 99 | np.zeros([output_dim], dtype='float32') 100 | ) 101 | 102 | # result = result + _biases 103 | 104 | result = tf.expand_dims(result, 3) 105 | result = tf.nn.bias_add(result, _biases, data_format='NCHW') 106 | result = tf.squeeze(result) 107 | 108 | return result 109 | -------------------------------------------------------------------------------- /QSR-WGAN-GP/tflib/ops/conv2d.py: -------------------------------------------------------------------------------- 1 | import tflib as lib 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | _default_weightnorm = False 7 | def enable_default_weightnorm(): 8 | global _default_weightnorm 9 | _default_weightnorm = True 10 | 11 | _weights_stdev = None 12 | def set_weights_stdev(weights_stdev): 13 | global _weights_stdev 14 | _weights_stdev = weights_stdev 15 | 16 | def unset_weights_stdev(): 17 | 
_default_weightnorm = False


def enable_default_weightnorm():
    """Make weight normalization the default for `Conv2D` calls that pass `weightnorm=None`."""
    global _default_weightnorm
    _default_weightnorm = True


_weights_stdev = None


def set_weights_stdev(weights_stdev):
    """Force every later `Conv2D` filter init to use `weights_stdev` instead of the fan-based value."""
    global _weights_stdev
    _weights_stdev = weights_stdev


def unset_weights_stdev():
    """Return to the fan-based filter initialization."""
    global _weights_stdev
    _weights_stdev = None


def Conv2D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True, gain=1.):
    """
    2-D convolution with optional masking and weight normalization.

    name: prefix of the name scope and of the parameters registered through
        `lib.param` (`.Filters`, `.g`, `.Biases`)
    input_dim, output_dim: number of input / output channels
    filter_size: height and width of the (square) filter
    inputs: tensor of shape (batch size, num channels, height, width)
    he_init: True draws filters with stdev sqrt(4/(fan_in+fan_out)),
        False with sqrt(2/(fan_in+fan_out)) (Glorot & Bengio); both are
        overridden while `set_weights_stdev()` is in effect
    mask_type: None, or a pair `(kind, n_channels)` where `kind` is 'a' or 'b'
    stride: spatial stride, applied to both height and width
    weightnorm: True / False, or None to follow `enable_default_weightnorm()`
    biases: when True a per-output-channel bias is added
    gain: factor applied to the initial filter values

    returns: tensor of shape (batch size, num channels, height, width)
    """
    with tf.name_scope(name):

        if mask_type is not None:
            mask_type, mask_n_channels = mask_type

            mask = np.ones(
                (filter_size, filter_size, input_dim, output_dim),
                dtype='float32'
            )
            center = filter_size // 2

            # Mask out future locations
            # filter shape is (height, width, input channels, output channels)
            mask[center+1:, :, :, :] = 0.
            mask[center, center+1:, :, :] = 0.

            # Mask out future channels
            # (`range`, not `xrange`: the latter does not exist on Python 3)
            for i in range(mask_n_channels):
                for j in range(mask_n_channels):
                    if (mask_type == 'a' and i >= j) or (mask_type == 'b' and i > j):
                        mask[
                            center,
                            center,
                            i::mask_n_channels,
                            j::mask_n_channels
                        ] = 0.

        def uniform(stdev, size):
            # uniform on [-stdev*sqrt(3), stdev*sqrt(3)] has standard deviation `stdev`
            return np.random.uniform(
                low=-stdev * np.sqrt(3),
                high=stdev * np.sqrt(3),
                size=size
            ).astype('float32')

        fan_in = input_dim * filter_size**2
        fan_out = output_dim * filter_size**2 / (stride**2)

        if mask_type is not None:  # only approximately correct
            fan_in /= 2.
            fan_out /= 2.

        if he_init:
            filters_stdev = np.sqrt(4./(fan_in+fan_out))
        else:  # Normalized init (Glorot & Bengio)
            filters_stdev = np.sqrt(2./(fan_in+fan_out))

        # a stdev installed through set_weights_stdev() wins over the fan-based one
        if _weights_stdev is not None:
            filter_values = uniform(
                _weights_stdev,
                (filter_size, filter_size, input_dim, output_dim)
            )
        else:
            filter_values = uniform(
                filters_stdev,
                (filter_size, filter_size, input_dim, output_dim)
            )

        filter_values *= gain

        filters = lib.param(name+'.Filters', filter_values)

        if weightnorm is None:
            weightnorm = _default_weightnorm
        if weightnorm:
            # one norm per output channel, taken over (height, width, input channels)
            norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0, 1, 2)))
            target_norms = lib.param(
                name + '.g',
                norm_values
            )
            with tf.name_scope('weightnorm'):
                norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0, 1, 2]))
                filters = filters * (target_norms / norms)

        if mask_type is not None:
            with tf.name_scope('filter_mask'):
                filters = filters * mask

        result = tf.nn.conv2d(
            input=inputs,
            filter=filters,
            strides=[1, 1, stride, stride],
            padding='SAME',
            data_format='NCHW'
        )

        if biases:
            _biases = lib.param(
                name+'.Biases',
                np.zeros(output_dim, dtype='float32')
            )

            result = tf.nn.bias_add(result, _biases, data_format='NCHW')

        return result
_default_weightnorm = False


def enable_default_weightnorm():
    """Make weight normalization the default for `Deconv2D` calls that pass `weightnorm=None`."""
    global _default_weightnorm
    _default_weightnorm = True


_weights_stdev = None


def set_weights_stdev(weights_stdev):
    """Force every later `Deconv2D` filter init to use `weights_stdev` instead of the fan-based value."""
    global _weights_stdev
    _weights_stdev = weights_stdev


def unset_weights_stdev():
    """Return to the fan-based filter initialization."""
    global _weights_stdev
    _weights_stdev = None


def Deconv2D(
    name,
    input_dim,
    output_dim,
    filter_size,
    inputs,
    he_init=True,
    weightnorm=None,
    biases=True,
    gain=1.,
    mask_type=None,
    ):
    """
    Stride-2 transposed 2-D convolution with optional weight normalization.

    inputs: tensor of shape (batch size, input_dim, height, width); it is
        transposed to channels-last internally and transposed back on return
    he_init: True draws filters with stdev sqrt(4/(fan_in+fan_out)),
        False with sqrt(2/(fan_in+fan_out)) (Glorot & Bengio); both are
        overridden while `set_weights_stdev()` is in effect
    weightnorm: True / False, or None to follow `enable_default_weightnorm()`
    biases: when True a per-output-channel bias is added
    gain: factor applied to the initial filter values
    mask_type: must be None; anything else raises Exception

    returns: tensor of shape (batch size, output_dim, 2*height, 2*width)
    """
    with tf.name_scope(name):

        if mask_type is not None:
            raise Exception('Unsupported configuration')

        def uniform(stdev, size):
            # uniform on [-stdev*sqrt(3), stdev*sqrt(3)] has standard deviation `stdev`
            return np.random.uniform(
                low=-stdev * np.sqrt(3),
                high=stdev * np.sqrt(3),
                size=size
            ).astype('float32')

        stride = 2
        fan_in = input_dim * filter_size**2 / (stride**2)
        fan_out = output_dim * filter_size**2

        if he_init:
            filters_stdev = np.sqrt(4./(fan_in+fan_out))
        else:  # Normalized init (Glorot & Bengio)
            filters_stdev = np.sqrt(2./(fan_in+fan_out))

        # conv2d_transpose filters are laid out (height, width, output channels, input channels)
        if _weights_stdev is not None:
            filter_values = uniform(
                _weights_stdev,
                (filter_size, filter_size, output_dim, input_dim)
            )
        else:
            filter_values = uniform(
                filters_stdev,
                (filter_size, filter_size, output_dim, input_dim)
            )

        filter_values *= gain

        filters = lib.param(
            name+'.Filters',
            filter_values
        )

        if weightnorm is None:
            weightnorm = _default_weightnorm
        if weightnorm:
            # one norm per output channel (axis 2 of the filter)
            norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0, 1, 3)))
            target_norms = lib.param(
                name + '.g',
                norm_values
            )
            with tf.name_scope('weightnorm'):
                norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0, 1, 3]))
                # trailing axis added so the per-output-channel scale broadcasts over input channels
                filters = filters * tf.expand_dims(target_norms / norms, 1)

        inputs = tf.transpose(inputs, [0, 2, 3, 1], name='NCHW_to_NHWC')

        input_shape = tf.shape(inputs)
        # tf.pack was renamed tf.stack in TensorFlow 1.0; use whichever this
        # installation provides instead of catching every exception.
        stack = getattr(tf, 'stack', None) or tf.pack
        output_shape = stack([input_shape[0], 2*input_shape[1], 2*input_shape[2], output_dim])

        result = tf.nn.conv2d_transpose(
            value=inputs,
            filter=filters,
            output_shape=output_shape,
            strides=[1, 2, 2, 1],
            padding='SAME'
        )

        if biases:
            _biases = lib.param(
                name+'.Biases',
                np.zeros(output_dim, dtype='float32')
            )
            result = tf.nn.bias_add(result, _biases)

        result = tf.transpose(result, [0, 3, 1, 2], name='NHWC_to_NCHW')

        return result


def Layernorm(name, norm_axes, inputs):
    """
    Layer normalization over `norm_axes` with a learned offset and scale.

    name: prefix of the `.offset` / `.scale` parameters registered via `lib.param`
    norm_axes: axes over which mean and variance are computed
    inputs: tensor to normalize; its static shape along `norm_axes[0]` must be known

    returns: the normalized, scaled and shifted tensor
    """
    mean, var = tf.nn.moments(inputs, norm_axes, keep_dims=True)

    # Assume the 'neurons' axis is the first of norm_axes. This is the case for fully-connected and BCHW conv layers.
    n_neurons = inputs.get_shape().as_list()[norm_axes[0]]

    offset = lib.param(name+'.offset', np.zeros(n_neurons, dtype='float32'))
    scale = lib.param(name+'.scale', np.ones(n_neurons, dtype='float32'))

    # Add broadcasting dims to offset and scale (e.g. BCHW conv data).
    # Built without `xrange`, which does not exist on Python 3.
    broadcast_shape = [-1] + [1] * (len(norm_axes) - 1)
    offset = tf.reshape(offset, broadcast_shape)
    scale = tf.reshape(scale, broadcast_shape)

    result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-5)

    return result
_default_weightnorm = False


def enable_default_weightnorm():
    """Make weight normalization the default for `Linear` calls that pass `weightnorm=None`."""
    global _default_weightnorm
    _default_weightnorm = True


def disable_default_weightnorm():
    """Make plain (un-normalized) weights the default for `Linear` again."""
    global _default_weightnorm
    _default_weightnorm = False


_weights_stdev = None


def set_weights_stdev(weights_stdev):
    """Force the stdev-based initializers of `Linear` to use `weights_stdev`."""
    global _weights_stdev
    _weights_stdev = weights_stdev


def unset_weights_stdev():
    """Return to the per-scheme initialization stdev."""
    global _weights_stdev
    _weights_stdev = None


def Linear(
        name,
        input_dim,
        output_dim,
        inputs,
        biases=True,
        initialization=None,
        weightnorm=None,
        gain=1.
        ):
    """
    Fully-connected layer applied to the last axis of `inputs`.

    name: prefix of the name scope and of the parameters registered through
        `lib.param` (`.W`, `.g`, `.b`)
    input_dim, output_dim: size of the last axis before / after the layer
    inputs: tensor whose last axis has size `input_dim`; tensors with more than
        two axes are flattened to 2-D for the matmul and reshaped back
    biases: when True a bias vector of size `output_dim` is added
    initialization: None, `lecun`, 'glorot', `he`, 'glorot_he', `orthogonal`, `("uniform", range)`
        (None behaves like 'glorot')
    weightnorm: True / False, or None to follow the module-wide default
    gain: factor applied to the initial weight values

    Raises Exception('Invalid initialization!') for any other `initialization`.
    """
    with tf.name_scope(name):

        def uniform(stdev, size):
            # a stdev installed through set_weights_stdev() overrides the scheme's own
            if _weights_stdev is not None:
                stdev = _weights_stdev
            return np.random.uniform(
                low=-stdev * np.sqrt(3),
                high=stdev * np.sqrt(3),
                size=size
            ).astype('float32')

        if initialization == 'lecun':
            weight_values = uniform(
                np.sqrt(1./input_dim),
                (input_dim, output_dim)
            )

        elif initialization == 'glorot' or initialization is None:
            weight_values = uniform(
                np.sqrt(2./(input_dim+output_dim)),
                (input_dim, output_dim)
            )

        elif initialization == 'he':
            weight_values = uniform(
                np.sqrt(2./input_dim),
                (input_dim, output_dim)
            )

        elif initialization == 'glorot_he':
            weight_values = uniform(
                np.sqrt(4./(input_dim+output_dim)),
                (input_dim, output_dim)
            )

        # The former "None and input_dim == output_dim" alternative of this branch
        # could never fire (None is already handled by the glorot branch above),
        # so orthogonal init is selected by name only.
        elif initialization == 'orthogonal':

            # From lasagne
            def sample(shape):
                if len(shape) < 2:
                    raise RuntimeError("Only shapes of length 2 or more are "
                                       "supported.")
                flat_shape = (shape[0], np.prod(shape[1:]))
                # TODO: why normal and not uniform?
                a = np.random.normal(0.0, 1.0, flat_shape)
                u, _, v = np.linalg.svd(a, full_matrices=False)
                # pick the one with the correct shape
                q = u if u.shape == flat_shape else v
                q = q.reshape(shape)
                return q.astype('float32')
            weight_values = sample((input_dim, output_dim))

        # Validate the shape before indexing so that a malformed value reaches the
        # 'Invalid initialization!' error below instead of a TypeError/IndexError.
        elif (isinstance(initialization, (tuple, list))
              and len(initialization) >= 2
              and initialization[0] == 'uniform'):
            weight_values = np.random.uniform(
                low=-initialization[1],
                high=initialization[1],
                size=(input_dim, output_dim)
            ).astype('float32')

        else:
            raise Exception('Invalid initialization!')

        weight_values *= gain

        weight = lib.param(
            name + '.W',
            weight_values
        )

        if weightnorm is None:
            weightnorm = _default_weightnorm
        if weightnorm:
            # one norm per output unit (column of W)
            norm_values = np.sqrt(np.sum(np.square(weight_values), axis=0))

            target_norms = lib.param(
                name + '.g',
                norm_values
            )

            with tf.name_scope('weightnorm'):
                norms = tf.sqrt(tf.reduce_sum(tf.square(weight), reduction_indices=[0]))
                weight = weight * (target_norms / norms)

        if inputs.get_shape().ndims == 2:
            result = tf.matmul(inputs, weight)
        else:
            # collapse all leading axes, multiply, then restore them with the new last axis
            reshaped_inputs = tf.reshape(inputs, [-1, input_dim])
            result = tf.matmul(reshaped_inputs, weight)
            result = tf.reshape(result, tf.stack(tf.unstack(tf.shape(inputs))[:-1] + [output_dim]))

        if biases:
            result = tf.nn.bias_add(
                result,
                lib.param(
                    name + '.b',
                    np.zeros((output_dim,), dtype='float32')
                )
            )

        return result
In a subjective CCR listening test, the proposed postprocessor on G.711-coded speech exceeds the speech quality of an ITU-T standardized postfilter by 0.36 CMOS points, and obtains a clear preference of 1.77 CMOS points compared to G.711, even on par with uncoded speech. 6 | 7 | **Index Terms—convolutional neural networks, speech codecs, speech enhancement.** 8 | 9 | If you use **Convolutional Neural Networks to Enhance Coded Speech** in your research, please cite: 10 | ```bibtex 11 | @article{cnn2codedspeech, 12 | title={Convolutional Neural Networks to Enhance Coded Speech}, 13 | author={Zhao, Ziyue and Liu, Huijun and Fingscheidt, Tim}, 14 | journal={Transactions on Audio, Speech and Language Processing}, 15 | year={2018} 16 | } 17 | ``` 18 | 19 |

20 | 21 |

def load_train_data(train_inputs, train_targets, vali_inputs, vali_targets):
    """Load the training and validation input/target pairs from .mat files.

    Each argument is the path of a .mat file.  The input files are read through
    their 'inputSetNorm' variable, the target files through 'targetSet'.  The
    training pairs are shuffled row-wise (inputs and targets together) after
    seeding NumPy's global generator with 1234; the validation pairs keep their
    stored order.  Every returned array carries an extra trailing axis of size 1.

    Returns (train inputs, train targets, validation inputs, validation targets).
    """
    banner = '> +++++++++++++++++++++++++++++++++++++++++++++++++++++ '

    def read_variable(mat_path, key):
        # loadmat yields a dict of variables; keep only the requested one
        return np.array(sio.loadmat(mat_path)[key])

    def with_channel_axis(frames):
        return np.reshape(frames, (frames.shape[0], frames.shape[1], 1))

    print(banner)
    print('> Loading data ')

    t0 = time.time()

    # 1. training inputs
    train_in_path = os.path.normcase(train_inputs)
    print('> 1. Loading Training Input: ' + train_in_path + '...')
    noisy_train = read_variable(train_in_path, 'inputSetNorm')

    # 2. training targets
    train_tg_path = os.path.normcase(train_targets)
    print('> 2. Loading Training Target: ' + train_tg_path + '...')
    clean_train = read_variable(train_tg_path, 'targetSet')

    # 3. validation inputs
    vali_in_path = os.path.normcase(vali_inputs)
    print('> 3. Loading Validation Input: ' + vali_in_path + '...')
    noisy_vali = read_variable(vali_in_path, 'inputSetNorm')

    # 4. validation targets
    vali_tg_path = os.path.normcase(vali_targets)
    print('> 4. Loading Validation Target: ' + vali_tg_path + '...')
    clean_vali = read_variable(vali_tg_path, 'targetSet')

    # 5. shuffle the training pairs; stacking inputs and targets side by side
    #    keeps every input row attached to its target row
    print('> 5. Randomization of Training Pairs ...')
    n_input_cols = noisy_train.shape[1]

    np.random.seed(1234)
    paired = np.column_stack((noisy_train, clean_train))
    np.random.shuffle(paired)
    noisy_train, clean_train = paired[:, :n_input_cols], paired[:, n_input_cols:]

    # 6. append the channel axis to all four arrays
    noisy_train = with_channel_axis(noisy_train)
    clean_train = with_channel_axis(clean_train)
    noisy_vali = with_channel_axis(noisy_vali)
    clean_vali = with_channel_axis(clean_vali)

    print("> Data Loaded, , Time : ", time.time() - t0)
    print(banner)

    return noisy_train, clean_train, noisy_vali, clean_vali


def load_test_data(testfile_path="inputTestSet_g711concat_Type3_Frame_256_ceps_v73.mat"):
    """Load the test inputs stored under ./TestData.

    `testfile_path` is the file name inside "./TestData" (the directory is
    created when missing).  The file's 'inputTestNorm' variable is returned with
    an extra trailing axis of size 1.
    """
    print('> Loading Test data... ')

    root = "./TestData"
    if not os.path.exists(root):
        os.makedirs(root)

    mat_path = os.path.normcase(root + "/" + testfile_path)
    frames = np.array(sio.loadmat(mat_path)['inputTestNorm'])

    return np.reshape(frames, (frames.shape[0], frames.shape[1], 1))
-------------------------------------------------------------------------------- /WaveformCNN/TestData/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/WaveformCNN/TestData/README.md -------------------------------------------------------------------------------- /WaveformCNN/TrainValiData/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linksense/ConvolutionaNeuralNetworksToEnhanceCodedSpeech/2f0852ef2d97338a8cf42fe7e20231f38c0613de/WaveformCNN/TrainValiData/README.md -------------------------------------------------------------------------------- /WaveformCNN/WaveformQSRCNN.py: -------------------------------------------------------------------------------- 1 | ######################################################################################### 2 | # CepstralQSRCNN.py: QSRCNN for G711/ADPCM/AMR/EVS using Cepstral features 3 | # Author: Huijun Liu 4 | # Time: 17.07.2017 5 | # Location: TU Braunschweig IfN 6 | ######################################################################################### 7 | 8 | import os 9 | import time 10 | import math 11 | import scipy.io as sio 12 | import tensorflow as tf 13 | 14 | from keras.models import Model 15 | from keras import backend as K 16 | from keras.layers import Input, Add, Activation 17 | from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard, LearningRateScheduler 19 | 20 | from weightnorm import AdamWithWeightnorm 21 | from tensorflow.python.framework import ops 22 | 23 | # ------------------------------------------------------------------------------- 24 | # 0. 
def snr(y_true, y_pred):
    """
    Signal to Noise Ratio in dB: 10 * log10(sum(y_true^2) / sum((y_pred - y_true)^2)).
    """
    return 10.0 * K.log((K.sum(K.square(y_true))) / (K.sum(K.square(y_pred - y_true)))) / K.log(10.0)


def selu(x):
    """Scaled exponential linear unit: scale * x for x >= 0, scale * alpha * elu(x) otherwise."""
    with ops.name_scope('elu'):
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * tf.where(x >= 0.0, x, alpha * tf.nn.elu(x))


# -------------------------------------------------------------------------------
# 1. define Waveform-QSRCNN Model
# -------------------------------------------------------------------------------


class WaveformQSRCNN(object):
    """1-D convolutional encoder/decoder with two additive skip connections.

    The constructor builds and compiles the Keras model; the remaining methods
    train it, store the training curves, load/save weights and run prediction.
    """

    def __init__(self, opt_params=None, model_params=None, codec_type_params=None):
        """
        opt_params: dict with 'lr', 'batch_size', 'nb_epochs'
            (default {'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 100})
        model_params: dict with 'n1', 'n2', 'n3' (filters per stage) and 'frame_len'
            (default {'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32})
        codec_type_params: dict with 'weights_dir' and 'logdir'
            (default {'weights_dir': "./model_weights", 'logdir': "./log"})

        Both directories are created when they do not exist.
        """
        # None sentinels instead of dict literals in the signature, so that the
        # defaults cannot be shared (and mutated) across instances.
        if opt_params is None:
            opt_params = {'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 100}
        if model_params is None:
            model_params = {'n1': 16, 'n2': 32, 'n3': 16, 'frame_len': 32}
        if codec_type_params is None:
            codec_type_params = {'weights_dir': "./model_weights", 'logdir': "./log"}

        self.learning_rate = opt_params['lr']
        self.batch_size = opt_params['batch_size']
        self.nb_epochs = opt_params['nb_epochs']

        self.log_dir = codec_type_params['logdir']
        if not (os.path.exists(self.log_dir)):
            os.makedirs(self.log_dir)

        self.weights_dir = codec_type_params['weights_dir']
        if not (os.path.exists(self.weights_dir)):
            os.makedirs(self.weights_dir)

        self.frame_len = model_params['frame_len']
        self.model = self.create_model(model_params)

    def _run_suffix(self):
        """Return the '_bs<batch size>_lr<learning rate>' tag shared by all output file names."""
        return '_bs' + str(self.batch_size) + '_lr' + str(self.learning_rate)

    def _weights_path(self, tag):
        """Return the weights file path for `tag` ('Best' or 'Final')."""
        file_path = self.weights_dir + '/' + 'G711_WaveformQSRCNN_Weights_' + tag + \
            self._run_suffix() + '.h5'
        return os.path.normcase(file_path)

    # -------------------------------------------------------------------------------
    # Load the Weights of the Model
    # -------------------------------------------------------------------------------
    def load_weights(self, file_path=""):
        """Load model weights from `file_path`, or from the 'Best' checkpoint when it is empty."""
        if file_path == "":
            file_path = self._weights_path('Best')

        file_path = os.path.normcase(file_path)
        self.model.load_weights(file_path)

    # -------------------------------------------------------------------------------
    # Save the Weights of the Model
    # -------------------------------------------------------------------------------
    def save_weights(self):
        """Save the current model weights as the 'Final' weights file."""
        self.model.save_weights(self._weights_path('Final'))

    # -------------------------------------------------------------------------------
    # 1. define model
    # -------------------------------------------------------------------------------
    def create_model(self, model_params=None):
        """Build and compile the network.

        model_params: dict with 'n1', 'n2', 'n3'
            (default {'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 80}).
            The frame length is always taken from `self.frame_len`.

        Raises ValueError when `self.frame_len` is not a multiple of 4.
        Returns the compiled Keras model.
        """
        if model_params is None:
            model_params = {'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 80}

        frame_len = self.frame_len
        # Two pool-by-2 stages are undone by two upsample-by-2 stages; the skip
        # connections only line up when no sample is lost to the pooling floor.
        if frame_len % 4 != 0:
            raise ValueError("frame_len must be a multiple of 4, got %r" % (frame_len,))

        n1 = model_params['n1']
        n2 = model_params['n2']
        n3 = model_params['n3']

        input_sque = Input(shape=(frame_len, 1))
        c1 = Conv1D(n1, 3, padding='same')(input_sque)
        c1 = Activation(selu)(c1)
        c1 = Conv1D(n1, 3, padding='same')(c1)
        c1 = Activation(selu)(c1)
        x = MaxPooling1D(2)(c1)

        c2 = Conv1D(n2, 3, padding='same')(x)
        c2 = Activation(selu)(c2)
        c2 = Conv1D(n2, 3, padding='same')(c2)
        c2 = Activation(selu)(c2)
        x = MaxPooling1D(2)(c2)

        c3 = Conv1D(n3, 3, padding='same')(x)
        c3 = Activation(selu)(c3)
        x = UpSampling1D(2)(c3)

        c2_2 = Conv1D(n2, 3, padding='same')(x)
        c2_2 = Activation(selu)(c2_2)
        c2_2 = Conv1D(n2, 3, padding='same')(c2_2)
        c2_2 = Activation(selu)(c2_2)

        # skip connection at half resolution
        m1 = Add()([c2, c2_2])
        m1 = UpSampling1D(2)(m1)

        c1_2 = Conv1D(n1, 3, padding='same')(m1)
        c1_2 = Activation(selu)(c1_2)
        c1_2 = Conv1D(n1, 3, padding='same')(c1_2)
        c1_2 = Activation(selu)(c1_2)

        # skip connection at full resolution
        m2 = Add()([c1, c1_2])

        decoded = Conv1D(1, 5, padding='same', activation='linear')(m2)

        model = Model(input_sque, decoded)
        model.summary()

        learning_rate = self.learning_rate

        adam_wn = AdamWithWeightnorm(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        model.compile(optimizer=adam_wn, loss='mse', metrics=[snr])

        return model

    # -------------------------------------------------------------------------------
    # 2. Fit the model
    # -------------------------------------------------------------------------------
    def step_decay(self, epoch):
        """Learning-rate schedule: multiply the initial rate by 0.25 every 4 epochs.

        Also writes the new rate into the optimizer, prints the change and
        returns the value read back from the optimizer.
        """
        initial_lrate = self.learning_rate

        drop = 0.25
        epochs_drop = 4.0
        lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))

        old_lr = K.get_value(self.model.optimizer.lr)
        K.set_value(self.model.optimizer.lr, lrate)
        lrate = K.get_value(self.model.optimizer.lr)
        print("> lr reduced from %f to %f" % (old_lr, lrate))
        return lrate

    def fit(self, x_train_noisy, x_train, x_train_noisy_vali, x_train_vali):
        """Train the model on (x_train_noisy -> x_train), validating on the *_vali pair.

        The best weights (by 'val_snr') are checkpointed and a CSV log is written
        to the log directory.  Returns the Keras History object.
        """
        print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
        print("> Training model ...")

        nb_epochs = self.nb_epochs
        batch_size = self.batch_size
        learning_rate = self.learning_rate

        # ---------------------------------------------------------
        # 1. define callback functions
        # ---------------------------------------------------------
        # Stop training when val_snr has not improved for 16 epochs
        stop_str = EarlyStopping(monitor='val_snr', patience=16, verbose=1, mode='max')

        # Reduce learning rate when val_snr stops improving: lr = lr*factor
        # NOTE(review): the LearningRateScheduler below sets the rate again each
        # epoch, so it presumably overrides this reduction -- confirm intent.
        reduce_LR = ReduceLROnPlateau(monitor='val_snr', factor=0.5, patience=2, verbose=1, mode='max', epsilon=0.0001, cooldown=0, min_lr=0)

        best_weights = self._weights_path('Best')
        model_save = ModelCheckpoint(best_weights, monitor='val_snr', save_best_only=True, mode='max', save_weights_only=True, period=1)

        logger_name = self.log_dir + '/' + 'G711_WaveformQSRCNN_log_bs' + \
            str(batch_size) + '_lr' + str(learning_rate) + '.csv'
        logger_name = os.path.normcase(logger_name)
        logger = CSVLogger(logger_name, separator=',', append=False)

        lrate = LearningRateScheduler(self.step_decay)

        start = time.time()

        # ---------------------------------------------------------
        # 2. fit the model
        # ---------------------------------------------------------
        print("> Training model " + "using Batch-size: " + str(batch_size) + ", Learning_rate: " + str(learning_rate) + "...")
        hist = self.model.fit(x_train_noisy, x_train, epochs=nb_epochs, batch_size=batch_size, shuffle=True,
                              validation_data=(x_train_noisy_vali, x_train_vali),
                              callbacks=[lrate, reduce_LR, stop_str, model_save, logger])

        print("> Training Completed, Time : ", time.time() - start)
        print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
        return hist

    # -------------------------------------------------------------------------------
    # 3. Save loss snr val_loss val_snr as .mat File
    # -------------------------------------------------------------------------------
    def save_training_curves(self, hist):
        """Write the four curves of `hist.history` to .mat files under ./Opting_Results."""
        print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
        print("> Saving Training and Validation loss-metric curve ...")

        start = time.time()

        trian_curve_root = "./Opting_Results"
        if not(os.path.exists(trian_curve_root)):
            os.makedirs(trian_curve_root)

        # (file stem, variable name inside the .mat file, key in hist.history)
        # NOTE(review): the validation-loss stem spells 'WaveformDDQSRCNN' unlike
        # its three siblings; kept as is because other tooling may read that name.
        curves = (
            ('G711_WaveformQSRCNN_TrainLoss_bs', 'Train_Loss_Vec', 'loss'),
            ('G711_WaveformQSRCNN_TrainMetrice_bs', 'Train_SNR_Vec', 'snr'),
            ('G711_WaveformDDQSRCNN_ValiLoss_bs', 'Vali_Loss_Vec', 'val_loss'),
            ('G711_WaveformQSRCNN_ValiMetrice_bs', 'Vali_SNR_Vec', 'val_snr'),
        )
        for stem, mat_key, hist_key in curves:
            curve_path = trian_curve_root + '/' + stem + \
                str(self.batch_size) + '_lr' + str(self.learning_rate) + '.mat'
            curve_path = os.path.normcase(curve_path)
            sio.savemat(curve_path, {mat_key: hist.history[hist_key]})

        print("> Saving Completed, Time : ", time.time() - start)
        print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')

    # -------------------------------------------------------------------------------
    # 4. Evaluate the Trained Model
    # -------------------------------------------------------------------------------
    def evaluation_model(self, x_test_noisy, weights_path=""):
        """Load weights, predict on `x_test_noisy` and store the result.

        weights_path: weights file to load; empty selects the 'Best' checkpoint.
        The predictions are written to ./Test_Outputs/G711_CNN_testplan_vec.mat
        under the variable name 'predictions'.
        """
        print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
        print("> Evaluation of the Trained Model ...")
        # ---------------------------------------------------------
        # 1. Load Model Weights
        # ---------------------------------------------------------
        print('> 1. Loading the Weights of the Model ...')
        self.load_weights(weights_path)

        # ---------------------------------------------------------
        # 2. Evaluate the Model
        # ---------------------------------------------------------
        start = time.time()
        print('> 2. Evaluating the Model, Please wait for a Moment ...')
        predicted = self.model.predict(x_test_noisy)
        print('> 2. Evaluating Completed, Time : ' + str(time.time() - start))

        # ---------------------------------------------------------
        # 3. Saving the Evaluation Result
        # ---------------------------------------------------------
        print('> 3. Saving the Evaluation Result ...')
        start = time.time()
        pre_file_root = "./Test_Outputs"
        if not (os.path.exists(pre_file_root)):
            os.makedirs(pre_file_root)

        preOutput = pre_file_root + "/" + "G711_CNN_testplan_vec.mat"
        preOutput = os.path.normcase(preOutput)

        sio.savemat(preOutput, {'predictions': predicted})
        print('> 3. Evaluation Result Saving Completed, Time : ' + str(time.time() - start))
        print('> +++++++++++++++++++++++++++++++++++++++++++++++++++++ ')
#####################################################################################
# 0. Settings For GPUs and Parameters
#####################################################################################
using_gpu = 0
if using_gpu == 1:
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"  # x stand for GPU index: 3-x!!
    # NOTE(review): the flattened source does not show indentation; the session
    # setup below is assumed to belong to the `using_gpu` branch - confirm.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5  # Let this process use at most 50% of the GPU memory
    set_session(tf.Session(config=config))

# Selects which of the two branches below is executed.
train_or_test = "train"  # train or test

# Dictionaries handed to the WaveformQSRCNN constructor further down.
default_opt_params = {'lr': 5e-4, 'batch_size': 32, 'nb_epochs': 1000}
default_model_params = {'n1': 32, 'n2': 64, 'n3': 32, 'frame_len': 80}
codec_type_params = {'weights_dir': "./model_weights", 'logdir': "./log"}

# Placeholders; the real paths are assigned inside the selected branch.
train_inputs = ""
train_targets = ""

vali_inputs = ""
vali_targets = ""

test_inputs = ""

if train_or_test == "train":
    # -------------------------------------------------------------------------------
    # 1. Load Data
    # -------------------------------------------------------------------------------
    train_inputs = "./TrainValiData/Train_inputSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat"
    train_targets = "./TrainValiData/Train_targetSet_ALaw_defautLang_OLdata_ValiTrain_Reverse_Extended_Reverse.mat"

    vali_inputs = "./TrainValiData/Vali_inputSet_ALaw_defautLang_OLdata_ValiTrain_smallVali_Reverse_Extended.mat"
    vali_targets = "./TrainValiData/Vali_targetSet_ALaw_defautLang_OLdata_ValiTrain_smallVali_Reverse_Extended.mat"

    x_train_noisy, x_train, x_train_noisy_vali, x_train_vali = dp.load_train_data(train_inputs, train_targets,
                                                                                  vali_inputs, vali_targets)

    # -------------------------------------------------------------------------------
    # 2. Init Waveform-QSRCNN Model
    # -------------------------------------------------------------------------------
    qsrcnn = model.WaveformQSRCNN(opt_params=default_opt_params,
                                  model_params=default_model_params,
                                  codec_type_params=codec_type_params)

    # -------------------------------------------------------------------------------
    # 3. Fit The Waveform-QSRCNN Model
    # -------------------------------------------------------------------------------
    hist =qsrcnn.fit(x_train_noisy, x_train, x_train_noisy_vali, x_train_vali)

    # -------------------------------------------------------------------------------
    # 4. Save Weights and Training Curves
    # -------------------------------------------------------------------------------
    qsrcnn.save_weights()
    qsrcnn.save_training_curves(hist=hist)

elif train_or_test == "test":
    # -------------------------------------------------------------------------------
    # 1. Load Data
    # -------------------------------------------------------------------------------
    # NOTE(review): unlike the training files this name carries no directory
    # prefix - presumably resolved by dp.load_test_data or the working
    # directory; verify.
    test_inputs = "inputTestSet_g711concat_PDandOLAI_Frame_80v73.mat"
    x_test_noisy = dp.load_test_data(test_inputs)

    # -------------------------------------------------------------------------------
    # 2. Init Waveform-QSRCNN Model
    # -------------------------------------------------------------------------------

    # Same value as the module-level codec_type_params defined above.
    codec_type_params = {'weights_dir': "./model_weights", 'logdir': "./log"}
    qsrcnn = model.WaveformQSRCNN(opt_params=default_opt_params,
                                  model_params=default_model_params,
                                  codec_type_params=codec_type_params)

    # -------------------------------------------------------------------------------
    # 3. Evaluate The Waveform-QSRCNN Model
    # -------------------------------------------------------------------------------
    # No weights_path is passed, so evaluation_model uses its default "".
    qsrcnn.evaluation_model(x_test_noisy)

else:
    raise Exception("Do you want to train or test the model ? Please set the variable train_or_test !")
# adapted from keras.optimizers.SGD
class SGDWithWeightnorm(SGD):
    """SGD with momentum that updates weight tensors in weight-normalised form.

    Parameters with more than one dimension are split into a direction ``V``
    and a per-output scale ``g`` (see ``get_weightnorm_params_and_grads``);
    momentum is applied to each part and the result is folded back into the
    original variable. One-dimensional parameters get plain momentum SGD.
    """

    def get_updates(self, params, constraints, loss):
        """Build the list of update ops for one optimisation step.

        Args:
            params: Variables to optimise.
            constraints: Mapping from a variable to a constraint callable.
                For weight-normalised variables the constraint is applied to
                ``V``, not to the effective weight.
            loss: Loss tensor the gradients are taken from.

        Returns:
            ``self.updates``, the list of update ops.
        """
        grads = self.get_gradients(loss, params)
        # Advance the step counter on every call, not only when learning-rate
        # decay is enabled, so `iterations` always reflects the number of
        # steps taken (same behaviour as AdamWithWeightnorm in this file).
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        # momentum
        shapes = [K.get_variable_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        # Note: the extra momentum slot created for `g` inside the loop below
        # is not part of self.weights.
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):

            # if a weight tensor (len > 1) use weight normalized parameterization
            ps = K.get_variable_shape(p)
            if len(ps) > 1:

                # get weight normalization parameters
                V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(p, g)

                # momentum container for the 'g' parameter
                V_scaler_shape = K.get_variable_shape(V_scaler)
                m_g = K.zeros(V_scaler_shape)

                # update g parameters
                v_g = self.momentum * m_g - lr * grad_g  # velocity
                self.updates.append(K.update(m_g, v_g))
                if self.nesterov:
                    new_g_param = g_param + self.momentum * v_g - lr * grad_g
                else:
                    new_g_param = g_param + v_g

                # update V parameters
                v_v = self.momentum * m - lr * grad_V  # velocity
                self.updates.append(K.update(m, v_v))
                if self.nesterov:
                    new_V_param = V + self.momentum * v_v - lr * grad_V
                else:
                    new_V_param = V + v_v

                # if there are constraints we apply them to V, not W
                if p in constraints:
                    c = constraints[p]
                    new_V_param = c(new_V_param)

                # wn param updates --> W updates
                add_weightnorm_param_updates(self.updates, new_V_param, new_g_param, p, V_scaler)

            else:  # normal SGD with momentum
                v = self.momentum * m - lr * g  # velocity
                self.updates.append(K.update(m, v))

                if self.nesterov:
                    new_p = p + self.momentum * v - lr * g
                else:
                    new_p = p + v

                # apply constraints
                if p in constraints:
                    c = constraints[p]
                    new_p = c(new_p)

                self.updates.append(K.update(p, new_p))
        return self.updates
# adapted from keras.optimizers.Adam
class AdamWithWeightnorm(Adam):
    """Adam that updates weight tensors in weight-normalised (V, g) form.

    Parameters with more than one dimension are split into a direction ``V``
    and a per-output scale ``g``; each part receives its own Adam step and the
    result is folded back into the original variable. One-dimensional
    parameters are optimised with ordinary Adam.
    """

    def get_updates(self, params, constraints, loss):
        """Build the list of update ops for one optimisation step.

        Args:
            params: Variables to optimise.
            constraints: Mapping from a variable to a constraint callable; for
                weight-normalised variables it is applied to ``V``, not ``W``.
            loss: Loss tensor the gradients are taken from.

        Returns:
            ``self.updates``, the list of update ops.
        """
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        # bias-corrected step size
        step = self.iterations + 1
        lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, step)) / (1. - K.pow(self.beta_1, step))

        first_moments = [K.zeros(K.get_variable_shape(p)) for p in params]
        second_moments = [K.zeros(K.get_variable_shape(p)) for p in params]
        self.weights = [self.iterations] + first_moments + second_moments

        def adam_step(value, gradient, m_slot, v_slot):
            # One Adam step for `value`: registers the refreshed moment
            # estimates as updates and returns the stepped value.
            m_next = (self.beta_1 * m_slot) + (1. - self.beta_1) * gradient
            v_next = (self.beta_2 * v_slot) + (1. - self.beta_2) * K.square(gradient)
            stepped = value - lr_t * m_next / (K.sqrt(v_next) + self.epsilon)
            self.updates.append(K.update(m_slot, m_next))
            self.updates.append(K.update(v_slot, v_next))
            return stepped

        for weight, grad, m_slot, v_slot in zip(params, grads, first_moments, second_moments):
            # Only tensors with more than one dimension are weight-normalised;
            # this is the only part changed w.r.t. keras.optimizers.Adam.
            if len(K.get_variable_shape(weight)) > 1:
                V, _, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(weight, grad)

                # separate Adam slots for the 'g' parameter
                scaler_shape = K.get_variable_shape(V_scaler)
                g_m_slot = K.zeros(scaler_shape)
                g_v_slot = K.zeros(scaler_shape)

                new_g = adam_step(g_param, grad_g, g_m_slot, g_v_slot)
                new_V = adam_step(V, grad_V, m_slot, v_slot)

                # constraints act on V, not on the effective weight W
                if weight in constraints:
                    new_V = constraints[weight](new_V)

                # translate the (V, g) step back into updates of W and V_scaler
                add_weightnorm_param_updates(self.updates, new_V, new_g, weight, V_scaler)
            else:
                # plain Adam for biases and other one-dimensional parameters
                new_weight = adam_step(weight, grad, m_slot, v_slot)
                if weight in constraints:
                    new_weight = constraints[weight](new_weight)
                self.updates.append(K.update(weight, new_weight))
        return self.updates
def get_weightnorm_params_and_grads(p, g):
    """Split weight ``p`` and its gradient ``g`` into weight-norm parts.

    The scale vector ``V_scaler`` is created as ones with one entry per slice
    along the last axis of ``p``, so ``V`` initially equals ``p``. Norms and
    reductions run over every axis except the last one.

    Args:
        p: Weight variable with more than one dimension.
        g: Gradient of the loss with respect to ``p``.

    Returns:
        Tuple ``(V, V_norm, V_scaler, g_param, grad_g, grad_V)``.
    """
    ps = K.get_variable_shape(p)

    # construct weight scaler: V_scaler = g/||V||
    V_scaler_shape = (ps[-1],)  # assumes we're using tensorflow!
    V_scaler = K.ones(V_scaler_shape)  # init to ones, so effective parameters don't change

    # every axis but the last is reduced over; `bcast` reshapes a per-output
    # vector so it broadcasts against the full weight tensor
    norm_axes = list(range(len(ps) - 1))
    bcast = [1] * len(norm_axes) + [-1]

    # get V parameters = ||V||/g * W
    V = p / tf.reshape(V_scaler, bcast)

    # split V_scaler into ||V|| and g parameters
    V_norm = tf.sqrt(tf.reduce_sum(tf.square(V), norm_axes))
    g_param = V_scaler * V_norm

    # get grad in V,g parameters
    grad_g = tf.reduce_sum(g * V, norm_axes) / V_norm
    grad_V = tf.reshape(V_scaler, bcast) * \
        (g - tf.reshape(grad_g / V_norm, bcast) * V)

    return V, V_norm, V_scaler, g_param, grad_g, grad_V


def add_weightnorm_param_updates(updates, new_V_param, new_g_param, W, V_scaler):
    """Append the updates that write a new (V, g) pair back into ``W``.

    Args:
        updates: List of update ops; two entries are appended in place.
        new_V_param: Updated direction tensor ``V``.
        new_g_param: Updated per-output scale ``g``.
        W: Weight variable that receives ``new_V_param * g / ||new_V_param||``.
        V_scaler: Variable that receives the new ``g / ||V||`` ratio.
    """
    ps = K.get_variable_shape(new_V_param)
    norm_axes = list(range(len(ps) - 1))

    # update W and V_scaler
    new_V_norm = tf.sqrt(tf.reduce_sum(tf.square(new_V_param), norm_axes))
    new_V_scaler = new_g_param / new_V_norm
    new_W = tf.reshape(new_V_scaler, [1] * len(norm_axes) + [-1]) * new_V_param
    updates.append(K.update(W, new_W))
    updates.append(K.update(V_scaler, new_V_scaler))


def _layer_weight_and_bias(layer):
    """Return ``(weight, bias)`` of ``layer``, or ``None`` if it lacks either.

    Looks for the Keras 1 attribute names (``W``/``b``) first and then for the
    Keras 2 names (``kernel``/``bias``); an attribute set to ``None`` (a layer
    built without bias) counts as missing.
    """
    for weight_attr, bias_attr in (('W', 'b'), ('kernel', 'bias')):
        weight = getattr(layer, weight_attr, None)
        bias = getattr(layer, bias_attr, None)
        if weight is not None and bias is not None:
            return weight, bias
    return None


# data based initialization for a given Keras model
def data_based_init(model, input):
    """Rescale each layer's weights and bias from statistics of one batch.

    For every layer that has both a weight and a bias, the mean and variance
    of the layer output are computed over all axes but the last; the weight is
    divided by the standard deviation and the bias is shifted and scaled
    accordingly. Layers are processed one after another, each in its own
    session run.

    Args:
        model: Built Keras model.
        input: Dict mapping input tensors to arrays, a list of arrays (one per
            model input), or a single array for the first model input. A dict
            is copied, so the caller's object is never modified.
    """
    # input can be dict, numpy array, or list of numpy arrays
    if isinstance(input, dict):
        # copy: the learning-phase entry added below must not leak to the caller
        feed_dict = dict(input)
    elif isinstance(input, list):
        feed_dict = dict(zip(model.inputs, input))
    else:
        feed_dict = {model.inputs[0]: input}

    # add learning phase if required
    if model.uses_learning_phase and K.learning_phase() not in feed_dict:
        feed_dict[K.learning_phase()] = 1

    # get all layer name, output, weight, bias tuples
    layer_output_weight_bias = []
    for l in model.layers:
        weight_and_bias = _layer_weight_and_bias(l)
        if weight_and_bias is not None:
            assert(l.built)
            W, b = weight_and_bias
            layer_output_weight_bias.append((l.name, l.get_output_at(0), W, b))  # if more than one node, only use the first

    # iterate over our list and do data dependent init
    sess = K.get_session()
    for l, o, W, b in layer_output_weight_bias:
        print('Performing data dependent initialization for layer ' + l)
        m, v = tf.nn.moments(o, list(range(len(o.get_shape()) - 1)))
        s = tf.sqrt(v + 1e-10)
        updates = tf.group(W.assign(W / tf.reshape(s, [1] * (len(W.get_shape()) - 1) + [-1])), b.assign((b - m) / s))
        sess.run(updates, feed_dict)