├── README.md
├── _pesq_itu_results.txt
├── _pesq_results.txt
├── avr_pesq
├── ca_pesq.sh
├── cln.txt
├── config.py
├── cut_data
    ├── cut_cln_wav.m
    └── cut_wav.m
├── data
    ├── batch_num.txt
    ├── cv.list
    ├── cv
    │   ├── all_real_cln_dt.txt
    │   ├── all_real_rev_dt.txt
    │   ├── cln_dt.txt
    │   ├── far_dt
    │   ├── far_dt.txt
    │   ├── inputs.txt
    │   ├── inputs2.txt
    │   ├── inputs_dir.txt
    │   ├── inputs_feat.txt
    │   ├── nall_real_cln_dt.txt
    │   ├── name
    │   ├── near_dt
    │   └── near_dt.txt
    ├── test
    │   ├── far
    │   │   ├── cln_et.txt
    │   │   ├── far_dir.txt
    │   │   ├── inputs.txt
    │   │   ├── inputs_dir.txt
    │   │   ├── inputs_feat.txt
    │   │   ├── name
    │   │   ├── pro.sh
    │   │   └── test.list
    │   ├── near
    │   │   ├── cln_et.txt
    │   │   ├── inputs.txt
    │   │   ├── inputs_dir.txt
    │   │   ├── inputs_feat.txt
    │   │   ├── name
    │   │   ├── near_dir.txt
    │   │   └── test.list
    │   └── real
    │   │   ├── all_real_cln_et.txt
    │   │   ├── cln
    │   │   ├── inputs.txt
    │   │   ├── inputs_dir.txt
    │   │   ├── inputs_feat.txt
    │   │   ├── name
    │   │   ├── rev_dir.txt
    │   │   └── test.list
    ├── tr.list
    ├── tr
    │   ├── inputs.txt
    │   ├── inputs2.txt
    │   ├── inputs_feat.txt
    │   └── inputs_nmae
    └── train_cmvn.npz
├── evaluate.py
├── ex_trac.sh
├── inputs.scp
├── io_funcs
    ├── __init__.py
    ├── __init__.pyc
    ├── __pycache__
    │   └── kaldi_io.cpython-35.pyc
    ├── convert_cmvn_to_numpy.py
    ├── cut_cln_wav.m
    ├── kaldi_io.py
    ├── kaldi_io.pyc
    ├── make_sete.py
    ├── make_setf.py
    ├── make_tfrecords.py
    ├── make_tfrecords_rta.py
    ├── teconvert_cmvn_to_numpy.py
    ├── test.py
    ├── test2.py
    ├── tfrecords_dataset.py
    ├── tfrecords_dataset.pyc
    ├── tfrecords_dataset_test.py
    ├── tfrecords_io.py
    ├── tfrecords_io.pyc
    ├── tfrecords_io_test.py
    └── verify_tfrecords.py
├── mini_data
    ├── Noise
    │   ├── Babble.wav
    │   ├── Babble2.wav
    │   ├── F16.wav
    │   ├── F162.wav
    │   ├── Factory1.wav
    │   ├── Factory2.wav
    │   ├── Pink.wav
    │   ├── Pink2.wav
    │   ├── Volvo.wav
    │   ├── Volvo2.wav
    │   ├── White.wav
    │   └── White2.wav
    ├── cln_dt.txt
    ├── test_noise
    │   ├── n64.wav
    │   └── n71.wav
    ├── test_speech
    │   └── cln_et.txt
    ├── train_noise
    │   ├── n1.wav
    │   ├── n49.wav
    │   └── n95.wav
    └── train_speech
    │   ├── TRAIN_DR1_FCJF0_SA1.WAV
    │   ├── TRAIN_DR1_FKFB0_SX348.WAV
    │   ├── TRAIN_DR1_MPGR0_SX150.WAV
    │   ├── TRAIN_DR1_MRDD0_SI1680.WAV
    │   ├── cleandata.txt
    │   └── cleandata_test.txt
├── models
    ├── __init__.py
    ├── attention_dir
    │   └── resnet2_rced.py
    ├── dnn.py
    ├── dnn_trainer.py
    └── resnet_rced.py
├── pesq
├── pre_process_data.py
├── pre_process_test.py
├── scripts
    ├── audio_utilities.py
    ├── audio_utilities.pyc
    ├── config.py
    ├── config.pyc
    ├── dataset_test.sh
    ├── datasets
    │   ├── __init__.py
    │   ├── __init__.pyc
    │   ├── audio.py
    │   ├── audio.pyc
    │   ├── preprocessor.py
    │   └── wavenet_preprocessor.py
    ├── get_train_val_scp.py
    ├── hparams.py
    ├── io_test.sh
    ├── parse_options.sh
    ├── prepare_data.py
    ├── prepare_data.pyc
    ├── spectrogram_to_wave.py
    ├── spectrogram_to_wave.pyc
    └── train_dnn.py
├── train.sh
├── train.txt
└── utils
    ├── __init__.py
    ├── __init__.pyc
    ├── add_additive_noise.py
    ├── bnorm.py
    ├── bnorm.pyc
    ├── common.py
    ├── generate_plots.py
    ├── misc.py
    ├── misc.pyc
    ├── ops.py
    ├── ops.pyc
    └── select_data.py


/README.md:
--------------------------------------------------------------------------------
 1 | [English](https://github.com/linan2/TensorFlow-speech-enhancement.git) | 中文
 2 | # 基于深度特征映射的语音增强方法
 3 | 本项目为可以利用DNN和CNN的方法来进行语音增强，其中DNN使用的三个隐层每个隐层512个节点，CNN使用的是R-CED的网络结构并且加入了一些resnet来防止过拟合。你也可以选择是否使用dropout或者l2等。
 4 | 
 5 | ## 注意:
 6 | requirements：TensorFlow1.5 Python2.7
 7 | 
 8 | [制造数据](https://github.com/linan2/add_reverb2.git) 在运行此代码之前你应该先准备好干净和相应的含噪或者含混响的语音; 或者运行utils/add_additive_noise.py 添加相应信噪比的加性噪声，这里使用的事NOISEX-92噪声库在目录mini_data/Noise底下，带2的是8k采样率其他是相应的16k采样率噪音。
 9 | 
10 | 如果你的任务是去混响，在运行此代码之前你需要将含混响的语音剪切的和干净的语音一样长,cut_wav里面的脚本可能对你有用。
11 | 
12 | 如果你的任务是做特征增强（不需要还原到语音），你可以把log spectragram特征替换成其他特征（比如MFCC）。
13 | 
14 | ## 使用:
15 | 第一步. 运行 ex_trac.sh 数据准备并将数据分成训练集和交叉验证集，然后提取 log spectragram 特征.
16 | 
17 | 第二步. 运行 train.sh 来训练和测试模型.
18 | 
19 | 第三步. 运行 ca_pesq.sh 使用PESQ来评价你的结果。
20 | 
21 | ## 补充:
22 | 代码还不完善，持续更新ing…大家如果发现有什么bug可以在代码上直接更改，然后更新。科研任务重，更新慢大家见谅。
23 | 
24 | 本人在 REVERB challenge 数据集上测试了此代码的效果，PESQ能提高大约0.6—0.8。
25 | 
26 | 过段时间我会继续更新一些比如生成对抗网络、 多任务学习和多目标学习的模型, 一些基于注意力机制的模型也会进行更新,敬请期待…
27 | 
28 | 在解码阶段，您可以选择G&L声码器，也可以使用有噪声的语音原始的相位来合成语音，但是我已经尝试过G&L方法，与原始的相位的使用相比，它不会获得更好的性能。
29 | 
30 | 运行环境教程：
31 | https://github.com/linan2/tensorflow-1.4.0.git
32 | 
33 | [1] Li N., Ge M., Wang L., Dang J. (2019) [A Fast Convolutional Self-attention Based Speech Dereverberation Method for Robust Speech Recognition](https://link.springer.com/chapter/10.1007/978-3-030-36718-3_25). In: Gedeon T., Wong K., Lee M. (eds) Neural Information Processing. ICONIP 2019. Lecture Notes in Computer Science, vol 11955. Springer, Cham
34 | 
35 | [2] Wang, K., Zhang, J., Sun, S., Wang, Y., Xiang, F., Xie, L. (2018) Investigating Generative Adversarial Networks Based Speech Dereverberation for Robust Speech Recognition. Proc. Interspeech 2018, 1581-1585, DOI: 10.21437/Interspeech.2018-1780.
36 | 
37 | [3] Ge, M., Wang, L., Li, N., Shi, H., Dang, J., Li, X. (2019) Environment-Dependent Attention-Driven Recurrent Convolutional Neural Network for Robust Speech Enhancement. Proc. Interspeech 2019, 3153-3157, DOI: 10.21437/Interspeech.2019-1477.
38 | 
39 | 
40 | Email: linanvae@163.com
41 | 


--------------------------------------------------------------------------------
/avr_pesq:
--------------------------------------------------------------------------------
1 | far_pesq =  2.25776
2 | near_pesq =  2.34079
3 | real_pesq =  1.83094
4 | 


--------------------------------------------------------------------------------
/ca_pesq.sh:
--------------------------------------------------------------------------------
 1 | stage=0
 2 | if [ $stage -le 0 ]; then
 3 | python evaluate.py calculate_pesq --workspace='workspace' --speech_dir='cut_data/SIMU/cln_evl' --type='test2/far'
 4 | 
 5 | cat _pesq_results.txt|tail -n 539 |head -n 538|awk '{sum+=$2} END {print "far_pesq = ", sum/NR}' > avr_pesq
 6 | wait
 7 | fi
 8 | if [ $stage -le 1 ]; then
 9 | python evaluate.py calculate_pesq --workspace='workspace' --speech_dir='cut_data/SIMU/cln_evl' --type='test2/near'
10 | 
11 | cat _pesq_results.txt|tail -n 539 |head -n 538|awk '{sum+=$2} END {print "near_pesq = ", sum/NR}' >> avr_pesq
12 | wait
13 | fi
14 | 
15 | if [ $stage -le 2 ]; then
16 | python evaluate.py calculate_pesq --workspace='workspace' --speech_dir='cut_data/Real/cln_et' --type='test2/real'
17 | 
18 | cat _pesq_results.txt|tail -n 373 |head -n 372|awk '{sum+=$2} END {print "real_pesq = ", sum/NR}' >> avr_pesq
19 | wait
20 | fi
21 | cat avr_pesq
22 | rm avr_pesq
23 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | sample_rate = 16000
2 | n_window = 400      # windows size for FFT 25ms
3 | n_overlap = 160     # overlap of window 10ms
4 | 


--------------------------------------------------------------------------------
/cut_data/cut_cln_wav.m:
--------------------------------------------------------------------------------
 1 | % ----------------------------------------------------------------------------------------------------
 2 | % parameters and configures
 3 | % ----------------------------------------------------------------------------------------------------
 4 | %dir_name = {'c31/','c34/','c35/','c38/','c3c/','c3d/','c3f/','c3j/','c3k/','c3l/','c3p/','c3s/','c3t/','c3w/','c3z/','c40/','c41/','c42/','c45/','c49/'};
 5 | %dir_name = {'c30/','c32/','c33/','c37/','c39/','c3b/','c3h/','c3o/','c3q/','c3r/','c3y/','c46/','c48/','c4a/'};
 6 | %dir_name = {'c36/','c3a/','c3e/','c3g/','c3i/','c3m/','c3n/','c3u/','c3v/','c3x/','c43/','c44/','c47/','c4b/'};
 7 | %dir_name = {'c02/','c05/','c08/','c0b/','c0e/','c0h/','c0k/','c0n/','c0q/','c0t/','c0w/','c0z/','c12/','c15/','c18/','c1b/','c1e/','c1h/','c1k/','c1n/','c1q/','c1t/','c1w/','c1z/','c22/','c25/','c28/','c2b/','c2e/','c2h/','c2k/','c03/','c06/','c09/','c0c/','c0f/','c0i/','c0l/','c0o/','c0r/','c0u/','c0x/','c10/','c13/','c16/','c19/','c1c/','c1f/','c1i/','c1l/','c1o/','c1r/','c1u/','c1x/','c20/','c23/','c26/','c29/','c2c/','c2f/','c2i/','c2l/','c04/','c07/','c0a/','c0d/','c0g/','c0j/','c0m/','c0p/','c0s/','c0v/','c0y/','c11/','c14/','c17/','c1a/','c1d/','c1g/','c1j/','c1m/','c1p/','c1s/','c1v/','c1y/','c21/','c24/','c27/','c2a/','c2d/','c2g/','c2j/'};
 8 | dir_name ={'c02/'};
 9 | %disp(length(dir_name));
10 | 
11 | % ---------------------------------------------------------------------------------------------------
12 | % cut wavforms
13 | % --------------------------------------------------------------------------------------------------
14 | for t=1:length(dir_name)
15 |     % get the current sub-directory
16 |     tempdir=dir_name{t};
17 |     disp(tempdir);
18 |     % define the path of reverberation wavforms and enhanced wavforms
19 |     clean_filedir = ['/CDShare/REVERB_DATA/raw_wsj0_data/data/primary_microphone/si_tr/',tempdir];
20 |     enh_filedir = ['/Work18/2015/gemeng/se/mydnn/tools/MSLP/MCMSLP_L750_D512/dereverb_GSSn1a1b0.15/si_tr/',tempdir,'/1/RAW/'];
21 |     % get all the file names of enhanced wavforms
22 |     dirOutput = dir([enh_filedir, '*_2.wav']);
23 |     file_name = {dirOutput.name}';
24 |     disp(file_name);
25 |     [rows,cols] = size(file_name); 
26 |     
27 |     % cut the reverberation wavforms based on the length of the corresponding enhanced wavforms
28 |     save_path = ['/Work18/2015/gemeng/se/mydnn/tools/MSLP/MCMSLP_L750_D512/dereverb_GSSn1a1b0.15/cln_cut/si_tr/',tempdir];
29 |     mkdir(save_path);
30 |     for i=1:rows
31 |         enh_na = file_name{i};
32 |         clean_na = [enh_na(1:8),'.wav'];
33 |         disp(clean_na)
34 |         %na = file_name(i);
35 |         %audiopath=dir([filedir,file_name{i}]);
36 |         [clean_x, Fs] = audioread([clean_filedir, clean_na]);
37 |         [enh_x,Fs] = audioread([enh_filedir, enh_na]);
38 |         %[r,c]=size(x);
39 |         %if c > 1
40 |         %    disp(na);
41 |         %end;
42 |         y = clean_x(1:length(enh_x));
43 |         wrt_path = [save_path, clean_na];
44 |         audiowrite(wrt_path,y,Fs);
45 |     end;
46 |     %disp(x);
47 |     %disp(Fs);
48 | end;
49 | 


--------------------------------------------------------------------------------
/cut_data/cut_wav.m:
--------------------------------------------------------------------------------
 1 | filename1 = 'si_tr.txt'
 2 | [name1,path1] = textread(filename1,'%s %s')
 3 | filename2 = 'REVERB_WSJCAM0_tr.txt'
 4 | [name2,path2] = textread(filename2,'%s %s')
 5 | wavlist1 = path1;
 6 | wavlist1 = [wavlist1];
 7 | wavlist2 = path2;
 8 | wavlist2 = [wavlist2];
 9 | 
10 | 
11 | for i=1:length(wavlist2)
12 |      wav_cln = audioread(wavlist1{i});
13 |      wav_rev = audioread(wavlist2{i});
14 |      disp(wavlist1{i});
15 |      disp(wavlist2{i});
16 |      Fs = 16000
17 |      disp(length(wav_cln));
18 |      disp(length(wav_rev))
19 |      y = wav_rev(1:length(wav_cln));
20 |      str1 = '.wav'
21 |      wrt_path = ['reverb/',name2{i},str1];
22 |      audiowrite(wrt_path,y,Fs);
23 | end;
24 | 


--------------------------------------------------------------------------------
/data/batch_num.txt:
--------------------------------------------------------------------------------
1 | 11043 54451


--------------------------------------------------------------------------------
/data/cv.list:
--------------------------------------------------------------------------------
1 | data/tfrecords/cv.tfrecords
2 | 


--------------------------------------------------------------------------------
/data/cv/all_real_rev_dt.txt:
--------------------------------------------------------------------------------
  1 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c0201.wav
  2 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c0202.wav
  3 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c0205.wav
  4 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c0206.wav
  5 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c0208.wav
  6 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c0209.wav
  7 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020b.wav
  8 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020c.wav
  9 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020g.wav
 10 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020i.wav
 11 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020j.wav
 12 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020l.wav
 13 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020n.wav
 14 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020q.wav
 15 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c0201.wav
 16 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c0203.wav
 17 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c0205.wav
 18 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c0207.wav
 19 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c0208.wav
 20 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020b.wav
 21 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020c.wav
 22 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020e.wav
 23 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020g.wav
 24 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020h.wav
 25 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020i.wav
 26 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020l.wav
 27 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020m.wav
 28 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020n.wav
 29 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020o.wav
 30 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020p.wav
 31 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020r.wav
 32 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0201.wav
 33 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0202.wav
 34 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0203.wav
 35 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0204.wav
 36 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0205.wav
 37 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0206.wav
 38 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0207.wav
 39 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0208.wav
 40 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0209.wav
 41 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020a.wav
 42 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020b.wav
 43 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020c.wav
 44 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020d.wav
 45 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020e.wav
 46 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020f.wav
 47 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020g.wav
 48 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020k.wav
 49 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020l.wav
 50 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020m.wav
 51 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020n.wav
 52 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020p.wav
 53 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020q.wav
 54 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020r.wav
 55 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0201.wav
 56 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0202.wav
 57 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0203.wav
 58 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0204.wav
 59 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0205.wav
 60 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0206.wav
 61 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0209.wav
 62 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020a.wav
 63 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020b.wav
 64 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020d.wav
 65 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020e.wav
 66 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020f.wav
 67 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020g.wav
 68 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020h.wav
 69 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020i.wav
 70 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020k.wav
 71 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020l.wav
 72 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020m.wav
 73 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020o.wav
 74 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020p.wav
 75 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020q.wav
 76 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020r.wav
 77 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c0202.wav
 78 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c0203.wav
 79 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c0207.wav
 80 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c0208.wav
 81 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020a.wav
 82 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020b.wav
 83 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020d.wav
 84 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020e.wav
 85 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020g.wav
 86 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020i.wav
 87 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020l.wav
 88 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020m.wav
 89 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020o.wav
 90 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020q.wav
 91 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c0203.wav
 92 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c0204.wav
 93 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c0207.wav
 94 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020a.wav
 95 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020d.wav
 96 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020e.wav
 97 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020f.wav
 98 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020k.wav
 99 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020m.wav
100 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020o.wav
101 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020p.wav
102 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020r.wav
103 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020s.wav
104 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020t.wav
105 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020u.wav
106 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020v.wav
107 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020w.wav
108 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020x.wav
109 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020y.wav
110 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c020z.wav
111 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c0210.wav
112 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c0211.wav
113 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t10c0212.wav
114 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c0202.wav
115 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c0204.wav
116 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c0206.wav
117 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020d.wav
118 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020f.wav
119 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020j.wav
120 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020k.wav
121 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020q.wav
122 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020s.wav
123 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020t.wav
124 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020u.wav
125 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020v.wav
126 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020y.wav
127 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c020z.wav
128 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c0210.wav
129 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c0211.wav
130 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c0212.wav
131 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t6c0213.wav
132 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020h.wav
133 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020i.wav
134 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020j.wav
135 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020o.wav
136 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020s.wav
137 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020t.wav
138 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020u.wav
139 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020w.wav
140 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020x.wav
141 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020y.wav
142 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c020z.wav
143 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0210.wav
144 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0211.wav
145 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0212.wav
146 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t7c0213.wav
147 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0207.wav
148 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0208.wav
149 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020c.wav
150 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020j.wav
151 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020n.wav
152 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020t.wav
153 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020u.wav
154 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020v.wav
155 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020w.wav
156 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020x.wav
157 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c020z.wav
158 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0210.wav
159 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0211.wav
160 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0212.wav
161 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t8c0213.wav
162 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c0205.wav
163 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c0206.wav
164 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c0209.wav
165 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020c.wav
166 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020f.wav
167 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020h.wav
168 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020j.wav
169 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020n.wav
170 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020p.wav
171 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020r.wav
172 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020s.wav
173 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020t.wav
174 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020u.wav
175 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020v.wav
176 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020w.wav
177 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c020x.wav
178 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c0210.wav
179 | /Work18/2017/linan/SE/my_enh/cut_data/Real/real_dev/t9c0211.wav
180 | 


--------------------------------------------------------------------------------
/data/test/far/name:
--------------------------------------------------------------------------------
  1 | c30c0201
  2 | c30c0202
  3 | c30c0203
  4 | c30c0204
  5 | c30c0205
  6 | c30c0206
  7 | c30c0207
  8 | c30c0208
  9 | c30c0209
 10 | c30c020a
 11 | c30c020b
 12 | c30c020c
 13 | c30c020d
 14 | c30c020e
 15 | c30c020f
 16 | c30c020g
 17 | c30c020h
 18 | c30c020i
 19 | c30c020j
 20 | c30c020k
 21 | c30c020l
 22 | c30c020m
 23 | c30c020n
 24 | c30c020o
 25 | c30c020p
 26 | c30c020q
 27 | c30c020r
 28 | c30c020s
 29 | c30c020t
 30 | c30c020u
 31 | c30c020v
 32 | c30c020w
 33 | c30c020x
 34 | c30c020y
 35 | c30c020z
 36 | c30c0210
 37 | c30c0211
 38 | c30c0212
 39 | c30c0213
 40 | c30c0214
 41 | c30c0215
 42 | c32c0201
 43 | c32c0202
 44 | c32c0203
 45 | c32c0204
 46 | c32c0205
 47 | c32c0206
 48 | c32c0207
 49 | c32c0208
 50 | c32c0209
 51 | c32c020a
 52 | c32c020b
 53 | c32c020c
 54 | c32c020d
 55 | c32c020e
 56 | c32c020f
 57 | c32c020g
 58 | c32c020h
 59 | c32c020i
 60 | c32c020j
 61 | c32c020k
 62 | c32c020l
 63 | c32c020m
 64 | c32c020n
 65 | c32c020o
 66 | c32c020p
 67 | c32c020q
 68 | c32c020r
 69 | c32c020s
 70 | c32c020t
 71 | c32c020u
 72 | c32c020v
 73 | c32c020w
 74 | c32c020x
 75 | c32c020y
 76 | c32c020z
 77 | c32c0210
 78 | c32c0211
 79 | c32c0212
 80 | c32c0213
 81 | c32c0214
 82 | c32c0215
 83 | c33c0201
 84 | c33c0202
 85 | c33c0203
 86 | c33c0204
 87 | c33c0205
 88 | c33c0206
 89 | c33c0207
 90 | c33c0208
 91 | c33c0209
 92 | c33c020a
 93 | c33c020b
 94 | c33c020c
 95 | c33c020d
 96 | c33c020e
 97 | c33c020f
 98 | c33c020g
 99 | c33c020h
100 | c33c020i
101 | c33c020j
102 | c33c020k
103 | c33c020l
104 | c33c020m
105 | c33c020n
106 | c33c020o
107 | c33c020p
108 | c33c020q
109 | c33c020r
110 | c33c020s
111 | c33c020t
112 | c33c020u
113 | c33c020v
114 | c33c020w
115 | c33c020x
116 | c33c020y
117 | c33c020z
118 | c33c0210
119 | c33c0211
120 | c33c0212
121 | c33c0213
122 | c37c0201
123 | c37c0202
124 | c37c0203
125 | c37c0204
126 | c37c0205
127 | c37c0206
128 | c37c0207
129 | c37c0208
130 | c37c0209
131 | c37c020a
132 | c37c020b
133 | c37c020c
134 | c37c020d
135 | c37c020e
136 | c37c020f
137 | c37c020g
138 | c37c020h
139 | c37c020i
140 | c37c020j
141 | c37c020k
142 | c37c020l
143 | c37c020m
144 | c37c020n
145 | c37c020o
146 | c37c020p
147 | c37c020q
148 | c37c020r
149 | c37c020s
150 | c37c020t
151 | c37c020u
152 | c37c020v
153 | c37c020w
154 | c37c020x
155 | c37c020y
156 | c37c020z
157 | c37c0210
158 | c37c0211
159 | c37c0212
160 | c37c0213
161 | c39c0201
162 | c39c0202
163 | c39c0203
164 | c39c0204
165 | c39c0205
166 | c39c0206
167 | c39c0207
168 | c39c0208
169 | c39c0209
170 | c39c020a
171 | c39c020b
172 | c39c020c
173 | c39c020d
174 | c39c020e
175 | c39c020f
176 | c39c020g
177 | c39c020h
178 | c39c020i
179 | c39c020j
180 | c39c020k
181 | c39c020l
182 | c39c020m
183 | c39c020n
184 | c39c020o
185 | c39c020p
186 | c39c020q
187 | c39c020r
188 | c39c020s
189 | c39c020t
190 | c39c020u
191 | c39c020v
192 | c39c020w
193 | c39c020x
194 | c39c020y
195 | c39c020z
196 | c39c0210
197 | c39c0211
198 | c39c0212
199 | c3bc0201
200 | c3bc0202
201 | c3bc0203
202 | c3bc0204
203 | c3bc0205
204 | c3bc0206
205 | c3bc0207
206 | c3bc0208
207 | c3bc0209
208 | c3bc020a
209 | c3bc020b
210 | c3bc020c
211 | c3bc020d
212 | c3bc020e
213 | c3bc020f
214 | c3bc020g
215 | c3bc020h
216 | c3bc020i
217 | c3bc020j
218 | c3bc020k
219 | c3bc020l
220 | c3bc020m
221 | c3bc020n
222 | c3bc020o
223 | c3bc020p
224 | c3bc020q
225 | c3bc020r
226 | c3bc020s
227 | c3bc020t
228 | c3bc020u
229 | c3bc020v
230 | c3bc020w
231 | c3bc020x
232 | c3bc020y
233 | c3bc020z
234 | c3bc0210
235 | c3bc0211
236 | c3bc0212
237 | c3hc0201
238 | c3hc0202
239 | c3hc0203
240 | c3hc0204
241 | c3hc0205
242 | c3hc0206
243 | c3hc0207
244 | c3hc0208
245 | c3hc0209
246 | c3hc020a
247 | c3hc020b
248 | c3hc020c
249 | c3hc020d
250 | c3hc020e
251 | c3hc020f
252 | c3hc020g
253 | c3hc020h
254 | c3hc020i
255 | c3hc020j
256 | c3hc020k
257 | c3hc020l
258 | c3hc020m
259 | c3hc020n
260 | c3hc020o
261 | c3hc020p
262 | c3hc020q
263 | c3hc020r
264 | c3hc020s
265 | c3hc020t
266 | c3hc020u
267 | c3hc020v
268 | c3hc020w
269 | c3hc020x
270 | c3hc020y
271 | c3hc020z
272 | c3hc0210
273 | c3hc0211
274 | c3oc0201
275 | c3oc0202
276 | c3oc0203
277 | c3oc0204
278 | c3oc0205
279 | c3oc0206
280 | c3oc0207
281 | c3oc0208
282 | c3oc0209
283 | c3oc020a
284 | c3oc020b
285 | c3oc020c
286 | c3oc020d
287 | c3oc020e
288 | c3oc020f
289 | c3oc020g
290 | c3oc020h
291 | c3oc020i
292 | c3oc020j
293 | c3oc020k
294 | c3oc020l
295 | c3oc020m
296 | c3oc020n
297 | c3oc020o
298 | c3oc020p
299 | c3oc020q
300 | c3oc020r
301 | c3oc020s
302 | c3oc020t
303 | c3oc020u
304 | c3oc020v
305 | c3oc020w
306 | c3oc020x
307 | c3oc020y
308 | c3oc020z
309 | c3oc0210
310 | c3oc0212
311 | c3qc0201
312 | c3qc0202
313 | c3qc0203
314 | c3qc0204
315 | c3qc0205
316 | c3qc0206
317 | c3qc0207
318 | c3qc0208
319 | c3qc0209
320 | c3qc020a
321 | c3qc020b
322 | c3qc020c
323 | c3qc020d
324 | c3qc020e
325 | c3qc020f
326 | c3qc020g
327 | c3qc020h
328 | c3qc020i
329 | c3qc020j
330 | c3qc020k
331 | c3qc020l
332 | c3qc020m
333 | c3qc020n
334 | c3qc020o
335 | c3qc020p
336 | c3qc020q
337 | c3qc020r
338 | c3qc020s
339 | c3qc020t
340 | c3qc020u
341 | c3qc020v
342 | c3qc020w
343 | c3qc020x
344 | c3qc020y
345 | c3qc020z
346 | c3qc0210
347 | c3qc0211
348 | c3qc0212
349 | c3rc0201
350 | c3rc0202
351 | c3rc0203
352 | c3rc0204
353 | c3rc0205
354 | c3rc0206
355 | c3rc0207
356 | c3rc0208
357 | c3rc0209
358 | c3rc020a
359 | c3rc020b
360 | c3rc020c
361 | c3rc020d
362 | c3rc020e
363 | c3rc020f
364 | c3rc020g
365 | c3rc020h
366 | c3rc020i
367 | c3rc020j
368 | c3rc020k
369 | c3rc020l
370 | c3rc020m
371 | c3rc020n
372 | c3rc020o
373 | c3rc020p
374 | c3rc020q
375 | c3rc020r
376 | c3rc020s
377 | c3rc020t
378 | c3rc020u
379 | c3rc020v
380 | c3rc020w
381 | c3yc0201
382 | c3yc0202
383 | c3yc0203
384 | c3yc0204
385 | c3yc0205
386 | c3yc0206
387 | c3yc0207
388 | c3yc0208
389 | c3yc0209
390 | c3yc020a
391 | c3yc020b
392 | c3yc020c
393 | c3yc020d
394 | c3yc020e
395 | c3yc020f
396 | c3yc020g
397 | c3yc020h
398 | c3yc020i
399 | c3yc020j
400 | c3yc020k
401 | c3yc020l
402 | c3yc020m
403 | c3yc020n
404 | c3yc020o
405 | c3yc020p
406 | c3yc020q
407 | c3yc020r
408 | c3yc020s
409 | c3yc020t
410 | c3yc020u
411 | c3yc020v
412 | c3yc020w
413 | c3yc020x
414 | c3yc020y
415 | c3yc020z
416 | c3yc0210
417 | c3yc0211
418 | c3yc0212
419 | c3yc0213
420 | c3yc0214
421 | c46c0201
422 | c46c0202
423 | c46c0203
424 | c46c0204
425 | c46c0205
426 | c46c0206
427 | c46c0207
428 | c46c0208
429 | c46c0209
430 | c46c020a
431 | c46c020b
432 | c46c020c
433 | c46c020d
434 | c46c020e
435 | c46c020f
436 | c46c020g
437 | c46c020h
438 | c46c020i
439 | c46c020j
440 | c46c020k
441 | c46c020l
442 | c46c020m
443 | c46c020n
444 | c46c020o
445 | c46c020p
446 | c46c020q
447 | c46c020r
448 | c46c020s
449 | c46c020t
450 | c46c020u
451 | c46c020v
452 | c46c020w
453 | c46c020x
454 | c46c020y
455 | c46c020z
456 | c46c0210
457 | c46c0211
458 | c46c0212
459 | c46c0213
460 | c46c0214
461 | c48c0201
462 | c48c0202
463 | c48c0203
464 | c48c0204
465 | c48c0205
466 | c48c0206
467 | c48c0207
468 | c48c0208
469 | c48c0209
470 | c48c020a
471 | c48c020b
472 | c48c020c
473 | c48c020d
474 | c48c020e
475 | c48c020f
476 | c48c020g
477 | c48c020h
478 | c48c020i
479 | c48c020j
480 | c48c020k
481 | c48c020l
482 | c48c020m
483 | c48c020n
484 | c48c020o
485 | c48c020p
486 | c48c020q
487 | c48c020r
488 | c48c020s
489 | c48c020t
490 | c48c020u
491 | c48c020v
492 | c48c020w
493 | c48c020x
494 | c48c020y
495 | c48c020z
496 | c48c0210
497 | c48c0211
498 | c48c0212
499 | c48c0213
500 | c4ac0201
501 | c4ac0202
502 | c4ac0203
503 | c4ac0204
504 | c4ac0205
505 | c4ac0206
506 | c4ac0207
507 | c4ac0208
508 | c4ac0209
509 | c4ac020a
510 | c4ac020b
511 | c4ac020c
512 | c4ac020d
513 | c4ac020e
514 | c4ac020f
515 | c4ac020g
516 | c4ac020h
517 | c4ac020i
518 | c4ac020j
519 | c4ac020k
520 | c4ac020l
521 | c4ac020m
522 | c4ac020n
523 | c4ac020o
524 | c4ac020p
525 | c4ac020q
526 | c4ac020r
527 | c4ac020s
528 | c4ac020t
529 | c4ac020u
530 | c4ac020v
531 | c4ac020w
532 | c4ac020x
533 | c4ac020y
534 | c4ac020z
535 | c4ac0210
536 | c4ac0211
537 | c4ac0212
538 | c4ac0213
539 | 


--------------------------------------------------------------------------------
/data/test/far/pro.sh:
--------------------------------------------------------------------------------
1 | find /Work18/2017/linan/SE/my_enh/cut_data/SIMU/far_evl/ -name *.wav > far_dir
2 | paste -d ' ' far_dir cln_et.txt > inputs_dir.txt
3 | paste -d   name inputs_feat.txt > inputs.txt
4 | awk -F / {print } inputs_feat.txt | awk -F . {print } > name
5 | find /Work18/2017/linan/SE/my_enh/workspace/features/spectrogram/test/far/ -name *.p > inputs_feat.txt
6 | 


--------------------------------------------------------------------------------
/data/test/far/test.list:
--------------------------------------------------------------------------------
1 | data/test/far/tfrecords/test.tfrecords
2 | 


--------------------------------------------------------------------------------
/data/test/near/name:
--------------------------------------------------------------------------------
  1 | c30c0201
  2 | c30c0202
  3 | c30c0203
  4 | c30c0204
  5 | c30c0205
  6 | c30c0206
  7 | c30c0207
  8 | c30c0208
  9 | c30c0209
 10 | c30c020a
 11 | c30c020b
 12 | c30c020c
 13 | c30c020d
 14 | c30c020e
 15 | c30c020f
 16 | c30c020g
 17 | c30c020h
 18 | c30c020i
 19 | c30c020j
 20 | c30c020k
 21 | c30c020l
 22 | c30c020m
 23 | c30c020n
 24 | c30c020o
 25 | c30c020p
 26 | c30c020q
 27 | c30c020r
 28 | c30c020s
 29 | c30c020t
 30 | c30c020u
 31 | c30c020v
 32 | c30c020w
 33 | c30c020x
 34 | c30c020y
 35 | c30c020z
 36 | c30c0210
 37 | c30c0211
 38 | c30c0212
 39 | c30c0213
 40 | c30c0214
 41 | c30c0215
 42 | c32c0201
 43 | c32c0202
 44 | c32c0203
 45 | c32c0204
 46 | c32c0205
 47 | c32c0206
 48 | c32c0207
 49 | c32c0208
 50 | c32c0209
 51 | c32c020a
 52 | c32c020b
 53 | c32c020c
 54 | c32c020d
 55 | c32c020e
 56 | c32c020f
 57 | c32c020g
 58 | c32c020h
 59 | c32c020i
 60 | c32c020j
 61 | c32c020k
 62 | c32c020l
 63 | c32c020m
 64 | c32c020n
 65 | c32c020o
 66 | c32c020p
 67 | c32c020q
 68 | c32c020r
 69 | c32c020s
 70 | c32c020t
 71 | c32c020u
 72 | c32c020v
 73 | c32c020w
 74 | c32c020x
 75 | c32c020y
 76 | c32c020z
 77 | c32c0210
 78 | c32c0211
 79 | c32c0212
 80 | c32c0213
 81 | c32c0214
 82 | c32c0215
 83 | c33c0201
 84 | c33c0202
 85 | c33c0203
 86 | c33c0204
 87 | c33c0205
 88 | c33c0206
 89 | c33c0207
 90 | c33c0208
 91 | c33c0209
 92 | c33c020a
 93 | c33c020b
 94 | c33c020c
 95 | c33c020d
 96 | c33c020e
 97 | c33c020f
 98 | c33c020g
 99 | c33c020h
100 | c33c020i
101 | c33c020j
102 | c33c020k
103 | c33c020l
104 | c33c020m
105 | c33c020n
106 | c33c020o
107 | c33c020p
108 | c33c020q
109 | c33c020r
110 | c33c020s
111 | c33c020t
112 | c33c020u
113 | c33c020v
114 | c33c020w
115 | c33c020x
116 | c33c020y
117 | c33c020z
118 | c33c0210
119 | c33c0211
120 | c33c0212
121 | c33c0213
122 | c37c0201
123 | c37c0202
124 | c37c0203
125 | c37c0204
126 | c37c0205
127 | c37c0206
128 | c37c0207
129 | c37c0208
130 | c37c0209
131 | c37c020a
132 | c37c020b
133 | c37c020c
134 | c37c020d
135 | c37c020e
136 | c37c020f
137 | c37c020g
138 | c37c020h
139 | c37c020i
140 | c37c020j
141 | c37c020k
142 | c37c020l
143 | c37c020m
144 | c37c020n
145 | c37c020o
146 | c37c020p
147 | c37c020q
148 | c37c020r
149 | c37c020s
150 | c37c020t
151 | c37c020u
152 | c37c020v
153 | c37c020w
154 | c37c020x
155 | c37c020y
156 | c37c020z
157 | c37c0210
158 | c37c0211
159 | c37c0212
160 | c37c0213
161 | c39c0201
162 | c39c0202
163 | c39c0203
164 | c39c0204
165 | c39c0205
166 | c39c0206
167 | c39c0207
168 | c39c0208
169 | c39c0209
170 | c39c020a
171 | c39c020b
172 | c39c020c
173 | c39c020d
174 | c39c020e
175 | c39c020f
176 | c39c020g
177 | c39c020h
178 | c39c020i
179 | c39c020j
180 | c39c020k
181 | c39c020l
182 | c39c020m
183 | c39c020n
184 | c39c020o
185 | c39c020p
186 | c39c020q
187 | c39c020r
188 | c39c020s
189 | c39c020t
190 | c39c020u
191 | c39c020v
192 | c39c020w
193 | c39c020x
194 | c39c020y
195 | c39c020z
196 | c39c0210
197 | c39c0211
198 | c39c0212
199 | c3bc0201
200 | c3bc0202
201 | c3bc0203
202 | c3bc0204
203 | c3bc0205
204 | c3bc0206
205 | c3bc0207
206 | c3bc0208
207 | c3bc0209
208 | c3bc020a
209 | c3bc020b
210 | c3bc020c
211 | c3bc020d
212 | c3bc020e
213 | c3bc020f
214 | c3bc020g
215 | c3bc020h
216 | c3bc020i
217 | c3bc020j
218 | c3bc020k
219 | c3bc020l
220 | c3bc020m
221 | c3bc020n
222 | c3bc020o
223 | c3bc020p
224 | c3bc020q
225 | c3bc020r
226 | c3bc020s
227 | c3bc020t
228 | c3bc020u
229 | c3bc020v
230 | c3bc020w
231 | c3bc020x
232 | c3bc020y
233 | c3bc020z
234 | c3bc0210
235 | c3bc0211
236 | c3bc0212
237 | c3hc0201
238 | c3hc0202
239 | c3hc0203
240 | c3hc0204
241 | c3hc0205
242 | c3hc0206
243 | c3hc0207
244 | c3hc0208
245 | c3hc0209
246 | c3hc020a
247 | c3hc020b
248 | c3hc020c
249 | c3hc020d
250 | c3hc020e
251 | c3hc020f
252 | c3hc020g
253 | c3hc020h
254 | c3hc020i
255 | c3hc020j
256 | c3hc020k
257 | c3hc020l
258 | c3hc020m
259 | c3hc020n
260 | c3hc020o
261 | c3hc020p
262 | c3hc020q
263 | c3hc020r
264 | c3hc020s
265 | c3hc020t
266 | c3hc020u
267 | c3hc020v
268 | c3hc020w
269 | c3hc020x
270 | c3hc020y
271 | c3hc020z
272 | c3hc0210
273 | c3hc0211
274 | c3oc0201
275 | c3oc0202
276 | c3oc0203
277 | c3oc0204
278 | c3oc0205
279 | c3oc0206
280 | c3oc0207
281 | c3oc0208
282 | c3oc0209
283 | c3oc020a
284 | c3oc020b
285 | c3oc020c
286 | c3oc020d
287 | c3oc020e
288 | c3oc020f
289 | c3oc020g
290 | c3oc020h
291 | c3oc020i
292 | c3oc020j
293 | c3oc020k
294 | c3oc020l
295 | c3oc020m
296 | c3oc020n
297 | c3oc020o
298 | c3oc020p
299 | c3oc020q
300 | c3oc020r
301 | c3oc020s
302 | c3oc020t
303 | c3oc020u
304 | c3oc020v
305 | c3oc020w
306 | c3oc020x
307 | c3oc020y
308 | c3oc020z
309 | c3oc0210
310 | c3oc0212
311 | c3qc0201
312 | c3qc0202
313 | c3qc0203
314 | c3qc0204
315 | c3qc0205
316 | c3qc0206
317 | c3qc0207
318 | c3qc0208
319 | c3qc0209
320 | c3qc020a
321 | c3qc020b
322 | c3qc020c
323 | c3qc020d
324 | c3qc020e
325 | c3qc020f
326 | c3qc020g
327 | c3qc020h
328 | c3qc020i
329 | c3qc020j
330 | c3qc020k
331 | c3qc020l
332 | c3qc020m
333 | c3qc020n
334 | c3qc020o
335 | c3qc020p
336 | c3qc020q
337 | c3qc020r
338 | c3qc020s
339 | c3qc020t
340 | c3qc020u
341 | c3qc020v
342 | c3qc020w
343 | c3qc020x
344 | c3qc020y
345 | c3qc020z
346 | c3qc0210
347 | c3qc0211
348 | c3qc0212
349 | c3rc0201
350 | c3rc0202
351 | c3rc0203
352 | c3rc0204
353 | c3rc0205
354 | c3rc0206
355 | c3rc0207
356 | c3rc0208
357 | c3rc0209
358 | c3rc020a
359 | c3rc020b
360 | c3rc020c
361 | c3rc020d
362 | c3rc020e
363 | c3rc020f
364 | c3rc020g
365 | c3rc020h
366 | c3rc020i
367 | c3rc020j
368 | c3rc020k
369 | c3rc020l
370 | c3rc020m
371 | c3rc020n
372 | c3rc020o
373 | c3rc020p
374 | c3rc020q
375 | c3rc020r
376 | c3rc020s
377 | c3rc020t
378 | c3rc020u
379 | c3rc020v
380 | c3rc020w
381 | c3yc0201
382 | c3yc0202
383 | c3yc0203
384 | c3yc0204
385 | c3yc0205
386 | c3yc0206
387 | c3yc0207
388 | c3yc0208
389 | c3yc0209
390 | c3yc020a
391 | c3yc020b
392 | c3yc020c
393 | c3yc020d
394 | c3yc020e
395 | c3yc020f
396 | c3yc020g
397 | c3yc020h
398 | c3yc020i
399 | c3yc020j
400 | c3yc020k
401 | c3yc020l
402 | c3yc020m
403 | c3yc020n
404 | c3yc020o
405 | c3yc020p
406 | c3yc020q
407 | c3yc020r
408 | c3yc020s
409 | c3yc020t
410 | c3yc020u
411 | c3yc020v
412 | c3yc020w
413 | c3yc020x
414 | c3yc020y
415 | c3yc020z
416 | c3yc0210
417 | c3yc0211
418 | c3yc0212
419 | c3yc0213
420 | c3yc0214
421 | c46c0201
422 | c46c0202
423 | c46c0203
424 | c46c0204
425 | c46c0205
426 | c46c0206
427 | c46c0207
428 | c46c0208
429 | c46c0209
430 | c46c020a
431 | c46c020b
432 | c46c020c
433 | c46c020d
434 | c46c020e
435 | c46c020f
436 | c46c020g
437 | c46c020h
438 | c46c020i
439 | c46c020j
440 | c46c020k
441 | c46c020l
442 | c46c020m
443 | c46c020n
444 | c46c020o
445 | c46c020p
446 | c46c020q
447 | c46c020r
448 | c46c020s
449 | c46c020t
450 | c46c020u
451 | c46c020v
452 | c46c020w
453 | c46c020x
454 | c46c020y
455 | c46c020z
456 | c46c0210
457 | c46c0211
458 | c46c0212
459 | c46c0213
460 | c46c0214
461 | c48c0201
462 | c48c0202
463 | c48c0203
464 | c48c0204
465 | c48c0205
466 | c48c0206
467 | c48c0207
468 | c48c0208
469 | c48c0209
470 | c48c020a
471 | c48c020b
472 | c48c020c
473 | c48c020d
474 | c48c020e
475 | c48c020f
476 | c48c020g
477 | c48c020h
478 | c48c020i
479 | c48c020j
480 | c48c020k
481 | c48c020l
482 | c48c020m
483 | c48c020n
484 | c48c020o
485 | c48c020p
486 | c48c020q
487 | c48c020r
488 | c48c020s
489 | c48c020t
490 | c48c020u
491 | c48c020v
492 | c48c020w
493 | c48c020x
494 | c48c020y
495 | c48c020z
496 | c48c0210
497 | c48c0211
498 | c48c0212
499 | c48c0213
500 | c4ac0201
501 | c4ac0202
502 | c4ac0203
503 | c4ac0204
504 | c4ac0205
505 | c4ac0206
506 | c4ac0207
507 | c4ac0208
508 | c4ac0209
509 | c4ac020a
510 | c4ac020b
511 | c4ac020c
512 | c4ac020d
513 | c4ac020e
514 | c4ac020f
515 | c4ac020g
516 | c4ac020h
517 | c4ac020i
518 | c4ac020j
519 | c4ac020k
520 | c4ac020l
521 | c4ac020m
522 | c4ac020n
523 | c4ac020o
524 | c4ac020p
525 | c4ac020q
526 | c4ac020r
527 | c4ac020s
528 | c4ac020t
529 | c4ac020u
530 | c4ac020v
531 | c4ac020w
532 | c4ac020x
533 | c4ac020y
534 | c4ac020z
535 | c4ac0210
536 | c4ac0211
537 | c4ac0212
538 | c4ac0213
539 | 


--------------------------------------------------------------------------------
/data/test/near/test.list:
--------------------------------------------------------------------------------
1 | data/test/near/tfrecords/test.tfrecords
2 | 


--------------------------------------------------------------------------------
/data/test/real/name:
--------------------------------------------------------------------------------
  1 | t21c0201
  2 | t21c0202
  3 | t21c0204
  4 | t21c0205
  5 | t21c0209
  6 | t21c020b
  7 | t21c020d
  8 | t21c020e
  9 | t21c020g
 10 | t21c020i
 11 | t21c020j
 12 | t21c020k
 13 | t21c020m
 14 | t21c020n
 15 | t21c020o
 16 | t21c020q
 17 | t22c0202
 18 | t22c0205
 19 | t22c0206
 20 | t22c0208
 21 | t22c020b
 22 | t22c020d
 23 | t22c020e
 24 | t22c020f
 25 | t22c020h
 26 | t22c020i
 27 | t22c020k
 28 | t22c020m
 29 | t22c020n
 30 | t22c020p
 31 | t23c0201
 32 | t23c0202
 33 | t23c0203
 34 | t23c020b
 35 | t23c020c
 36 | t23c020d
 37 | t23c020e
 38 | t23c020f
 39 | t23c020h
 40 | t23c020i
 41 | t23c020j
 42 | t23c020k
 43 | t23c020m
 44 | t23c020n
 45 | t23c020o
 46 | t23c020p
 47 | t23c020q
 48 | t24c0201
 49 | t24c0202
 50 | t24c0203
 51 | t24c0204
 52 | t24c0205
 53 | t24c0206
 54 | t24c0207
 55 | t24c0208
 56 | t24c0209
 57 | t24c020a
 58 | t24c020b
 59 | t24c020d
 60 | t24c020e
 61 | t24c020f
 62 | t24c020h
 63 | t24c020j
 64 | t24c020k
 65 | t24c020l
 66 | t24c020m
 67 | t24c020n
 68 | t24c020o
 69 | t24c020p
 70 | t24c020q
 71 | t25c0201
 72 | t25c0203
 73 | t25c0204
 74 | t25c0205
 75 | t25c0207
 76 | t25c0208
 77 | t25c0209
 78 | t25c020b
 79 | t25c020d
 80 | t25c020g
 81 | t25c020h
 82 | t25c020i
 83 | t25c020j
 84 | t25c020l
 85 | t25c020o
 86 | t25c020p
 87 | t25c020r
 88 | t36c0201
 89 | t36c0202
 90 | t36c0203
 91 | t36c0204
 92 | t36c0206
 93 | t36c0207
 94 | t36c0208
 95 | t36c020g
 96 | t36c020k
 97 | t36c020n
 98 | t37c0202
 99 | t37c0204
100 | t37c0205
101 | t37c0209
102 | t37c020a
103 | t37c020g
104 | t37c020h
105 | t37c020i
106 | t37c020j
107 | t37c020l
108 | t37c020n
109 | t37c020p
110 | t37c020r
111 | t38c0201
112 | t38c0202
113 | t38c0203
114 | t38c0204
115 | t38c0205
116 | t38c0206
117 | t38c0207
118 | t38c0208
119 | t38c0209
120 | t38c020a
121 | t38c020b
122 | t38c020c
123 | t38c020d
124 | t38c020e
125 | t38c020f
126 | t38c020h
127 | t38c020i
128 | t38c020j
129 | t38c020k
130 | t38c020l
131 | t38c020m
132 | t38c020o
133 | t38c020r
134 | t38c020s
135 | t39c0201
136 | t39c0202
137 | t39c0203
138 | t39c0204
139 | t39c0205
140 | t39c0206
141 | t39c0207
142 | t39c0208
143 | t39c0209
144 | t39c020a
145 | t39c020b
146 | t39c020c
147 | t39c020d
148 | t39c020e
149 | t39c020f
150 | t39c020g
151 | t39c020h
152 | t39c020k
153 | t39c020l
154 | t39c020n
155 | t39c020o
156 | t39c020p
157 | t39c020q
158 | t39c020r
159 | t39c020s
160 | t40c0201
161 | t40c0202
162 | t40c0203
163 | t40c0204
164 | t40c0205
165 | t40c0206
166 | t40c0207
167 | t40c0208
168 | t40c0209
169 | t40c020a
170 | t40c020b
171 | t40c020c
172 | t40c020d
173 | t40c020e
174 | t40c020f
175 | t40c020g
176 | t40c020h
177 | t40c020i
178 | t40c020k
179 | t40c020l
180 | t40c020m
181 | t40c020n
182 | t40c020o
183 | t40c020p
184 | t40c020q
185 | t40c020r
186 | t40c020s
187 | t21c0206
188 | t21c0207
189 | t21c0208
190 | t21c020a
191 | t21c020c
192 | t21c020f
193 | t21c020h
194 | t21c020l
195 | t21c020p
196 | t21c020r
197 | t21c020s
198 | t21c020t
199 | t21c020u
200 | t21c020v
201 | t21c020w
202 | t21c020x
203 | t21c020y
204 | t21c020z
205 | t21c0210
206 | t21c0211
207 | t21c0212
208 | t22c0201
209 | t22c0203
210 | t22c0204
211 | t22c0207
212 | t22c0209
213 | t22c020a
214 | t22c020c
215 | t22c020g
216 | t22c020j
217 | t22c020l
218 | t22c020o
219 | t22c020q
220 | t22c020r
221 | t22c020s
222 | t22c020t
223 | t22c020u
224 | t22c020v
225 | t22c020w
226 | t22c020x
227 | t22c020y
228 | t22c020z
229 | t22c0210
230 | t22c0211
231 | t23c0204
232 | t23c0205
233 | t23c0206
234 | t23c0207
235 | t23c0208
236 | t23c0209
237 | t23c020a
238 | t23c020g
239 | t23c020l
240 | t23c020r
241 | t23c020s
242 | t23c020t
243 | t23c020u
244 | t23c020v
245 | t23c020w
246 | t23c020x
247 | t23c020y
248 | t23c020z
249 | t23c0210
250 | t23c0211
251 | t23c0212
252 | t24c020c
253 | t24c020i
254 | t24c020r
255 | t24c020s
256 | t24c020t
257 | t24c020u
258 | t24c020v
259 | t24c020w
260 | t24c020y
261 | t24c020z
262 | t24c0210
263 | t24c0211
264 | t24c0212
265 | t25c0202
266 | t25c0206
267 | t25c020a
268 | t25c020c
269 | t25c020e
270 | t25c020f
271 | t25c020k
272 | t25c020m
273 | t25c020n
274 | t25c020q
275 | t25c020s
276 | t25c020t
277 | t25c020u
278 | t25c020v
279 | t25c020w
280 | t25c020x
281 | t25c020y
282 | t25c020z
283 | t25c0210
284 | t25c0211
285 | t25c0212
286 | t25c0213
287 | t25c0214
288 | t25c0215
289 | t36c0205
290 | t36c0209
291 | t36c020a
292 | t36c020b
293 | t36c020c
294 | t36c020d
295 | t36c020e
296 | t36c020f
297 | t36c020h
298 | t36c020i
299 | t36c020l
300 | t36c020m
301 | t36c020o
302 | t36c020p
303 | t36c020r
304 | t36c020s
305 | t36c020t
306 | t36c020u
307 | t36c020w
308 | t36c020x
309 | t36c020y
310 | t36c020z
311 | t36c0210
312 | t36c0211
313 | t36c0212
314 | t36c0213
315 | t36c0214
316 | t37c0203
317 | t37c0206
318 | t37c0208
319 | t37c020b
320 | t37c020c
321 | t37c020d
322 | t37c020e
323 | t37c020f
324 | t37c020k
325 | t37c020m
326 | t37c020o
327 | t37c020q
328 | t37c020s
329 | t37c020t
330 | t37c020u
331 | t37c020v
332 | t37c020w
333 | t37c020x
334 | t37c020y
335 | t37c020z
336 | t37c0210
337 | t37c0212
338 | t37c0213
339 | t38c020n
340 | t38c020t
341 | t38c020u
342 | t38c020v
343 | t38c020w
344 | t38c020x
345 | t38c020y
346 | t38c020z
347 | t38c0210
348 | t38c0211
349 | t38c0212
350 | t38c0214
351 | t39c020t
352 | t39c020u
353 | t39c020v
354 | t39c020x
355 | t39c020y
356 | t39c020z
357 | t39c0210
358 | t39c0211
359 | t39c0212
360 | t39c0213
361 | t39c0214
362 | t40c020t
363 | t40c020u
364 | t40c020v
365 | t40c020w
366 | t40c020x
367 | t40c020y
368 | t40c020z
369 | t40c0210
370 | t40c0211
371 | t40c0213
372 | t40c0214
373 | 


--------------------------------------------------------------------------------
/data/test/real/test.list:
--------------------------------------------------------------------------------
1 | data/test/real/tfrecords/test.tfrecords
2 | 


--------------------------------------------------------------------------------
/data/tr.list:
--------------------------------------------------------------------------------
1 | data/tfrecords/tr.tfrecords
2 | 


--------------------------------------------------------------------------------
/data/train_cmvn.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/data/train_cmvn.npz


--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2019.7    Nan LEE
  2 | 
  3 | import argparse
  4 | import os
  5 | import csv
  6 | import numpy as np
  7 | import cPickle
  8 | #import matplotlib.pyplot as plt
  9 | 
 10 | 
 11 | def calculate_pesq(args):
 12 |     """Calculate PESQ of all enhaced speech. 
 13 |     
 14 |     Args:
 15 |       workspace: str, path of workspace. 
 16 |       speech_dir: str, path of clean speech. 
 17 |       te_snr: float, testing SNR. 
 18 |     """
 19 |     workspace = args.workspace
 20 |     speech_dir = args.speech_dir
 21 |     #te_snr = args.te_snr
 22 |     type = args.type
 23 |     # Remove already existed file. 
 24 |     os.system('rm _pesq_itu_results.txt')
 25 |     os.system('rm _pesq_results.txt')
 26 |     
 27 |     # Calculate PESQ of all enhaced speech. 
 28 |     enh_speech_dir = os.path.join(workspace, "enh_wavs", type)
 29 |     names = os.listdir(enh_speech_dir)
 30 |     for (cnt, na) in enumerate(names):
 31 |         print(cnt, na)
 32 |         enh_path = os.path.join(enh_speech_dir, na)
 33 |         
 34 |         speech_na = na.split('.')[0]
 35 |         speech_path = os.path.join(speech_dir, "%s.wav" % speech_na)
 36 |         print(speech_path)
 37 |         print(enh_path) 
 38 |         # Call executable PESQ tool. 
 39 |         cmd = ' '.join(["./pesq", speech_path, enh_path, "+16000"])
 40 |         os.system(cmd)        
 41 |         
 42 |         
 43 | def get_stats(args):
 44 |     """Calculate stats of PESQ. 
 45 |     """
 46 |     pesq_path = "_pesq_results.txt"
 47 |     with open(pesq_path, 'rb') as f:
 48 |         reader = csv.reader(f, delimiter='\t')
 49 |         lis = list(reader)
 50 |         
 51 |     pesq_dict = {}
 52 |     for i1 in xrange(1, len(lis) - 1):
 53 |         li = lis[i1]
 54 |         na = li[0]
 55 |         pesq = float(li[1])
 56 |         noise_type = na.split('.')[1]
 57 |         if noise_type not in pesq_dict.keys():
 58 |             pesq_dict[noise_type] = [pesq]
 59 |         else:
 60 |             pesq_dict[noise_type].append(pesq)
 61 |         
 62 |     avg_list, std_list = [], []
 63 |     f = "{0:<16} {1:<16}"
 64 |     print(f.format("Noise", "PESQ"))
 65 |     print("---------------------------------")
 66 |     for noise_type in pesq_dict.keys():
 67 |         pesqs = pesq_dict[noise_type]
 68 |         avg_pesq = np.mean(pesqs)
 69 |         std_pesq = np.std(pesqs)
 70 |         avg_list.append(avg_pesq)
 71 |         std_list.append(std_pesq)
 72 |         print(f.format(noise_type, "%.2f +- %.2f" % (avg_pesq, std_pesq)))
 73 |     print("---------------------------------")
 74 |     print(f.format("Avg.", "%.2f +- %.2f" % (np.mean(avg_list), np.mean(std_list))))
 75 | 
 76 | 
 77 | if __name__ == '__main__':
 78 |     parser = argparse.ArgumentParser()
 79 |     subparsers = parser.add_subparsers(dest='mode')
 80 | 
 81 |     parser_plot_training_stat = subparsers.add_parser('plot_training_stat')
 82 |     parser_plot_training_stat.add_argument('--workspace', type=str, required=True)
 83 |     #parser_plot_training_stat.add_argument('--tr_snr', type=float, required=True)
 84 |     parser_plot_training_stat.add_argument('--bgn_iter', type=int, required=True)
 85 |     parser_plot_training_stat.add_argument('--fin_iter', type=int, required=True)
 86 |     parser_plot_training_stat.add_argument('--interval_iter', type=int, required=True)
 87 | 
 88 |     parser_calculate_pesq = subparsers.add_parser('calculate_pesq')
 89 |     parser_calculate_pesq.add_argument('--type', type=str, required=True)
 90 |     parser_calculate_pesq.add_argument('--workspace', type=str, required=True)
 91 |     parser_calculate_pesq.add_argument('--speech_dir', type=str, required=True)
 92 |     #parser_calculate_pesq.add_argument('--te_snr', type=float, required=True)
 93 |     
 94 |     parser_get_stats = subparsers.add_parser('get_stats')
 95 |     
 96 |     args = parser.parse_args()
 97 |     
 98 |     if args.mode == 'plot_training_stat':
 99 |         plot_training_stat(args)
100 |     elif args.mode == 'calculate_pesq':
101 |         calculate_pesq(args)
102 |     elif args.mode == 'get_stats':
103 |         get_stats(args)
104 |     else:
105 |         raise Exception("Error!")
106 | 


--------------------------------------------------------------------------------
/ex_trac.sh:
--------------------------------------------------------------------------------
 1 | train_file_dir=data/train_dir
 2 | test_file_dir=data/test_dir
 3 | val_size=1000
 4 | #####################Data prepare########################
 5 | find $train_file_dir -name '*.wav' > data/all_wav
 6 | find $test_file_dir -name '*.wav' > data/test_wav
 7 | for dataset in data/all_wav data/test_wav;do
 8 | 	awk -F '/' '{print $NF}' $dataset | awk -F '.' '{print $1}' > data/wav_name
 9 | 	past -d ' ' data/wav_name $dataset > data/`echo $dataset | awk -F '/' '{print $2}'`.txt
10 | 	rm data/wav_name
11 | done
12 | python scripts/get_train_val_scp.py --data_dir=$data --val_size $val_size
13 | echo "Finish data prepare!"
14 | date
15 | ########################################################
16 | 
17 | python2.7 pre_process_data.py calculate_train_features --train_speech_path="data/tr/inputs.txt" --data_type=train
18 | python2.7 pre_process_data.py calculate_train_features --train_speech_path="data/cv/inputs.txt" --data_type=cv
19 | python2.7 pre_process_test.py calculate_train_features --train_speech_path="data/test/real/rev_dir.txt" --data_type=test/real
20 | 


--------------------------------------------------------------------------------
/io_funcs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/io_funcs/__init__.py


--------------------------------------------------------------------------------
/io_funcs/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/io_funcs/__init__.pyc


--------------------------------------------------------------------------------
/io_funcs/__pycache__/kaldi_io.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/io_funcs/__pycache__/kaldi_io.cpython-35.pyc


--------------------------------------------------------------------------------
/io_funcs/convert_cmvn_to_numpy.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2017    Ke Wang
  5 | 
  6 | """Convert inputs and lables GLOBAL cmvns to a Numpy file."""
  7 | 
  8 | from __future__ import absolute_import
  9 | from __future__ import division
 10 | from __future__ import print_function
 11 | 
 12 | import argparse
 13 | import os
 14 | import sys
 15 | import struct
 16 | 
 17 | import numpy as np
 18 | 
 19 | def convert_cmvn_to_numpy(inputs_cmvn, labels_cmvn, save_dir):
 20 |     """Convert global binary ark cmvn to numpy format."""
 21 | 
 22 |     print("Convert %s and %s to Numpy format" % (inputs_cmvn, labels_cmvn))
 23 |     inputs_filename = inputs_cmvn
 24 |     labels_filename = labels_cmvn
 25 | 
 26 |     inputs = read_binary_file(inputs_filename, 0)
 27 |     labels = read_binary_file(labels_filename, 0)
 28 | 
 29 |     inputs_frame = inputs[0][-1]
 30 |     labels_frame = labels[0][-1]
 31 | 
 32 |     # assert inputs_frame == labels_frame
 33 | 
 34 |     cmvn_inputs = np.hsplit(inputs, [inputs.shape[1] - 1])[0]
 35 |     cmvn_labels = np.hsplit(labels, [labels.shape[1] - 1])[0]
 36 | 
 37 |     mean_inputs = cmvn_inputs[0] / inputs_frame
 38 |     stddev_inputs = np.sqrt(cmvn_inputs[1] / inputs_frame - mean_inputs ** 2)
 39 |     mean_labels = cmvn_labels[0] / labels_frame
 40 |     stddev_labels = np.sqrt(cmvn_labels[1] / labels_frame - mean_labels ** 2)
 41 | 
 42 |     cmvn_name = os.path.join(save_dir, "train_cmvn.npz")
 43 |     np.savez(cmvn_name,
 44 |              mean_inputs=mean_inputs,
 45 |              stddev_inputs=stddev_inputs,
 46 |              mean_labels=mean_labels,
 47 |              stddev_labels=stddev_labels)
 48 | 
 49 |     print("Write to %s" % cmvn_name)
 50 | 
 51 | 
 52 | def read_binary_file(filename, offset=0):
 53 |     """Read data from matlab binary file (row, col and matrix).
 54 | 
 55 |     Returns:
 56 |         A numpy matrix containing data of the given binary file.
 57 |     """
 58 |     read_buffer = open(filename, 'rb')
 59 |     read_buffer.seek(int(offset), 0)
 60 |     header = struct.unpack('<xcccc', read_buffer.read(5))
 61 |     if header[0] != 'B':
 62 |         print("Input .ark file is not binary")
 63 |         sys.exit(-1)
 64 |     if header[1] == 'C':
 65 |         print("Input .ark file is compressed, exist now.")
 66 |         sys.exit(-1)
 67 | 
 68 |     rows = 0; cols= 0
 69 |     _, rows = struct.unpack('<bi', read_buffer.read(5))
 70 |     _, cols = struct.unpack('<bi', read_buffer.read(5))
 71 | 
 72 |     if header[1] == "F":
 73 |         tmp_mat = np.frombuffer(read_buffer.read(rows * cols * 4),
 74 |                                 dtype=np.float32)
 75 |     elif header[1] == "D":
 76 |         tmp_mat = np.frombuffer(read_buffer.read(rows * cols * 8),
 77 |                                 dtype=np.float64)
 78 |     mat = np.reshape(tmp_mat, (rows, cols))
 79 |     read_buffer.close()
 80 | 
 81 |     return mat
 82 | 
 83 | 
 84 | if __name__ == '__main__':
 85 |     parser = argparse.ArgumentParser()
 86 |     parser.add_argument(
 87 |         '--inputs',
 88 |         type=str,
 89 |         default='data/train/inputs.cmvn',
 90 |         help="Name of input CMVN file."
 91 |     )
 92 | 
 93 |     parser.add_argument(
 94 |         '--labels',
 95 |         type=str,
 96 |         default='data/train/labels.cmvn',
 97 |         help="Name of label CMVN file."
 98 |     )
 99 |     parser.add_argument(
100 |         '--save_dir',
101 |         required=True,
102 |         help="Directory to save Numpy format CMVN file."
103 |     )
104 |     FLAGS, unparsed = parser.parse_known_args()
105 | 
106 |     convert_cmvn_to_numpy(FLAGS.inputs, FLAGS.labels, FLAGS.save_dir)
107 | 


--------------------------------------------------------------------------------
/io_funcs/cut_cln_wav.m:
--------------------------------------------------------------------------------
 1 | % ----------------------------------------------------------------------------------------------------
 2 | % parameters and configures
 3 | % ----------------------------------------------------------------------------------------------------
 4 | %dir_name = {'c31/','c34/','c35/','c38/','c3c/','c3d/','c3f/','c3j/','c3k/','c3l/','c3p/','c3s/','c3t/','c3w/','c3z/','c40/','c41/','c42/','c45/','c49/'};
 5 | %dir_name = {'c30/','c32/','c33/','c37/','c39/','c3b/','c3h/','c3o/','c3q/','c3r/','c3y/','c46/','c48/','c4a/'};
 6 | %dir_name = {'c36/','c3a/','c3e/','c3g/','c3i/','c3m/','c3n/','c3u/','c3v/','c3x/','c43/','c44/','c47/','c4b/'};
 7 | %dir_name = {'c02/','c05/','c08/','c0b/','c0e/','c0h/','c0k/','c0n/','c0q/','c0t/','c0w/','c0z/','c12/','c15/','c18/','c1b/','c1e/','c1h/','c1k/','c1n/','c1q/','c1t/','c1w/','c1z/','c22/','c25/','c28/','c2b/','c2e/','c2h/','c2k/','c03/','c06/','c09/','c0c/','c0f/','c0i/','c0l/','c0o/','c0r/','c0u/','c0x/','c10/','c13/','c16/','c19/','c1c/','c1f/','c1i/','c1l/','c1o/','c1r/','c1u/','c1x/','c20/','c23/','c26/','c29/','c2c/','c2f/','c2i/','c2l/','c04/','c07/','c0a/','c0d/','c0g/','c0j/','c0m/','c0p/','c0s/','c0v/','c0y/','c11/','c14/','c17/','c1a/','c1d/','c1g/','c1j/','c1m/','c1p/','c1s/','c1v/','c1y/','c21/','c24/','c27/','c2a/','c2d/','c2g/','c2j/'};
 8 | dir_name ={'c02/'};
 9 | %disp(length(dir_name));
10 | 
11 | % ---------------------------------------------------------------------------------------------------
12 | % cut wavforms
13 | % --------------------------------------------------------------------------------------------------
14 | for t=1:length(dir_name)
15 |     % get the current sub-directory
16 |     tempdir=dir_name{t};
17 |     disp(tempdir);
18 |     % define the path of reverberation wavforms and enhanced wavforms
19 |     clean_filedir = ['/CDShare/REVERB_DATA/raw_wsj0_data/data/primary_microphone/si_tr/',tempdir];
20 |     enh_filedir = ['/Work18/2015/gemeng/se/mydnn/tools/MSLP/MCMSLP_L750_D512/dereverb_GSSn1a1b0.15/si_tr/',tempdir,'/1/RAW/'];
21 |     % get all the file names of enhanced wavforms
22 |     dirOutput = dir([enh_filedir, '*_2.wav']);
23 |     file_name = {dirOutput.name}';
24 |     disp(file_name);
25 |     [rows,cols] = size(file_name); 
26 |     
27 |     % cut the reverberation wavforms based on the length of the corresponding enhanced wavforms
28 |     save_path = ['/Work18/2015/gemeng/se/mydnn/tools/MSLP/MCMSLP_L750_D512/dereverb_GSSn1a1b0.15/cln_cut/si_tr/',tempdir];
29 |     mkdir(save_path);
30 |     for i=1:rows
31 |         enh_na = file_name{i};
32 |         clean_na = [enh_na(1:8),'.wav'];
33 |         disp(clean_na)
34 |         %na = file_name(i);
35 |         %audiopath=dir([filedir,file_name{i}]);
36 |         [clean_x, Fs] = audioread([clean_filedir, clean_na]);
37 |         [enh_x,Fs] = audioread([enh_filedir, enh_na]);
38 |         %[r,c]=size(x);
39 |         %if c > 1
40 |         %    disp(na);
41 |         %end;
42 |         y = clean_x(1:length(enh_x));
43 |         wrt_path = [save_path, clean_na];
44 |         audiowrite(wrt_path,y,Fs);
45 |     end;
46 |     %disp(x);
47 |     %disp(Fs);
48 | end;
49 | 


--------------------------------------------------------------------------------
/io_funcs/kaldi_io.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2017    Ke Wang     Xiaomi
  5 | 
  6 | """IO classes for reading and writing kaldi .ark
  7 | 
  8 | This module provides io interfaces for reading and writing kaldi .ark files.
  9 | Currently, this module only supports binary-formatted .ark files. Text .ark
 10 | files are not supported.
 11 | 
 12 | To use this module, you need to provide kaldi .scp files only. The .ark
 13 | locations with corresponding offsets can be retrieved from .scp files.
 14 | """
 15 | from __future__ import absolute_import
 16 | from __future__ import division
 17 | from __future__ import print_function
 18 | 
 19 | import sys
 20 | import struct
 21 | import random
 22 | import numpy as np
 23 | 
 24 | class GlobalHeader(object):
 25 |     """ Compress ark format header. """
 26 |     def __init__(self, format, header):
 27 |         self.format = format
 28 |         self.min_value = header[0]
 29 |         self.range = header[1]
 30 |         self.num_rows = header[2]
 31 |         self.num_cols = header[3]
 32 | 
 33 | class PerColHeader(object):
 34 |     """ Compress ark format per column header. """
 35 |     def __init__(self, header):
 36 |         self.percentile_0 = header[0]
 37 |         self.percentile_25 = header[1]
 38 |         self.percentile_75 = header[2]
 39 |         self.percentile_100 = header[3]
 40 | 
 41 | class ArkReader(object):
 42 |     """ Class to read Kaldi ark format.
 43 | 
 44 |     Each time, it reads one line of the .scp file and reads in the
 45 |     corresponding features into a numpy matrix. It only supports
 46 |     binary-formatted .ark files. Text files are not supported.
 47 | 
 48 |     Attributes:
 49 |         utt_ids: A list saving utterance identities.
 50 |         scp_data: A list saving .ark path and offset for items in utt_ids.
 51 |         scp_position: An integer indicating which utt_id and correspoding
 52 |             scp_data will be read next.
 53 |     """
 54 | 
 55 |     def __init__(self, name="ArkReader"):
 56 |         self.name = name
 57 | 
 58 |     def __call__(self, scp_path):
 59 |         """Init utt_ids along with scp_data according to .scp file."""
 60 |         self.scp_position = 0
 61 |         fin = open(scp_path,"r")
 62 |         self.utt_ids = []
 63 |         self.scp_data = []
 64 |         line = fin.readline()
 65 |         while line != '' and line != None:
 66 |             utt_id, path_pos = line.replace('\n','').split(' ')
 67 |             path, pos = path_pos.split(':')
 68 |             self.utt_ids.append(utt_id)
 69 |             self.scp_data.append((path, pos))
 70 |             line = fin.readline()
 71 | 
 72 |         fin.close()
 73 | 
 74 |     def shuffle(self):
 75 |         """Shuffle utt_ids along with scp_data and reset scp_position."""
 76 |         zipped = zip(self.utt_ids, self.scp_data)
 77 |         random.shuffle(zipped)
 78 |         self.utt_ids, self.scp_data = zip(*zipped)  # unzip and assign
 79 |         self.scp_position = 0
 80 | 
 81 |     def read_ark(self, ark_file, ark_offset=0):
 82 |         """Read data from the archive (.ark from kaldi).
 83 | 
 84 |         Returns:
 85 |             A numpy matrix containing data of ark_file.
 86 |         """
 87 |         ark_read_buffer = open(ark_file, 'rb')
 88 |         ark_read_buffer.seek(int(ark_offset), 0)
 89 |         header = struct.unpack('<xcccc', ark_read_buffer.read(5))
 90 |         if header[0] != b"B":
 91 |             print(ark_file)
 92 |             print("Input .ark file is not binary")
 93 |             sys.exit(1)
 94 |         if header[1] == b"C":
 95 |             if header[2] == b"M" and header[3] != b"2":
 96 |                 header_read = struct.unpack('<ffii', ark_read_buffer.read(16))
 97 |                 if header_read[3] == 0:
 98 |                     print("Empty matrix.")
 99 |                     sys.exit(1)
100 |                 else:
101 |                     global_header = GlobalHeader(1, header_read)
102 |                     ark_mat = self.read_compress(self, global_header, ark_read_buffer)
103 |                 return ark_mat
104 |             else:
105 |                 print("Unsupport format.")
106 |                 print("Maybe because of the matrices with 8 or fewer rows.")
107 |                 sys.exit(1)
108 |         else:
109 |              _, rows = struct.unpack('<bi', ark_read_buffer.read(5))
110 |              _, cols = struct.unpack('<bi', ark_read_buffer.read(5))
111 |              if header[1] == b"F":
112 |                  tmp_mat = np.frombuffer(ark_read_buffer.read(rows * cols * 4),
113 |                              dtype = np.float32)
114 |              elif header[1] == b"D":
115 |                  tmp_mat = np.frombuffer(ark_read_buffer.read(rows * cols * 8),
116 |                              dtype = np.float64)
117 |              ark_mat = np.reshape(tmp_mat, (rows, cols))
118 |              return ark_mat
119 |         return None
120 | 
121 |     @staticmethod
122 |     def uint16_to_float(global_header, value):
123 |         """ Convert uint16 to float. """
124 |         # the constant 1.52590218966964e-05 is 1/65535.
125 |         return global_header.min_value \
126 |             + global_header.range * 1.52590218966964e-05 * value
127 | 
128 |     @staticmethod
129 |     def char_to_float(p0, p25, p75, p100, value):
130 |         """ Convert char to float. """
131 |         if value < 64 :
132 |             return p0 + (p25 - p0) * value * (1/64.0)
133 |         elif value <= 192:
134 |             return p25 + (p75 - p25) * (value - 64) * (1/128.0)
135 |         else:
136 |             return p75 + (p100 - p75) * (value - 192) * (1/63.0)
137 | 
138 |     @staticmethod
139 |     def read_compress(self, header, buf):
140 |         """Read compress ark format.
141 | 
142 |         Args:
143 |         header: GlobalHeader format data.
144 |         buf: PerColHeader and the byte data.
145 |         """
146 |         col_header = []
147 |         ark_mat = np.zeros((header.num_rows, header.num_cols))
148 | 
149 |         for i in range(header.num_cols):
150 |             col_header.append(PerColHeader(\
151 |                     struct.unpack('<HHHH', buf.read(8))))
152 | 
153 |         for i in range(header.num_cols):
154 |             p0 = self.uint16_to_float(header, col_header[i].percentile_0)
155 |             p25 = self.uint16_to_float(header, col_header[i].percentile_25)
156 |             p75 = self.uint16_to_float(header, col_header[i].percentile_75)
157 |             p100 = self.uint16_to_float(header, col_header[i].percentile_100)
158 |             for j in range(header.num_rows):
159 |                 value = struct.unpack('<B', buf.read(1))[0]
160 |                 ark_mat[j, i] = self.char_to_float(p0, p25, p75, p100, value)
161 |         return ark_mat
162 | 
163 |     def read_next_utt(self):
164 |         """Read the next utterance in the scp file.
165 | 
166 |         Returns:
167 |             The utterance ID of the utterance that was read, the utterance
168 |             data, and a bool indicating if the reader looped back to the
169 |             beginning.
170 |         """
171 |         if len(self.scp_data) == 0:
172 |              return None , None, True
173 | 
174 |         if self.scp_position >= len(self.scp_data):  #if at end of file loop around
175 |             looped = True
176 |             self.scp_position = 0
177 |         else:
178 |             looped = False
179 | 
180 |         self.scp_position += 1
181 | 
182 |         utt_ids = self.utt_ids[self.scp_position-1]
183 |         utt_data = self.read_utt_data_from_index(self.scp_position-1)
184 | 
185 |         return utt_ids, utt_data, looped
186 | 
187 |     def read_next_scp(self):
188 |         """Read the next utterance ID but don't read the data.
189 | 
190 |         Returns:
191 |             The utterance ID of the utterance that was read.
192 |         """
193 |         if self.scp_position >= len(self.scp_data):  #if at end of file loop around
194 |             self.scp_position = 0
195 | 
196 |         self.scp_position += 1
197 | 
198 |         return self.utt_ids[self.scp_position-1]
199 | 
200 |     def read_previous_scp(self):
201 |         """Read the previous utterance ID but don't read the data.
202 | 
203 |         Returns:
204 |             The utterance ID of the utterance that was read.
205 |         """
206 |         if self.scp_position < 0:  #if at beginning of file loop around
207 |             self.scp_position = len(self.scp_data) - 1
208 | 
209 |         self.scp_position -= 1
210 | 
211 |         return self.utt_ids[self.scp_position+1]
212 | 
213 |     def read_utt_data_from_id(self, utt_id):
214 |         """Read the data of a certain utterance ID.
215 | 
216 |         Args:
217 |             utt_id: A string indicating a certain utterance ID.
218 | 
219 |         Returns:
220 |             A numpy array containing the utterance data corresponding to the ID.
221 |         """
222 |         utt_mat = self.read_utt_data_from_index(self.utt_ids.index(utt_id))
223 | 
224 |         return utt_mat
225 | 
226 |     def read_utt_data_from_index(self, index):
227 |         """Read the data of a certain index.
228 | 
229 |         Args:
230 |             index: A integer index corresponding to a certain utterance ID.
231 | 
232 |         Returns:
233 |             A numpy array containing the utterance data corresponding to the
234 |             index.
235 |         """
236 |         return self.read_ark(self.scp_data[index][0], self.scp_data[index][1])
237 | 
238 |     def split(self):
239 |         """Split of the data that was read so far."""
240 |         self.scp_data = self.scp_data[self.scp_position:-1]
241 |         self.utt_ids = self.utt_ids[self.scp_position:-1]
242 | 
243 | 
244 | class ArkWriter(object):
245 |     """Class to write numpy matrices into Kaldi .ark file and create the
246 |     corresponding .scp file. It only supports binary-formatted .ark files.
247 |     Text and compressed .ark files are not supported.
248 | 
249 |     Attributes:
250 |         scp_path: The path to the .scp file that will be written.
251 |         scp_file_write: The file object corresponds to scp_path.
252 | 
253 |     """
254 | 
255 |     def __init__(self, scp_path):
256 |         """Arkwriter constructor."""
257 |         self.scp_path = scp_path
258 |         self.scp_file_write = open(self.scp_path, "w")
259 | 
260 |     def write_next_utt(self, ark_path, utt_id, utt_mat):
261 |         """Read an utterance to the archive.
262 | 
263 |         Args:
264 |             ark_path: Path to the .ark file that will be used for writing.
265 |             utt_id: The utterance ID.
266 |             utt_mat: A numpy array containing the utterance data.
267 |         """
268 |         ark_file_write = open(ark_path,"ab")
269 |         utt_mat = np.asarray(utt_mat, dtype=np.float32)
270 |         rows, cols = utt_mat.shape
271 |         ark_file_write.write(struct.pack('<%ds'%(len(utt_id)), utt_id))
272 |         pos = ark_file_write.tell()
273 |         ark_file_write.write(struct.pack('<xcccc','B','F','M',' '))
274 |         ark_file_write.write(struct.pack('<bi', 4, rows))
275 |         ark_file_write.write(struct.pack('<bi', 4, cols))
276 |         ark_file_write.write(utt_mat)
277 |         self.scp_file_write.write('%s %s:%s\n' % (utt_id, ark_path, pos))
278 |         ark_file_write.close()
279 | 
280 |     def close(self):
281 |         """close the ark writer"""
282 |         self.scp_file_write.close()
283 | 


--------------------------------------------------------------------------------
/io_funcs/kaldi_io.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/io_funcs/kaldi_io.pyc


--------------------------------------------------------------------------------
/io_funcs/make_sete.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2017    Ke Wang
  5 | 
  6 | """Converts data to TFRecords file format with Example protos."""
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import argparse
 12 | import os
 13 | import shutil
 14 | import sys
 15 | 
 16 | import numpy as np
 17 | import tensorflow as tf
 18 | 
 19 | sys.path.append(os.path.dirname(sys.path[0]))
 20 | from tfrecords_io import make_sequence_example
 21 | from utils.misc import *
 22 | import cPickle
 23 | def convert_to(name, inputs_scp,
 24 |                output_dir, test=False):
 25 |     """Converts a dataset to tfrecords."""
 26 |     if not os.path.exists(output_dir):
 27 |         os.makedirs(output_dir)
 28 | 
 29 |     tfrecords_name = os.path.join(output_dir, name + ".tfrecords")
 30 |     con = 0
 31 |     with tf.python_io.TFRecordWriter(tfrecords_name) as writer, \
 32 |         open(inputs_scp,'r') as inputs_f:
 33 |         for line in inputs_f:
 34 |             #print(line)
 35 |             line = line.split()
 36 |             utt_id = line[0] 
 37 |             #print(utt_id)
 38 |             inputs_path = line[-1]
 39 |             data = cPickle.load(open(inputs_path, 'rb'))
 40 |             [mixed_complx_x] = data
 41 |             inputs = mixed_complx_x
 42 |             inputs = np.abs(inputs)
 43 |             inputs = np.log(inputs + 1e-08).astype(np.float32)
 44 |             inputs = inputs - np.mean(inputs,0)
 45 |             #print(np.shape(inputs))
 46 |             #print(speech_x)
 47 |             #labels = speech_x
 48 |             #print(np.shape(labels))
 49 |             tf.logging.info("Writing utterance %s to %s" % (
 50 |                 utt_id, tfrecords_name))
 51 |             ex = make_sequence_example(utt_id, inputs)
 52 |             writer.write(ex.SerializeToString())
 53 |             con += 1
 54 |             print("done write")
 55 |             print(con)
 56 |     #os.remove(config_file)
 57 |     print("done")
 58 | 
 59 | def main(unused_argv):
 60 |     """Convert to Examples and write the result to TFRecords."""
 61 |     convert_to(FLAGS.name, FLAGS.inputs,
 62 |               FLAGS.output_dir, FLAGS.test)
 63 |     print("done all")
 64 | 
 65 | if __name__ == '__main__':
 66 |     parser = argparse.ArgumentParser()
 67 |     parser.add_argument(
 68 |         '--inputs',
 69 |         type=str,
 70 |         default='data/train/inputs.scp',
 71 |         help='File name of inputs.'
 72 |     )
 73 |     parser.add_argument(
 74 |         '--output_dir',
 75 |         type=str,
 76 |         default='data/tfrecords',
 77 |         help='Directory to write the converted result.'
 78 |     )
 79 |     parser.add_argument(
 80 |         '--name',
 81 |         type=str,
 82 |         default='train',
 83 |         help="TFRecords name to save."
 84 |     )
 85 |     parser.add_argument(
 86 |         '--test',
 87 |         action='store_true',
 88 |         help="Whether inputs is test file."
 89 |     )
 90 |     parser.add_argument(
 91 |         '--verbose',
 92 |         choices=[tf.logging.DEBUG,
 93 |                  tf.logging.ERROR,
 94 |                  tf.logging.FATAL,
 95 |                  tf.logging.INFO,
 96 |                  tf.logging.WARN],
 97 |         type=int,
 98 |         default=tf.logging.WARN,
 99 |         help="Log verbose."
100 |     )
101 |     FLAGS, unparsed = parser.parse_known_args()
102 |     pp.pprint(FLAGS.__dict__)
103 | 
104 |     tf.logging.set_verbosity(FLAGS.verbose)
105 |     tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
106 | 


--------------------------------------------------------------------------------
/io_funcs/make_setf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2019.1    Nan LEE
  5 | 
  6 | """Converts data to TFRecords file format with Example protos."""
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import argparse
 12 | import os
 13 | import shutil
 14 | import sys
 15 | 
 16 | import numpy as np
 17 | import tensorflow as tf
 18 | 
 19 | sys.path.append(os.path.dirname(sys.path[0]))
 20 | from tfrecords_io import make_sequence_example
 21 | from utils.misc import *
 22 | import cPickle
 23 | def convert_to(name, inputs_scp,
 24 |                output_dir, test=False):
 25 |     """Converts a dataset to tfrecords."""
 26 |     if not os.path.exists(output_dir):
 27 |         os.makedirs(output_dir)
 28 | 
 29 |     tfrecords_name = os.path.join(output_dir, name + ".tfrecords")
 30 |     con = 0
 31 |     with tf.python_io.TFRecordWriter(tfrecords_name) as writer, \
 32 |         open(inputs_scp,'r') as inputs_f:
 33 |         for line in inputs_f:
 34 |             #print(line)
 35 |             line = line.split()
 36 |             utt_id = line[0] 
 37 |             #print(utt_id)
 38 |             inputs_path = line[-1]
 39 |             data = cPickle.load(open(inputs_path, 'rb'))
 40 |             [mixed_complx_x, speech_x] = data
 41 |             inputs = mixed_complx_x
 42 |             inputs = inputs - np.mean(inputs,0)
 43 |             #print(np.shape(inputs))
 44 |             #print(speech_x)
 45 |             labels = speech_x
 46 |             #print(np.shape(labels))
 47 |             tf.logging.info("Writing utterance %s to %s" % (
 48 |                 utt_id, tfrecords_name))
 49 |             ex = make_sequence_example(utt_id, inputs, labels)
 50 |             writer.write(ex.SerializeToString())
 51 |             con += 1
 52 |             print("done write")
 53 |             print(con)
 54 |     #os.remove(config_file)
 55 |     print("done")
 56 | 
 57 | def main(unused_argv):
 58 |     """Convert to Examples and write the result to TFRecords."""
 59 |     convert_to(FLAGS.name, FLAGS.inputs,
 60 |               FLAGS.output_dir, FLAGS.test)
 61 |     print("done all")
 62 | 
 63 | if __name__ == '__main__':
 64 |     parser = argparse.ArgumentParser()
 65 |     parser.add_argument(
 66 |         '--inputs',
 67 |         type=str,
 68 |         default='data/train/inputs.scp',
 69 |         help='File name of inputs.'
 70 |     )
 71 |     parser.add_argument(
 72 |         '--output_dir',
 73 |         type=str,
 74 |         default='data/tfrecords',
 75 |         help='Directory to write the converted result.'
 76 |     )
 77 |     parser.add_argument(
 78 |         '--name',
 79 |         type=str,
 80 |         default='train',
 81 |         help="TFRecords name to save."
 82 |     )
 83 |     parser.add_argument(
 84 |         '--test',
 85 |         action='store_true',
 86 |         help="Whether inputs is test file."
 87 |     )
 88 |     parser.add_argument(
 89 |         '--verbose',
 90 |         choices=[tf.logging.DEBUG,
 91 |                  tf.logging.ERROR,
 92 |                  tf.logging.FATAL,
 93 |                  tf.logging.INFO,
 94 |                  tf.logging.WARN],
 95 |         type=int,
 96 |         default=tf.logging.WARN,
 97 |         help="Log verbose."
 98 |     )
 99 |     FLAGS, unparsed = parser.parse_known_args()
100 |     pp.pprint(FLAGS.__dict__)
101 | 
102 |     tf.logging.set_verbosity(FLAGS.verbose)
103 |     tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
104 | 


--------------------------------------------------------------------------------
/io_funcs/make_tfrecords.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2017    Ke Wang
  5 | 
  6 | """Converts data to TFRecords file format with Example protos."""
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import argparse
 12 | import os
 13 | import shutil
 14 | import sys
 15 | 
 16 | import numpy as np
 17 | import tensorflow as tf
 18 | 
 19 | sys.path.append(os.path.dirname(sys.path[0]))
 20 | from kaldi_io import ArkReader
 21 | from tfrecords_io import make_sequence_example
 22 | from utils.misc import *
 23 | 
 24 | def make_config_file(name, output_dir, inputs_scp, labels_scp=None):
 25 |     """Make temporal config file for making TFRecord files."""
 26 |     config_file = os.path.join(output_dir, "config_%s.list" % name)
 27 |     if labels_scp is not None:
 28 |         with open(inputs_scp, 'r') as fr_inputs, \
 29 |             open(labels_scp, 'r') as fr_labels, \
 30 |             open(config_file, 'w') as fw_config:
 31 |             for line_inputs in fr_inputs:
 32 |                 line_labels = fr_labels.readline()
 33 |                 inputs_utt_id, inputs_path = line_inputs.strip().split()
 34 |                 labels_utt_id, labels_path = line_labels.strip().split()
 35 |                 assert inputs_utt_id == labels_utt_id
 36 |                 line_config = inputs_utt_id + ' ' + inputs_path + ' ' \
 37 |                             + labels_path + '\n'
 38 |                 fw_config.write(line_config)
 39 |     else:
 40 |         shutil.copyfile(inputs_scp, config_file)
 41 | 
 42 | 
 43 | def convert_to(name, inputs_scp, labels_scp,
 44 |                output_dir, apply_cmvn=True, test=False):
 45 |     """Converts a dataset to tfrecords."""
 46 |     if not os.path.exists(output_dir):
 47 |         os.makedirs(output_dir)
 48 | 
 49 |     if apply_cmvn:
 50 |         cmvn = np.load(os.path.join(FLAGS.cmvn_dir, "train_cmvn.npz"))
 51 |         
 52 |     if test:
 53 |         make_config_file(name, output_dir, inputs_scp)
 54 |     else:
 55 |         make_config_file(name, output_dir, inputs_scp, labels_scp)
 56 | 
 57 |     config_file = os.path.join(output_dir, "config_%s.list" % name)
 58 |     tfrecords_name = os.path.join(output_dir, name + ".tfrecords")
 59 |     reader = ArkReader()
 60 |     con = 0
 61 |     with tf.python_io.TFRecordWriter(tfrecords_name) as writer, \
 62 |         open(config_file) as fr_config:
 63 |         for line in fr_config:
 64 |             if test:
 65 |                 utt_id, inputs_path = line.strip().split()
 66 |                 inputs_path, inputs_offset = inputs_path.split(':')
 67 |             else:
 68 |                 utt_id, inputs_path, labels_path = line.strip().split()
 69 |                 inputs_path, inputs_offset = inputs_path.split(':')
 70 |                 labels_path, labels_offset = labels_path.split(':')
 71 | 
 72 |             tf.logging.info("Writing utterance %s to %s" % (
 73 |                 utt_id, tfrecords_name))
 74 |             inputs = reader.read_ark(
 75 |                 inputs_path, inputs_offset).astype(np.float64)
 76 |             # inputs = read_binary_file(
 77 |             #     inputs_path, inputs_offset).astype(np.float64)
 78 |             if test:
 79 |                 labels = None
 80 |             else:
 81 |                 labels = reader.read_ark(
 82 |                     labels_path, labels_offset).astype(np.float64)
 83 |                 # labels = read_binary_file(
 84 |                 #     labels_path, labels_offset).astype(np.float64)
 85 |             if apply_cmvn:
 86 |                 # print(cmvn["mean_inputs"])
 87 |                 #inputs = (inputs - cmvn["mean_inputs"]) / cmvn["stddev_inputs"]
 88 |                 inputs = inputs - np.mean(inputs,0)
 89 |                 if labels is not None:
 90 |                     # labels = (labels - cmvn["mean_labels"]) / cmvn["stddev_labels"]
 91 |                     labels = labels
 92 |             ex = make_sequence_example(utt_id, inputs, labels)
 93 |             writer.write(ex.SerializeToString())
 94 |             con += 1
 95 |             print("done write")
 96 |             print(con)
 97 |     os.remove(config_file)
 98 |     print("done")
 99 | 
100 | def main(unused_argv):
101 |     """Convert to Examples and write the result to TFRecords."""
102 |     convert_to(FLAGS.name, FLAGS.inputs, FLAGS.labels,
103 |               FLAGS.output_dir, FLAGS.apply_cmvn, FLAGS.test)
104 |     print("done all")
105 | 
106 | if __name__ == '__main__':
107 |     parser = argparse.ArgumentParser()
108 |     parser.add_argument(
109 |         '--inputs',
110 |         type=str,
111 |         default='data/train/inputs.scp',
112 |         help='File name of inputs.'
113 |     )
114 |     parser.add_argument(
115 |         '--labels',
116 |         type=str,
117 |         default=None,
118 |         help='File name of labels.'
119 |     )
120 |     parser.add_argument(
121 |         '--cmvn_dir',
122 |         type=str,
123 |         default='data/train',
124 |         help='Name of Numpy format CMVN file.'
125 |     )
126 |     parser.add_argument(
127 |         '--apply_cmvn',
128 |         type=str2bool,
129 |         nargs='?',
130 |         default='True',
131 |         help='Whether apply CMVN to inputs and labels.'
132 |     )
133 |     parser.add_argument(
134 |         '--output_dir',
135 |         type=str,
136 |         default='data/tfrecords',
137 |         help='Directory to write the converted result.'
138 |     )
139 |     parser.add_argument(
140 |         '--name',
141 |         type=str,
142 |         default='train',
143 |         help="TFRecords name to save."
144 |     )
145 |     parser.add_argument(
146 |         '--test',
147 |         action='store_true',
148 |         help="Whether inputs is test file."
149 |     )
150 |     parser.add_argument(
151 |         '--verbose',
152 |         choices=[tf.logging.DEBUG,
153 |                  tf.logging.ERROR,
154 |                  tf.logging.FATAL,
155 |                  tf.logging.INFO,
156 |                  tf.logging.WARN],
157 |         type=int,
158 |         default=tf.logging.WARN,
159 |         help="Log verbose."
160 |     )
161 |     FLAGS, unparsed = parser.parse_known_args()
162 |     pp.pprint(FLAGS.__dict__)
163 | 
164 |     tf.logging.set_verbosity(FLAGS.verbose)
165 |     tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
166 | 


--------------------------------------------------------------------------------
/io_funcs/make_tfrecords_rta.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2017    Ke Wang
  5 | 
  6 | """Converts data to TFRecords file format with Example protos."""
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import argparse
 12 | import os
 13 | import shutil
 14 | import sys
 15 | 
 16 | import numpy as np
 17 | import tensorflow as tf
 18 | 
 19 | sys.path.append(os.path.dirname(sys.path[0]))
 20 | from kaldi_io import ArkReader
 21 | from tfrecords_io import make_sequence_example
 22 | from utils.misc import *
 23 | 
 24 | def make_config_file(name, output_dir, rt60_scp, inputs_scp, labels_scp=None):
 25 |     """Make temporal config file for making TFRecord files."""
 26 |     config_file = os.path.join(output_dir, "config_%s.list" % name)
 27 |     if labels_scp is not None:
 28 |         with open(rt60_scp, 'r') as fr_rt60, \
 29 |             open(inputs_scp, 'r') as fr_inputs, \
 30 |             open(labels_scp, 'r') as fr_labels, \
 31 |             open(config_file, 'w') as fw_config:
 32 |             for line_inputs in fr_inputs:
 33 |                 line_rt60 = fr_rt60.readline()
 34 |                 line_labels = fr_labels.readline()
 35 |                 rt60_utt_id, rt60 = line_rt60.strip().split()
 36 |                 inputs_utt_id, inputs_path = line_inputs.strip().split()
 37 |                 labels_utt_id, labels_path = line_labels.strip().split()
 38 |                 assert rt60_utt_id == inputs_utt_id
 39 |                 assert inputs_utt_id == labels_utt_id
 40 |                 line_config = inputs_utt_id + ' ' + inputs_path + ' ' \
 41 |                             + labels_path + ' ' + rt60 + '\n'
 42 |                 fw_config.write(line_config)
 43 |     else:
 44 |         with open(rt60_scp, 'r') as fr_rt60, \
 45 |             open(inputs_scp, 'r') as fr_inputs, \
 46 |             open(config_file, 'w') as fw_config:
 47 |             for line_inputs in fr_inputs:
 48 |                 line_rt60 = fr_rt60.readline()
 49 |                 rt60_utt_id, rt60 = line_rt60.strip().split()
 50 |                 inputs_utt_id, inputs_path = line_inputs.strip().split()
 51 |                 assert rt60_utt_id == inputs_utt_id
 52 |                 line_config = inputs_utt_id + ' ' + inputs_path + \
 53 |                     ' ' + rt60 + '\n'
 54 |                 fw_config.write(line_config)
 55 |         # shutil.copyfile(inputs_scp, config_file)
 56 | 
 57 | 
 58 | def convert_to(name, rt60_scp, inputs_scp, labels_scp,
 59 |                output_dir, apply_cmvn=True, test=False):
 60 |     """Converts a dataset to tfrecords."""
 61 |     if not os.path.exists(output_dir):
 62 |         os.makedirs(output_dir)
 63 | 
 64 |     if apply_cmvn:
 65 |         cmvn = np.load(os.path.join(FLAGS.cmvn_dir, "train_cmvn.npz"))
 66 | 
 67 |     if test:
 68 |         make_config_file(name, output_dir, rt60_scp, inputs_scp)
 69 |     else:
 70 |         make_config_file(name, output_dir, rt60_scp, inputs_scp, labels_scp)
 71 | 
 72 |     config_file = os.path.join(output_dir, "config_%s.list" % name)
 73 |     tfrecords_name = os.path.join(output_dir, name + ".tfrecords")
 74 |     reader = ArkReader()
 75 |     with tf.python_io.TFRecordWriter(tfrecords_name) as writer, \
 76 |         open(config_file) as fr_config:
 77 |         for line in fr_config:
 78 |             if test:
 79 |                 utt_id, inputs_path, rt60 = line.strip().split()
 80 |                 inputs_path, inputs_offset = inputs_path.split(':')
 81 |             else:
 82 |                 utt_id, inputs_path, labels_path, rt60 = line.strip().split()
 83 |                 inputs_path, inputs_offset = inputs_path.split(':')
 84 |                 labels_path, labels_offset = labels_path.split(':')
 85 | 
 86 |             tf.logging.info("Writing utterance %s to %s" % (
 87 |                 utt_id, tfrecords_name))
 88 |             inputs = reader.read_ark(
 89 |                 inputs_path, inputs_offset).astype(np.float64)
 90 |             # inputs = read_binary_file(
 91 |             #     inputs_path, inputs_offset).astype(np.float64)
 92 |             if test:
 93 |                 labels = None
 94 |             else:
 95 |                 labels = reader.read_ark(
 96 |                     labels_path, labels_offset).astype(np.float64)
 97 |                 # labels = read_binary_file(
 98 |                 #     labels_path, labels_offset).astype(np.float64)
 99 |             if apply_cmvn:
100 |                 inputs = (inputs - cmvn["mean_inputs"]) / cmvn["stddev_inputs"]
101 |                 frame_num = inputs.shape[0]
102 |                 rt60_numpy = np.ones(frame_num) * float(rt60)
103 |                 inputs = np.insert(inputs, 0, values=rt60_numpy, axis=1)
104 |                 if labels is not None:
105 |                     labels = (labels - cmvn["mean_labels"]) / cmvn["stddev_labels"]
106 |             ex = make_sequence_example(utt_id, inputs, labels)
107 |             writer.write(ex.SerializeToString())
108 | 
109 |     os.remove(config_file)
110 | 
111 | 
112 | def main(unused_argv):
113 |     """Convert to Examples and write the result to TFRecords."""
114 |     convert_to(FLAGS.name, FLAGS.rt60, FLAGS.inputs, FLAGS.labels,
115 |               FLAGS.output_dir, FLAGS.apply_cmvn, FLAGS.test)
116 | 
117 | 
118 | if __name__ == '__main__':
119 |     parser = argparse.ArgumentParser()
120 |     parser.add_argument(
121 |         '--rt60',
122 |         type=str,
123 |         default='data/train/rt60.scp',
124 |         help='File name of rt60 file.'
125 |     )
126 |     parser.add_argument(
127 |         '--inputs',
128 |         type=str,
129 |         default='data/train/inputs.scp',
130 |         help='File name of inputs.'
131 |     )
132 |     parser.add_argument(
133 |         '--labels',
134 |         type=str,
135 |         default=None,
136 |         help='File name of labels.'
137 |     )
138 |     parser.add_argument(
139 |         '--cmvn_dir',
140 |         type=str,
141 |         default='data/train',
142 |         help='Name of Numpy format CMVN file.'
143 |     )
144 |     parser.add_argument(
145 |         '--apply_cmvn',
146 |         type=str2bool,
147 |         nargs='?',
148 |         default='True',
149 |         help='Whether apply CMVN to inputs and labels.'
150 |     )
151 |     parser.add_argument(
152 |         '--output_dir',
153 |         type=str,
154 |         default='data/tfrecords',
155 |         help='Directory to write the converted result.'
156 |     )
157 |     parser.add_argument(
158 |         '--name',
159 |         type=str,
160 |         default='train',
161 |         help="TFRecords name to save."
162 |     )
163 |     parser.add_argument(
164 |         '--test',
165 |         action='store_true',
166 |         help="Whether inputs is test file."
167 |     )
168 |     parser.add_argument(
169 |         '--verbose',
170 |         choices=[tf.logging.DEBUG,
171 |                  tf.logging.ERROR,
172 |                  tf.logging.FATAL,
173 |                  tf.logging.INFO,
174 |                  tf.logging.WARN],
175 |         type=int,
176 |         default=tf.logging.WARN,
177 |         help="Log verbose."
178 |     )
179 |     FLAGS, unparsed = parser.parse_known_args()
180 |     pp.pprint(FLAGS.__dict__)
181 | 
182 |     tf.logging.set_verbosity(FLAGS.verbose)
183 |     tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
184 | 


--------------------------------------------------------------------------------
/io_funcs/teconvert_cmvn_to_numpy.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # Copyright 2017    Ke Wang
 5 | 
 6 | """Convert inputs and lables GLOBAL cmvns to a Numpy file."""
 7 | 
 8 | from __future__ import absolute_import
 9 | from __future__ import division
10 | from __future__ import print_function
11 | 
12 | import argparse
13 | import os
14 | import sys
15 | import struct
16 | 
17 | import numpy as np
18 | 
19 | def convert_cmvn_to_numpy(inputs_cmvn, save_dir):
20 |     """Convert global binary ark cmvn to numpy format."""
21 | 
22 |     print("Convert %sto Numpy format" % (inputs_cmvn))
23 |     inputs_filename = inputs_cmvn
24 | 
25 |     inputs = read_binary_file(inputs_filename, 0)
26 | 
27 |     inputs_frame = inputs[0][-1]
28 | 
29 |     # assert inputs_frame == labels_frame
30 | 
31 |     cmvn_inputs = np.hsplit(inputs, [inputs.shape[1] - 1])[0]
32 |     print(cmvn_inputs[0])
33 |     mean_inputs = cmvn_inputs[0] / inputs_frame
34 |     stddev_inputs = np.sqrt(cmvn_inputs[1] / inputs_frame - mean_inputs ** 2)
35 |     print(stddev_inputs)
36 |     print(len(stddev_inputs))
37 |     cmvn_name = os.path.join(save_dir, "train_cmvn.npz")
38 |     np.savez(cmvn_name,
39 |              mean_inputs=mean_inputs,
40 |              stddev_inputs=stddev_inputs)
41 | 
42 |     print("Write to %s" % cmvn_name)
43 | 
44 | 
45 | def read_binary_file(filename, offset=0):
46 |     """Read data from matlab binary file (row, col and matrix).
47 | 
48 |     Returns:
49 |         A numpy matrix containing data of the given binary file.
50 |     """
51 |     read_buffer = open(filename, 'rb')
52 |     read_buffer.seek(int(offset), 0)
53 |     header = struct.unpack('<xcccc', read_buffer.read(5))
54 |     if header[0] != 'B':
55 |         print("Input .ark file is not binary")
56 |         sys.exit(-1)
57 |     if header[1] == 'C':
58 |         print("Input .ark file is compressed, exist now.")
59 |         sys.exit(-1)
60 | 
61 |     rows = 0; cols= 0
62 |     _, rows = struct.unpack('<bi', read_buffer.read(5))
63 |     _, cols = struct.unpack('<bi', read_buffer.read(5))
64 | 
65 |     if header[1] == "F":
66 |         tmp_mat = np.frombuffer(read_buffer.read(rows * cols * 4),
67 |                                 dtype=np.float32)
68 |     elif header[1] == "D":
69 |         tmp_mat = np.frombuffer(read_buffer.read(rows * cols * 8),
70 |                                 dtype=np.float64)
71 |     mat = np.reshape(tmp_mat, (rows, cols))
72 |     read_buffer.close()
73 | 
74 |     return mat
75 | 
76 | 
77 | if __name__ == '__main__':
78 |     parser = argparse.ArgumentParser()
79 |     parser.add_argument(
80 |         '--inputs',
81 |         type=str,
82 |         default='data/train/inputs.cmvn',
83 |         help="Name of input CMVN file."
84 |     )
85 |     parser.add_argument(
86 |         '--save_dir',
87 |         required=True,
88 |         help="Directory to save Numpy format CMVN file."
89 |     )
90 |     FLAGS, unparsed = parser.parse_known_args()
91 | 
92 |     convert_cmvn_to_numpy(FLAGS.inputs, FLAGS.save_dir)
93 | 


--------------------------------------------------------------------------------
/io_funcs/test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from kaldi_io import ArkReader
 4 | import numpy as np
 5 | import random
 6 | 
 7 | inputs_path = '/home/train02/linan/ASR/Ganspeechenhan/rsrgan/data/tr/inputs.ark'
 8 | inputs_offset = '836825'
 9 | reader = ArkReader()
10 | #a = {"a","b","c","d","e"}
11 | inputs_path2 = '/home/train02/linan/ASR/Ganspeechenhan/rsrgan/data/simuda/labels.ark'
12 | inputs_offset2 = '836829'
13 | 
14 | inputs2 = reader.read_ark(inputs_path2, inputs_offset2).astype(np.float64)
15 | inputs = reader.read_ark(inputs_path, inputs_offset).astype(np.float64)
16 | print inputs2.shape
17 | print inputs
18 | print inputs.shape
19 | #lists = range(len(a))
20 | #random.shuffle(lists)
21 | #for i in xrange(len(lists)):
22 | #    print i
23 | 
24 | 


--------------------------------------------------------------------------------
/io_funcs/test2.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from kaldi_io import ArkReader
 4 | import numpy as np
 5 | import random
 6 | 
 7 | inputs_path = '/home/train02/linan/ASR/Ganspeechenhan/rsrgan/data/tr/inputs.ark'
 8 | inputs_offset = '9'
 9 | reader = ArkReader()
10 | #a = {"a","b","c","d","e"}
11 | inputs_path2 = '/home/train02/linan/ASR/Ganspeechenhan/rsrgan/data/tr/labels.ark'
12 | inputs_offset2 = '9'
13 | 
14 | inputs2 = reader.read_ark(inputs_path2, inputs_offset2).astype(np.float64)
15 | inputs = reader.read_ark(inputs_path, inputs_offset).astype(np.float64)
16 | print inputs2.shape
17 | print inputs
18 | print inputs.shape
19 | #lists = range(len(a))
20 | #random.shuffle(lists)
21 | #for i in xrange(len(lists)):
22 | #    print i
23 | 
24 | 


--------------------------------------------------------------------------------
/io_funcs/tfrecords_dataset.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/io_funcs/tfrecords_dataset.pyc


--------------------------------------------------------------------------------
/io_funcs/tfrecords_dataset_test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2017    Ke Wang     Xiaomi
  5 | 
  6 | from __future__ import absolute_import
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | import argparse
 11 | import datetime
 12 | import os.path
 13 | import sys
 14 | import time
 15 | 
 16 | import numpy as np
 17 | import tensorflow as tf
 18 | 
 19 | sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "utils"))
 20 | from tfrecords_dataset import get_batch, get_padded_batch
 21 | from utils.misc import pp
 22 | 
 23 | tf.logging.set_verbosity(tf.logging.INFO)
 24 | 
 25 | 
 26 | def main():
 27 |     names = ['train1']
 28 |     # names = ['tr1', 'tr2', 'tr3', 'tr4', 'tr5']
 29 |     tfrecords_lst = []
 30 |     for name in names:
 31 |         tfrecords_name = os.path.join(FLAGS.data_dir, name + ".tfrecords")
 32 |         tfrecords_lst.append(tfrecords_name)
 33 |     tf.logging.info(tfrecords_lst)
 34 | 
 35 |     with tf.Graph().as_default():
 36 |         # utt_id, inputs, labels, lengths = get_padded_batch(
 37 |         #     tfrecords_lst, FLAGS.batch_size, FLAGS.input_dim,
 38 |         #     FLAGS.output_dim, 0, 0,
 39 |         #     num_threads=FLAGS.num_threads,
 40 |         #     num_buckets=20,
 41 |         #     num_epochs=FLAGS.num_epochs)
 42 |         inputs, labels = get_batch(
 43 |             tfrecords_lst, FLAGS.batch_size, FLAGS.input_dim,
 44 |             FLAGS.output_dim, 0, 0, num_threads=FLAGS.num_threads,
 45 |             num_epochs=FLAGS.num_epochs, infer=False)
 46 |         # inputs.set_shape([FLAGS.batch_size, FLAGS.input_dim * 3])
 47 |         # print(inputs.get_shape().as_list())
 48 |         # utt_id, inputs, lengths = get_batch(
 49 |         #     tfrecords_lst, FLAGS.batch_size, FLAGS.input_dim,
 50 |         #     FLAGS.output_dim, 3, 3, num_threads=FLAGS.num_threads,
 51 |         #     num_epochs=FLAGS.num_epochs, infer=True)
 52 | 
 53 | 
 54 |         init = tf.group(tf.global_variables_initializer(),
 55 |                         tf.local_variables_initializer())
 56 | 
 57 |         sess = tf.Session()
 58 | 
 59 |         sess.run(init)
 60 | 
 61 |         start = datetime.datetime.now()
 62 |         batch = 0
 63 |         while True:
 64 |             try:
 65 |                 # Print an overview fairly often.
 66 |                 # tr_utt_id, tr_inputs, tr_labels, tr_lengths = sess.run([
 67 |                 #     utt_id, inputs, labels, lengths])
 68 |                 tr_inputs, tr_labels = sess.run([
 69 |                     inputs, labels])
 70 |                 # tr_utt_id, tr_inputs, tr_lengths = sess.run([
 71 |                 #     utt_id, inputs, lengths])
 72 |                 # tf.logging.info(tr_utt_id)
 73 |                 # tf.logging.info(tr_inputs)
 74 |                 # tf.logging.info(tr_labels)
 75 |                 tf.logging.info('inputs shape : '+ str(tr_inputs.shape))
 76 |                 # tf.logging.info('labels shape : ' + str(tr_labels.shape))
 77 |                 # tf.logging.info('actual lengths : ' + str(tr_lengths))
 78 |                 batch += 1
 79 |             except tf.errors.OutOfRangeError:
 80 |                 tf.logging.info("Batch number is %d" % batch)
 81 |                 tf.logging.info('Done training -- epoch limit reached')
 82 |                 break
 83 |         end = datetime.datetime.now()
 84 |         duration = (end - start).total_seconds()
 85 |         print("Reading time is %.0fs." % duration)
 86 | 
 87 |         sess.close()
 88 | 
 89 | 
 90 | if __name__ == '__main__':
 91 |     parser = argparse.ArgumentParser()
 92 |     parser.add_argument(
 93 |         '--batch_size',
 94 |         type=int,
 95 |         default=32,
 96 |         help='Mini-batch size.'
 97 |     )
 98 |     parser.add_argument(
 99 |         '--input_dim',
100 |         type=int,
101 |         default=257,
102 |         help='The dimension of inputs.'
103 |     )
104 |     parser.add_argument(
105 |         '--output_dim',
106 |         type=int,
107 |         default=40,
108 |         help='The dimension of outputs.'
109 |     )
110 |     parser.add_argument(
111 |         '--num_threads',
112 |         type=int,
113 |         default=1,
114 |         help='The num of threads to read tfrecords files.'
115 |     )
116 |     parser.add_argument(
117 |         '--num_epochs',
118 |         type=int,
119 |         default=1,
120 |         help='The num of epochs to read tfrecords files.'
121 |     )
122 |     parser.add_argument(
123 |         '--data_dir',
124 |         type=str,
125 |         default='data/tfrecords/',
126 |         help='Directory of train, val and test data.'
127 |     )
128 |     FLAGS, unparsed = parser.parse_known_args()
129 |     pp.pprint(FLAGS.__dict__)
130 |     sys.stdout.flush()
131 |     main()
132 | 


--------------------------------------------------------------------------------
/io_funcs/tfrecords_io.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2017    Ke Wang     Xiaomi
  5 | 
  6 | 
  7 | """Utility functions for working with tf.train.SequenceExamples."""
  8 | 
  9 | import tensorflow as tf
 10 | 
 11 | 
 12 | def make_sequence_example(utt_id, inputs, labels=None):
 13 |     """Returns a SequenceExample for the given inputs and labels(optional).
 14 |     Args:
 15 |         utt_id: The key of corresponding features. It is very useful for decoding.
 16 |         inputs: A list of input vectors. Each input vector is a list of floats.
 17 |         labels(optional): A list of label vectors. Each label vector is a list of floats.
 18 |     Returns:
 19 |         A tf.train.SequenceExample containing inputs and labels(optional).
 20 |     """
 21 |     utt_id_feature = {
 22 |         'utt_id': tf.train.Feature(
 23 |             bytes_list=tf.train.BytesList(value=[utt_id]))
 24 |     }
 25 |     input_features = [
 26 |         tf.train.Feature(float_list=tf.train.FloatList(value=input_))
 27 |         for input_ in inputs]
 28 | 
 29 |     context = tf.train.Features(feature=utt_id_feature)
 30 | 
 31 |     if labels is not None:
 32 |         label_features = [
 33 |             tf.train.Feature(float_list=tf.train.FloatList(value=label_))
 34 |             for label_ in labels]
 35 |         feature_list = {
 36 |             'inputs': tf.train.FeatureList(feature=input_features),
 37 |             'labels': tf.train.FeatureList(feature=label_features)
 38 |         }
 39 |     else:
 40 |         feature_list = {
 41 |             'inputs': tf.train.FeatureList(feature=input_features)
 42 |         }
 43 |     feature_lists = tf.train.FeatureLists(feature_list=feature_list)
 44 |     return tf.train.SequenceExample(context=context, feature_lists=feature_lists)
 45 | 
 46 | 
 47 | def get_padded_batch(file_list, batch_size, input_size, output_size,
 48 |                      left, right, num_enqueuing_threads=4,
 49 |                      num_epochs=1, infer=False):
 50 |     """Reads batches of SequenceExamples from TFRecords and pads them.
 51 |     Can deal with variable length SequenceExamples by padding each batch to the
 52 |     length of the longest sequence with zeros.
 53 |     Args:
 54 |         file_list: A list of paths to TFRecord files containing SequenceExamples.
 55 |         batch_size: The number of SequenceExamples to include in each batch.
 56 |         input_size: The size of each input vector. The returned batch of inputs
 57 |             will have a shape [batch_size, num_steps, input_size].
 58 |         output_size: The size of each output vector.
 59 |         left: An integer indicates left context number.
 60 |         right: An integer indicates right context number.
 61 |         num_enqueuing_threads: The number of threads to use for enqueuing
 62 |             SequenceExamples.
 63 |     Returns:
 64 |         utt_id: A string of inputs and labels id.
 65 |         inputs: A tensor of shape [batch_size, num_steps, input_size] of floats32s.
 66 |         labels: A tensor of shape [batch_size, num_steps] of float32s.
 67 |         lengths: A tensor of shape [batch_size] of int32s. The lengths of each
 68 |             SequenceExample before padding.
 69 |     """
 70 |     file_queue = tf.train.string_input_producer(
 71 |         file_list, num_epochs=num_epochs, shuffle=(not infer))
 72 |     reader = tf.TFRecordReader()
 73 |     _, serialized_example = reader.read(file_queue)
 74 | 
 75 |     context_features = {
 76 |         'utt_id': tf.FixedLenFeature([], tf.string),
 77 |     }
 78 | 
 79 |     if not infer:
 80 |         sequence_features = {
 81 |             'inputs': tf.FixedLenSequenceFeature(shape=[input_size],
 82 |                                                  dtype=tf.float32),
 83 |             'labels': tf.FixedLenSequenceFeature(shape=[output_size],
 84 |                                                  dtype=tf.float32)}
 85 | 
 86 |         context, sequence = tf.parse_single_sequence_example(
 87 |                 serialized_example,
 88 |                 context_features=context_features,
 89 |                 sequence_features=sequence_features)
 90 | 
 91 |         length = tf.shape(sequence['inputs'])[0]
 92 | 
 93 |         capacity = 1000 + (num_enqueuing_threads + 1) * batch_size
 94 |         queue = tf.PaddingFIFOQueue(
 95 |             capacity=capacity,
 96 |             dtypes=[tf.string, tf.float32, tf.float32, tf.int32],
 97 |             shapes=[(), (None, input_size*(left+1+right)),
 98 |                     (None, output_size), ()])
 99 | 
100 |         sfeats = splice_feats(sequence['inputs'], left, right)
101 | 
102 |         enqueue_ops = [queue.enqueue([context['utt_id'],
103 |                                       sfeats,
104 |                                       sequence['labels'],
105 |                                       length])] * num_enqueuing_threads
106 |     else:
107 |         assert batch_size == 1, num_epochs == 1
108 |         sequence_features = {
109 |             'inputs': tf.FixedLenSequenceFeature(shape=[input_size],
110 |                                                  dtype=tf.float32)}
111 | 
112 |         context, sequence = tf.parse_single_sequence_example(
113 |             serialized_example,
114 |             context_features=context_features,
115 |             sequence_features=sequence_features)
116 | 
117 |         length = tf.shape(sequence['inputs'])[0]
118 | 
119 |         capacity = 1000 + (num_enqueuing_threads + 1) * batch_size
120 |         queue = tf.PaddingFIFOQueue(
121 |             capacity=capacity,
122 |             dtypes=[tf.string, tf.float32, tf.int32],
123 |             shapes=[(), (None, input_size*(left+1+right)), ()])
124 | 
125 |         sfeats = splice_feats(sequence['inputs'], left, right)
126 | 
127 |         enqueue_ops = [queue.enqueue([context['utt_id'],
128 |                                       sfeats,
129 |                                       length])] * num_enqueuing_threads
130 | 
131 |     tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))
132 |     return queue.dequeue_many(batch_size)
133 |     print('queue dequeue_many is:',queue.dequeue_many(batch_size))
134 |     print queue.dequeue_many(batch_size)
135 | 
136 | def read_tfrecords(file_list, input_size, output_size, num_epochs, infer=False):
137 |     """Reads data from TFRecords.
138 |     Args:
139 |         file_list: A list of paths to TFRecord files containing SequenceExamples.
140 |         input_size: The dim of each input vector.
141 |         output_size: The dim of each output vector.
142 |     Returns:
143 |         A tf.train.SequenceExample containing inputs and labels(optional).
144 |     """
145 |     filename_queue = tf.train.string_input_producer(
146 |         file_list, num_epochs=num_epochs, shuffle=(not infer))
147 |     reader = tf.TFRecordReader()
148 |     _, serialized_example = reader.read(filename_queue)
149 | 
150 |     context_features = {
151 |         'utt_id': tf.FixedLenFeature([], tf.string),
152 |     }
153 | 
154 |     if not infer:
155 |         sequence_features = {
156 |             'inputs': tf.FixedLenSequenceFeature(shape=[input_size],
157 |                                                  dtype=tf.float32),
158 |             'labels': tf.FixedLenSequenceFeature(shape=[output_size],
159 |                                                  dtype=tf.float32)}
160 | 
161 |         context, sequence = tf.parse_single_sequence_example(
162 |             serialized_example,
163 |             context_features=context_features,
164 |             sequence_features=sequence_features)
165 |         return context['utt_id'], sequence['inputs'], sequence['labels']
166 |     else:
167 |         sequence_features = {
168 |             'inputs': tf.FixedLenSequenceFeature(shape=[input_size],
169 |                                                  dtype=tf.float32)}
170 | 
171 |         context, sequence = tf.parse_single_sequence_example(
172 |             serialized_example,
173 |             context_features=context_features,
174 |             sequence_features=sequence_features)
175 |         return context['utt_id'], sequence['inputs']
176 | 
177 | 
178 | def splice_feats(feats, left, right):
179 |     """Splice feats like KALDI.
180 |     Args:
181 |         feats: input feats have a shape [row, col].
182 |         left: left context number.
183 |         right: right context number.
184 |     Returns:
185 |         Spliced feats with a shape [row, col*(left+1+right)]
186 |     """
187 |     sfeats = []
188 |     row = tf.shape(feats)[0]
189 |     # Left
190 |     for i in range(left, 0, -1):
191 |         fl = tf.slice(feats, [0, 0], [row-i, -1])
192 |         for j in range(i):
193 |             fl = tf.pad(fl, [[1, 0], [0, 0]], mode='SYMMETRIC')
194 |         sfeats.append(fl)
195 |     sfeats.append(feats)
196 | 
197 |     # Right
198 |     for i in range(1, right+1):
199 |         fr = tf.slice(feats, [i, 0], [-1, -1])
200 |         for j in range(i):
201 |             fr = tf.pad(fr, [[0, 1], [0, 0]], mode='SYMMETRIC')
202 |         sfeats.append(fr)
203 | 
204 |     return tf.concat(sfeats, 1)
205 | 
206 | 
207 | def get_batch(file_list, batch_size, input_size, output_size, left, right,
208 |               num_enqueuing_threads=4, num_epochs=1,infer=False):
209 |     """Reads batches of Examples from TFRecords and splice them.
210 |     Args:
211 |         file_list:  A list of paths to TFRecord files containing SequenceExamples.
212 |         batch_size: The number of Examples to include in each batch.
213 |         input_size: The size of each input vector. The returned batch of inputs
214 |             will have a shape [batch_size, input_size*(left+1+right)].
215 |         num_enqueuing_threads: The number of threads to use for enqueuing Examples.
216 |     Return:
217 |         batch_x: A tensor of shape [batch_size, input_size*(left+1+right)] of floats32s.
218 |         batch_y: A tensor of shape [batch_size, output_size] of float32s.
219 |     """
220 |     if not infer:
221 |         utt_id, inputs, labels = read_tfrecords(file_list, input_size,
222 |                                                 output_size, num_epochs, infer)
223 |     else:
224 |         utt_id, inputs = read_tfrecords(file_list, input_size, output_size,
225 |                                         num_epochs, infer)
226 |     sfeats = splice_feats(inputs, left, right)
227 | 
228 |     if infer:
229 |         assert batch_size == 1, num_epochs == 1
230 | 
231 |     length = tf.shape(inputs)[0]
232 |     capacity = 1000 + (num_enqueuing_threads + 1) * batch_size
233 |     if not infer:
234 |         slice_queue = tf.RandomShuffleQueue(
235 |             capacity=capacity,
236 |             min_after_dequeue = 1000,
237 |             dtypes=[tf.float32, tf.float32],
238 |             shapes=[[input_size*(left+1+right),], [output_size,]])
239 |         enqueue_ops = [slice_queue.enqueue_many(
240 |             [sfeats, labels])] * num_enqueuing_threads
241 |     else:
242 |         slice_queue = tf.PaddingFIFOQueue(
243 |             capacity=capacity,
244 |             dtypes=[tf.string, tf.float32, tf.int32],
245 |             shapes=[(), (None, input_size*(left+1+right)), ()])
246 |         enqueue_ops = [slice_queue.enqueue(
247 |             [utt_id, sfeats, length])] * num_enqueuing_threads
248 | 
249 |     tf.train.add_queue_runner(tf.train.QueueRunner(slice_queue, enqueue_ops))
250 | 
251 |     if not infer:
252 |         batch_x, batch_y = slice_queue.dequeue_many(batch_size)
253 |         return batch_x, batch_y
254 |     else:
255 |         batch_id, batch_x, batch_len = slice_queue.dequeue_many(batch_size)
256 |         return batch_id, batch_x, batch_len
257 | 


--------------------------------------------------------------------------------
/io_funcs/tfrecords_io.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/io_funcs/tfrecords_io.pyc


--------------------------------------------------------------------------------
/io_funcs/tfrecords_io_test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2017    Ke Wang     Xiaomi
  5 | 
  6 | from __future__ import absolute_import
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | import argparse
 11 | import datetime
 12 | import os.path
 13 | import sys
 14 | import time
 15 | 
 16 | import numpy as np
 17 | import tensorflow as tf
 18 | 
 19 | sys.path.append(os.path.dirname(sys.path[0]))
 20 | from tfrecords_io import get_padded_batch, get_batch
 21 | from utils.misc import pp
 22 | 
 23 | tf.logging.set_verbosity(tf.logging.INFO)
 24 | 
 25 | 
 26 | def main():
 27 |     # names = ['train1']
 28 |     names = ['tr1', 'tr2', 'tr3', 'tr4', 'tr5']
 29 |     # names = ['train']
 30 |     tfrecords_lst = []
 31 |     for name in names:
 32 |         tfrecords_name = os.path.join(FLAGS.data_dir, name + ".tfrecords")
 33 |         tfrecords_lst.append(tfrecords_name)
 34 |     tf.logging.info(tfrecords_lst)
 35 | 
 36 |     with tf.Graph().as_default():
 37 |         # utt_id, inputs, labels, lengths = get_padded_batch(
 38 |         #     tfrecords_lst, FLAGS.batch_size, FLAGS.input_dim,
 39 |         #     FLAGS.output_dim, 0, 0,
 40 |         #     num_enqueuing_threads=FLAGS.num_threads,
 41 |         #     num_epochs=FLAGS.num_epochs,
 42 |         #     infer=False)
 43 |         # utt_id, inputs, lengths = get_padded_batch(
 44 |         #     tfrecords_lst, FLAGS.batch_size, FLAGS.input_dim,
 45 |         #     FLAGS.output_dim, 1, 1,
 46 |         #     num_enqueuing_threads=FLAGS.num_threads,
 47 |         #     num_epochs=FLAGS.num_epochs,
 48 |         #     infer=True)
 49 |         inputs, labels = get_batch(
 50 |             tfrecords_lst, FLAGS.batch_size, FLAGS.input_dim,
 51 |             FLAGS.output_dim, 5, 5, num_enqueuing_threads=FLAGS.num_threads,
 52 |             num_epochs=FLAGS.num_epochs)
 53 |         # print(inputs.get_shape().as_list())
 54 |         # utt_id, inputs, lengths = get_batch(
 55 |         #     tfrecords_lst, FLAGS.batch_size, FLAGS.input_dim,
 56 |         #     FLAGS.output_dim, 3, 3, num_enqueuing_threads=FLAGS.num_threads,
 57 |         #     num_epochs=FLAGS.num_epochs, infer=True)
 58 | 
 59 | 
 60 |         init = tf.group(tf.global_variables_initializer(),
 61 |                         tf.local_variables_initializer())
 62 | 
 63 |         sess = tf.Session()
 64 | 
 65 |         sess.run(init)
 66 | 
 67 |         start = datetime.datetime.now()
 68 |         coord = tf.train.Coordinator()
 69 |         threads = tf.train.start_queue_runners(sess=sess, coord=coord)
 70 | 
 71 |         try:
 72 |             batch = 0
 73 |             while not coord.should_stop():
 74 |                 # Print an overview fairly often.
 75 |                 # tr_utt_id, tr_inputs, tr_labels, tr_lengths = sess.run([
 76 |                 #     utt_id, inputs, labels, lengths])
 77 |                 tr_inputs, tr_labels = sess.run([
 78 |                     inputs, labels])
 79 |                 # tr_utt_id, tr_inputs, tr_lengths = sess.run([
 80 |                 #     utt_id, inputs, lengths])
 81 |                 # tf.logging.info(tr_utt_id)
 82 |                 # tf.logging.info(tr_inputs)
 83 |                 # tf.logging.info(tr_labels)
 84 |                 # tf.logging.info('inputs shape : '+ str(tr_inputs.shape))
 85 |                 # tf.logging.info('labels shape : ' + str(tr_labels.shape))
 86 |                 # tf.logging.info('actual lengths : ' + str(tr_lengths))
 87 |                 batch += 1
 88 |         except tf.errors.OutOfRangeError:
 89 |             tf.logging.info("Batch number is %d" % batch)
 90 |             tf.logging.info('Done training -- epoch limit reached')
 91 |         finally:
 92 |             # When done, ask the threads to stop.
 93 |             coord.request_stop()
 94 | 
 95 |         end = datetime.datetime.now()
 96 |         duration = (end - start).total_seconds()
 97 |         print("Reading time is %.0fs." % duration)
 98 |         # Wait for threads to finish.
 99 |         coord.join(threads)
100 |         sess.close()
101 | 
102 | 
103 | if __name__ == '__main__':
104 |     parser = argparse.ArgumentParser()
105 |     parser.add_argument(
106 |         '--batch_size',
107 |         type=int,
108 |         default=32,
109 |         help='Mini-batch size.'
110 |     )
111 |     parser.add_argument(
112 |         '--input_dim',
113 |         type=int,
114 |         default=257,
115 |         help='The dimension of inputs.'
116 |     )
117 |     parser.add_argument(
118 |         '--output_dim',
119 |         type=int,
120 |         default=40,
121 |         help='The dimension of outputs.'
122 |     )
123 |     parser.add_argument(
124 |         '--num_threads',
125 |         type=int,
126 |         default=1,
127 |         help='The num of threads to read tfrecords files.'
128 |     )
129 |     parser.add_argument(
130 |         '--num_epochs',
131 |         type=int,
132 |         default=1,
133 |         help='The num of epochs to read tfrecords files.'
134 |     )
135 |     parser.add_argument(
136 |         '--data_dir',
137 |         type=str,
138 |         default='data/tfrecords/',
139 |         help='Directory of train, val and test data.'
140 |     )
141 |     FLAGS, unparsed = parser.parse_known_args()
142 |     pp.pprint(FLAGS.__dict__)
143 |     sys.stdout.flush()
144 |     main()
145 | 


--------------------------------------------------------------------------------
/io_funcs/verify_tfrecords.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """Checks if a set of TFRecords appear to be valid.
 5 | 
 6 | Specifically, this checks whether the provided record sizes are consistent and
 7 | that the file does not end in the middle of a record. It does not verify the
 8 | CRCs.
 9 | """
10 | 
11 | import struct
12 | import tensorflow as tf
13 | 
14 | from tensorflow import app
15 | from tensorflow import flags
16 | from tensorflow import gfile
17 | from tensorflow import logging
18 | 
19 | flags = tf.app.flags
20 | FLAGS = flags.FLAGS
21 | flags.DEFINE_string("input_data_pattern", "",
22 |                     "File glob defining for the TFRecords files.")
23 | 
24 | 
25 | def main(unused_argv):
26 |   logging.set_verbosity(tf.logging.INFO)
27 |   logging.info(FLAGS.input_data_pattern)
28 |   paths = gfile.Glob(FLAGS.input_data_pattern)
29 |   logging.info("Found %s files.", len(paths))
30 |   for path in paths:
31 |     with gfile.Open(path, "r") as f:
32 |       first_read = True
33 |       while True:
34 |         length_raw = f.read(8)
35 |         if not length_raw and first_read:
36 |           logging.fatal("File %s has no data.", path)
37 |           break
38 |         elif not length_raw:
39 |           logging.info("File %s looks good.", path)
40 |           break
41 |         else:
42 |           first_read = False
43 |         if len(length_raw) != 8:
44 |           logging.fatal("File ends when reading record length: " + path)
45 |           break
46 |         length, = struct.unpack("L", length_raw)
47 |         # +8 to include the crc values.
48 |         record = f.read(length + 8)
49 |         if len(record) != length + 8:
50 |           logging.fatal("File ends in the middle of a record: " + path)
51 |           break
52 | 
53 | 
54 | if __name__ == "__main__":
55 |   app.run()
56 | 


--------------------------------------------------------------------------------
/mini_data/Noise/Babble.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/Noise/Babble.wav


--------------------------------------------------------------------------------
/mini_data/Noise/Babble2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/Noise/Babble2.wav


--------------------------------------------------------------------------------
/mini_data/Noise/F16.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/Noise/F16.wav


--------------------------------------------------------------------------------
/mini_data/Noise/F162.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/Noise/F162.wav


--------------------------------------------------------------------------------
/mini_data/Noise/Factory1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/Noise/Factory1.wav


--------------------------------------------------------------------------------
/mini_data/Noise/Factory2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/Noise/Factory2.wav


--------------------------------------------------------------------------------
/mini_data/Noise/Pink.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/Noise/Pink.wav


--------------------------------------------------------------------------------
/mini_data/Noise/Pink2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/Noise/Pink2.wav


--------------------------------------------------------------------------------
/mini_data/Noise/Volvo.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/Noise/Volvo.wav


--------------------------------------------------------------------------------
/mini_data/Noise/Volvo2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/Noise/Volvo2.wav


--------------------------------------------------------------------------------
/mini_data/Noise/White.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/Noise/White.wav


--------------------------------------------------------------------------------
/mini_data/Noise/White2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/Noise/White2.wav


--------------------------------------------------------------------------------
/mini_data/test_noise/n64.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/test_noise/n64.wav


--------------------------------------------------------------------------------
/mini_data/test_noise/n71.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/test_noise/n71.wav


--------------------------------------------------------------------------------
/mini_data/train_noise/n1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/train_noise/n1.wav


--------------------------------------------------------------------------------
/mini_data/train_noise/n49.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/train_noise/n49.wav


--------------------------------------------------------------------------------
/mini_data/train_noise/n95.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/train_noise/n95.wav


--------------------------------------------------------------------------------
/mini_data/train_speech/TRAIN_DR1_FCJF0_SA1.WAV:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/train_speech/TRAIN_DR1_FCJF0_SA1.WAV


--------------------------------------------------------------------------------
/mini_data/train_speech/TRAIN_DR1_FKFB0_SX348.WAV:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/train_speech/TRAIN_DR1_FKFB0_SX348.WAV


--------------------------------------------------------------------------------
/mini_data/train_speech/TRAIN_DR1_MPGR0_SX150.WAV:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/train_speech/TRAIN_DR1_MPGR0_SX150.WAV


--------------------------------------------------------------------------------
/mini_data/train_speech/TRAIN_DR1_MRDD0_SI1680.WAV:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/mini_data/train_speech/TRAIN_DR1_MRDD0_SI1680.WAV


--------------------------------------------------------------------------------
/mini_data/train_speech/cleandata_test.txt:
--------------------------------------------------------------------------------
1 | /CDShare/REVERB_DATA/raw_wsj0_data/data/primary_microphone/si_tr/c1u/c1uc021p.wav
2 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | # Copyright 2017    Ke Wang
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/models/dnn.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2017    Ke Wang
  5 | 
  6 | """Build the feed forward fully connected neural networks."""
  7 | 
  8 | from __future__ import absolute_import
  9 | from __future__ import division
 10 | from __future__ import print_function
 11 | 
 12 | import sys
 13 | 
 14 | import numpy as np
 15 | import tensorflow as tf
 16 | from tensorflow.contrib.layers import batch_norm, fully_connected
 17 | from tensorflow.contrib.layers import xavier_initializer, l2_regularizer
 18 | 
 19 | 
 20 | class DNN(object):
 21 | 
 22 |   def __init__(self, dnn):
 23 |     self.dnn = dnn
 24 | 
 25 |   def __call__(self, inputs, labels, reuse=False):
 26 |     """Build DNN model. On first pass will make vars."""
 27 |     self.inputs = inputs
 28 |     self.labels = labels
 29 |     outputs = self.infer(reuse)
 30 |     return outputs
 31 | 
 32 |   def infer(self, reuse):
 33 |     dnn = self.dnn
 34 |     units = 1024
 35 |     hidden_layers = 3
 36 |     activation_fn = tf.nn.relu
 37 | 
 38 |     in_dims = self.inputs.get_shape().as_list()
 39 |     if len(in_dims) == 2:
 40 |       # shape format [batch, width]
 41 |       dims = self.inputs.get_shape().as_list()
 42 |       inputs = self.inputs
 43 |     elif len(in_dims) == 3:
 44 |       # shape format [batch, length, width]
 45 |       dims = self.inputs.get_shape().as_list()
 46 |       assert dims[0] == 1
 47 |       inputs = tf.squeeze(self.inputs, axis=[0])
 48 | 
 49 |     # If test of cv , BN should use global mean / stddev
 50 |     is_training = False if dnn.cross_validation else True
 51 | 
 52 |     with tf.variable_scope('g_model') as scope:
 53 |       if reuse:
 54 |         scope.reuse_variables()
 55 | 
 56 |       if dnn.batch_norm:
 57 |         normalizer_fn = batch_norm
 58 |         normalizer_params = {
 59 |             "is_training": is_training,
 60 |             "scale": True,
 61 |             "renorm": True
 62 |         }
 63 |       else:
 64 |         normalizer_fn = None
 65 |         normalizer_params = None
 66 | 
 67 |       if dnn.l2_scale > 0.0 and is_training:
 68 |         weights_regularizer = l2_regularizer(dnn.l2_scale)
 69 |       else:
 70 |         weights_regularizer = None
 71 |         dnn.keep_prob = 1.0
 72 | 
 73 |       if not reuse:
 74 |         print("****************************************")
 75 |         print("*** Generator summary ***")
 76 |         print("G inputs shape: {}".format(inputs.get_shape()))
 77 |       sys.stdout.flush()
 78 | 
 79 |       h = fully_connected(inputs, units,
 80 |                           activation_fn=activation_fn,
 81 |                           normalizer_fn=normalizer_fn,
 82 |                           normalizer_params=normalizer_params,
 83 |                           weights_initializer=xavier_initializer(),
 84 |                           weights_regularizer=weights_regularizer,
 85 |                           biases_initializer=tf.zeros_initializer())
 86 |       h = self.dropout(h, dnn.keep_prob)
 87 |       if not reuse:
 88 |         print("G layer 1 output shape: {}".format(h.get_shape()), end=" *** ")
 89 |         self.nnet_info(normalizer_fn, dnn.keep_prob, weights_regularizer)
 90 | 
 91 |       for layer in range(hidden_layers):
 92 |         h = fully_connected(h, units,
 93 |                             activation_fn=activation_fn,
 94 |                             normalizer_fn=normalizer_fn,
 95 |                             normalizer_params=normalizer_params,
 96 |                             weights_initializer=xavier_initializer(),
 97 |                             weights_regularizer=weights_regularizer,
 98 |                             biases_initializer=tf.zeros_initializer())
 99 |         h = self.dropout(h, dnn.keep_prob)
100 |         if not reuse:
101 |           print("G layer {} output shape: {}".format(
102 |               layer+2, h.get_shape()), end=" *** ")
103 |           self.nnet_info(normalizer_fn, dnn.keep_prob, weights_regularizer)
104 | 
105 |       # Linear output
106 |       y = fully_connected(h, dnn.output_dim,
107 |                           activation_fn=None,
108 |                           weights_initializer=xavier_initializer(),
109 |                           weights_regularizer=weights_regularizer,
110 |                           biases_initializer=tf.zeros_initializer())
111 |       if not reuse:
112 |         print("G output shape: {}".format(y.get_shape()))
113 |         sys.stdout.flush()
114 |     return y
115 | 
116 |   def dropout(self, x, keep_prob):
117 |     if keep_prob != 1.0:
118 |       y = tf.nn.dropout(x, keep_prob)
119 |     else:
120 |       y = x
121 |     return y
122 | 
123 |   def nnet_info(self, batch_norm, keep_prob, weights_regularizer):
124 |     if batch_norm is not None:
125 |       print("use batch normalization", end=" *** ")
126 |     if keep_prob != 1.0:
127 |       print("keep prob is {}".format(keep_prob), end=" *** ")
128 |     if weights_regularizer is not None:
129 |       print("L2 regularizer scale is {}".format(self.dnn.l2_scale), end=" *** ")
130 |     print()
131 | 


--------------------------------------------------------------------------------
/models/dnn_trainer.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2019.1    Nan LEE
  5 | 
  6 | from __future__ import absolute_import
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | import os
 11 | import sys
 12 | 
 13 | import numpy as np
 14 | 
 15 | sys.path.append(os.path.dirname(sys.path[0]))
 16 | from models.dnn import *
 17 | from models.resnet_rced import *
 18 | from utils.ops import *
 19 | from keras.backend.tensorflow_backend import set_session
 20 | config = tf.ConfigProto()
 21 | config.gpu_options.per_process_gpu_memory_fraction = 0.4
 22 |         # config.gpu_options.allow_growth = True
 23 | set_session(tf.Session(config=config))
 24 | 
 25 | class Model(object):
 26 | 
 27 |     def __init__(self, name='BaseModel'):
 28 |         self.name = name
 29 | 
 30 |     def save(self, save_dir, step):
 31 |         model_name = self.name
 32 |         if not os.path.exists(save_dir):
 33 |             os.makedirs(save_dir)
 34 |         if not hasattr(self, 'saver'):
 35 |             self.saver = tf.train.Saver()
 36 |         self.saver.save(self.sess,
 37 |                         os.path.join(save_dir, model_name),
 38 |                         global_step=step)
 39 | 
 40 |     def load(self, save_dir, model_file=None, moving_average=False):
 41 |         if not os.path.exists(save_dir):
 42 |             print('[!] Checkpoints path does not exist...')
 43 |             return False
 44 |         print('[*] Reading checkpoints...')
 45 |         if model_file is None:
 46 |             ckpt = tf.train.get_checkpoint_state(save_dir)
 47 |             if ckpt and ckpt.model_checkpoint_path:
 48 |                 ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
 49 |             else:
 50 |                 return False
 51 |         else:
 52 |             ckpt_name = model_file
 53 | 
 54 |         if moving_average:
 55 |             # Restore the moving average version of the learned variables for eval.
 56 |             variable_averages = tf.train.ExponentialMovingAverage(
 57 |                                                      self.MOVING_AVERAGE_DECAY)
 58 |             variables_to_restore = variable_averages.variables_to_restore()
 59 |             saver = tf.train.Saver(variables_to_restore)
 60 |         else:
 61 |             saver = tf.train.Saver()
 62 |         saver.restore(self.sess, os.path.join(save_dir, ckpt_name))
 63 |         print('[*] Read {}'.format(ckpt_name))
 64 |         return True
 65 | 
 66 | 
 67 | class DNNTrainer(Model):
 68 |     def __init__(self, sess, args, devices,
 69 |                  inputs, labels, cross_validation=False, name='DNNTrainer'):
 70 |         super(DNNTrainer, self).__init__(name)
 71 |         self.sess = sess
 72 |         self.cross_validation = cross_validation
 73 |         self.MOVING_AVERAGE_DECAY = 0.9999
 74 |         if cross_validation:
 75 |             self.keep_prob = 1.0
 76 |         else:
 77 |             self.keep_prob = args.keep_prob
 78 |         self.batch_norm = args.batch_norm
 79 |         self.batch_size = args.batch_size
 80 |         self.devices = devices
 81 |         self.save_dir = args.save_dir
 82 |         self.writer = tf.summary.FileWriter(os.path.join(
 83 |             args.save_dir,'train'), sess.graph)
 84 |         self.l2_scale = args.l2_scale
 85 |         # data
 86 |         self.input_dim = args.input_dim
 87 |         self.output_dim = args.output_dim
 88 |         self.left_context = args.left_context
 89 |         self.right_context = args.right_context
 90 |         self.batch_size = args.batch_size
 91 |         # Batch Normalization
 92 |         self.batch_norm = args.batch_norm
 93 |         self.g_disturb_weights = False
 94 |         # define the functions
 95 |         self.g_learning_rate = tf.Variable(args.g_learning_rate, trainable=False)
 96 |         if args.g_type == 'dnn':
 97 |             self.generator = DNN(self)
 98 |         elif args.g_type == 'res_rced':
 99 |             self.generator = R_RCED(self)
100 |         else:
101 |             raise ValueError('Unrecognized G type {}'.format(args.g_type))
102 |         if labels is None:
103 |             self.generator(inputs, labels, reuse=False)
104 |         else:
105 |             self.build_model(inputs, labels)
106 | 
107 |     def build_model(self, inputs, labels):
108 |         all_g_grads = []
109 |         # g_opt = tf.train.RMSPropOptimizer(self.g_learning_rate)
110 |         # g_opt = tf.train.GradientDescentOptimizer(self.g_learning_rate)
111 |         g_opt = tf.train.AdamOptimizer(self.g_learning_rate)
112 |         # Track the moving averages of all trainable variables.
113 |         variable_averages = tf.train.ExponentialMovingAverage(
114 |                 self.MOVING_AVERAGE_DECAY)
115 | 
116 |         with tf.variable_scope(tf.get_variable_scope()):
117 |             for idx, device in enumerate(self.devices):
118 |                 with tf.device("/%s" % device):
119 |                     with tf.name_scope("device_%s" % idx):
120 |                         with variables_on_gpu():
121 |                             self.build_model_single_gpu(idx, inputs, labels)
122 |                             tf.get_variable_scope().reuse_variables()
123 |                             if not self.cross_validation:
124 |                                 update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
125 |                                 with tf.control_dependencies(update_ops):
126 |                                     g_grads = g_opt.compute_gradients(
127 |                                         self.g_losses[-1], var_list=self.g_vars)
128 |                                     all_g_grads.append(g_grads)
129 |         if not self.cross_validation:
130 |             avg_g_grads = average_gradients(all_g_grads)
131 |             g_apply_gradient_op = g_opt.apply_gradients(avg_g_grads)
132 |             variables_averages_op = variable_averages.apply(
133 |                     tf.trainable_variables())
134 |             # Group all updates to into a single train op.
135 |             self.g_opt = tf.group(g_apply_gradient_op, variables_averages_op)
136 | 
137 | 
138 |     def build_model_single_gpu(self, gpu_idx, inputs, labels):
139 |         if gpu_idx == 0:
140 |             g = self.generator(inputs, labels, reuse=False)
141 | 
142 |         g = self.generator(inputs, labels, reuse=True)
143 | 
144 |         if gpu_idx == 0:
145 |             self.g_losses = []
146 |             self.g_mse_losses = []
147 |             self.g_l2_losses = []
148 | 
149 |         g_mse_loss = tf.losses.mean_squared_error(g, labels) 
150 |         if not self.cross_validation and self.l2_scale > 0.0:
151 |             reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES, '.*g_model')
152 |             g_l2_loss = tf.reduce_sum(reg_losses)
153 |         else:
154 |             g_l2_loss = tf.constant(0.0)
155 |         g_loss = g_mse_loss + g_l2_loss
156 | 
157 |         self.g_mse_losses.append(g_mse_loss)
158 |         self.g_l2_losses.append(g_l2_loss)
159 |         self.g_losses.append(g_loss)
160 | 
161 |         self.g_mse_loss_summ = scalar_summary("g_mse_loss", g_mse_loss)
162 |         self.g_l2_loss_summ = scalar_summary("g_l2_loss", g_l2_loss)
163 |         self.g_loss_summ = scalar_summary("g_loss", g_loss)
164 | 
165 |         summaries = [self.g_mse_loss_summ,
166 |                      self.g_l2_loss_summ,
167 |                      self.g_loss_summ]
168 | 
169 |         self.summaries = tf.summary.merge(summaries)
170 | 
171 |         if gpu_idx == 0:
172 |             self.get_vars()
173 | 
174 |     def get_vars(self):
175 |         t_vars = tf.trainable_variables()
176 |         self.g_vars_dict = {}
177 |         for var in t_vars:
178 |             if var.name.startswith('g_'):
179 |                 self.g_vars_dict[var.name] = var
180 |         self.g_vars = self.g_vars_dict.values()
181 |         self.all_vars = t_vars
182 |         if self.g_disturb_weights and not self.cross_validation:
183 |             stddev = 0.00001
184 |             print("Add Gaussian noise to G weights (stddev = %s)" % (stddev))
185 |             sys.stdout.flush()
186 |             self.g_disturb = [v.assign(
187 |                 tf.add(v, tf.truncated_normal([], 0, stddev))) for v in self.g_vars]
188 |         else:
189 |             print("Not add noise to G weights")
190 | 


--------------------------------------------------------------------------------
/models/resnet_rced.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2019.7    Lee
  5 | 
  6 | """Redundant Convolutional Encoder Decoder (R-CED)
  7 | A fully convolutional neural network for speech enhancement(https://arxiv.org/pdf/1609.07132).
  8 | """
  9 | 
 10 | from __future__ import absolute_import
 11 | from __future__ import division
 12 | from __future__ import print_function
 13 | 
 14 | import sys
 15 | import numpy as np
 16 | import tensorflow as tf
 17 | from tensorflow.contrib.layers import xavier_initializer, l2_regularizer
 18 | from tensorflow.contrib.layers import batch_norm, fully_connected
 19 | 
 20 | class R_RCED(object):
 21 | 
 22 |     def __init__(self, rced):
 23 |         self.rced = rced
 24 | 
 25 |     def __call__(self, inputs, labels, reuse=False):
 26 |         """Build CNN models. On first pass will make vars."""
 27 |         self.inputs = inputs
 28 |         self.labels = labels
 29 |         print("-----------------------------inputs--------")
 30 |         print(np.shape(inputs))
 31 |         self.inputs_O = inputs
 32 |         outputs = self.infer(reuse)
 33 |         print(np.shape(outputs))
 34 |         return outputs
 35 | 
 36 |     def infer(self, reuse):
 37 |         rced = self.rced
 38 |         activation_fn = tf.nn.relu
 39 |         is_training = True
 40 | 
 41 |         input_dim = rced.input_dim
 42 |         left_context = rced.left_context
 43 |         right_context = rced.right_context
 44 |         splice_dim = left_context + 1 + right_context
 45 |         #inputs_O = self.inputs
 46 |         in_dims = self.inputs.get_shape().as_list()
 47 |         if len(in_dims) == 2:
 48 |             # shape format [batch, width]
 49 |             dims = self.inputs.get_shape().as_list()
 50 |             assert dims[0] == rced.batch_size
 51 |             inputs = tf.reshape(self.inputs, [dims[0], splice_dim, input_dim])
 52 |             inputs = tf.expand_dims(inputs, -1)
 53 |         elif len(in_dims) == 3:
 54 |             # shape format [batch, length, width]
 55 |             dims = self.inputs.get_shape().as_list()
 56 |             assert dims[0] == 1
 57 |             inputs = tf.squeeze(self.inputs, [0])
 58 |             inputs = tf.reshape(self.inputs, [-1, splice_dim, input_dim])
 59 |             inputs = tf.expand_dims(inputs, -1)
 60 | 
 61 |         # If test of cv , BN should use global mean / stddev
 62 |         if rced.cross_validation:
 63 |             is_training = False
 64 | 
 65 |         with tf.variable_scope('g_model') as scope:
 66 |             if reuse:
 67 |                 scope.reuse_variables()
 68 | 
 69 |             if rced.batch_norm:
 70 |                 normalizer_fn = batch_norm
 71 |                 normalizer_params = {
 72 |                     "is_training": is_training,
 73 |                     "scale": True,
 74 |                     "renorm": True
 75 |                 }
 76 |             else:
 77 |                 normalizer_fn = None
 78 |                 normalizer_params = None
 79 | 
 80 |             if rced.l2_scale > 0.0 and is_training:
 81 |                 weights_regularizer = l2_regularizer(rced.l2_scale)
 82 |             else:
 83 |                 weights_regularizer = None
 84 |                 keep_prob = 1.0
 85 | 
 86 |             if not reuse:
 87 |                 print("*** Generator summary ***")
 88 |                 print("G inputs shape: {}".format(inputs.get_shape()))
 89 | 
 90 |             # inputs format [batch, in_height, in_width, in_channels]
 91 |             # filters format [filter_height, filter_width, in_channels, out_channels]
 92 |             filters_num = [12, 12, 24, 24, 32, 64, 32, 24, 24, 12, 12]
 93 |             filters_width = [13, 11, 9, 7, 7, 7, 7 ,7, 9, 11, 13]
 94 |             assert len(filters_num) == len(filters_num)
 95 |             inputs_O = tf.reshape(inputs, [-1,  splice_dim * input_dim])
 96 |             inputs_0 = tf.contrib.layers.conv2d(inputs, filters_num[0],[splice_dim, filters_width[0]],activation_fn=activation_fn,
 97 |                             normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,weights_initializer=xavier_initializer(),
 98 |                             weights_regularizer=weights_regularizer,biases_initializer=tf.zeros_initializer())
 99 |             #inputs_333 = inputs + inputs_0
100 |             inputs_1 = tf.contrib.layers.conv2d(inputs_0, filters_num[1],[splice_dim, filters_width[1]],activation_fn=activation_fn,
101 |                             normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,weights_initializer=xavier_initializer(),
102 |                             weights_regularizer=weights_regularizer,biases_initializer=tf.zeros_initializer())
103 | #            #inputs_1 = inputs_1 + inputs_0
104 |             #inputs_1=tf.layers.max_pooling2d(inputs=inputs_1, pool_size=[2, 2], strides=2,padding = 'valid')
105 |             inputs_2 = tf.contrib.layers.conv2d(inputs_1, filters_num[2],[splice_dim, filters_width[2]],activation_fn=activation_fn,
106 |                             normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,weights_initializer=xavier_initializer(),
107 |                             weights_regularizer=weights_regularizer,biases_initializer=tf.zeros_initializer())
108 |             #inputs_2 = inputs_2 + inputs_1
109 |             inputs_3 = tf.contrib.layers.conv2d(inputs_2, filters_num[3],[splice_dim, filters_width[3]],activation_fn=activation_fn,
110 |                             normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,weights_initializer=xavier_initializer(),
111 |                             weights_regularizer=weights_regularizer,biases_initializer=tf.zeros_initializer())
112 | #            #inputs_3 = inputs_3 + inputs_2
113 |             #inputs_3=tf.layers.max_pooling2d(inputs=inputs_3, pool_size=[2, 2], strides=2,padding = 'valid')
114 |             inputs_4 = tf.contrib.layers.conv2d(inputs_3, filters_num[4],[splice_dim, filters_width[4]],activation_fn=activation_fn,
115 |                             normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,weights_initializer=xavier_initializer(),
116 |                             weights_regularizer=weights_regularizer,biases_initializer=tf.zeros_initializer())
117 |             inputs_5 = tf.contrib.layers.conv2d(inputs_3, filters_num[5],[splice_dim, filters_width[5]],activation_fn=activation_fn,
118 |                             normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,weights_initializer=xavier_initializer(),
119 |                             weights_regularizer=weights_regularizer,biases_initializer=tf.zeros_initializer())
120 |             #inputs_4 = inputs_4 + inputs_3
121 |             inputs_6 = tf.contrib.layers.conv2d(inputs_4, filters_num[6],[splice_dim, filters_width[6]],activation_fn=activation_fn,
122 |                             normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,weights_initializer=xavier_initializer(),
123 |                             weights_regularizer=weights_regularizer,biases_initializer=tf.zeros_initializer())
124 | #            #inputs_5 = inputs_5 + inputs_4  
125 |             #inputs_5=tf.layers.max_pooling2d(inputs=inputs_5, pool_size=[2, 2], strides=2)
126 |             #inputs_5=tf.layers.conv2d_transpose(inputs_5,filters = filters_num[6],kernel_size= (2,2),strides= (2,2),padding= 'valid',activation= tf.nn.relu)
127 |             inputs_7 = tf.contrib.layers.conv2d(inputs_5, filters_num[7],[splice_dim, filters_width[7]],activation_fn=activation_fn,
128 |                             normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,weights_initializer=xavier_initializer(),
129 |                             weights_regularizer=weights_regularizer,biases_initializer=tf.zeros_initializer())
130 | 
131 |             #inputs_7 = inputs_7 + inputs_3
132 |             inputs_8 = tf.contrib.layers.conv2d(inputs_6, filters_num[8],[splice_dim, filters_width[8]],activation_fn=activation_fn,
133 |                             normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,weights_initializer=xavier_initializer(),
134 |                             weights_regularizer=weights_regularizer,biases_initializer=tf.zeros_initializer())
135 |             
136 |             #inputs_8 = inputs_8 + inputs_6 
137 |             #inputs_7=tf.layers.conv2d_transpose(inputs_7,filters = filters_num[6],kernel_size= (2,2),strides= (2,2),padding= 'valid',activation= tf.nn.relu)
138 |             #inputs_7=tf.layers.max_pooling2d(inputs=inputs_7, pool_size=[2, 2], strides=2)
139 |             inputs_8 = tf.contrib.layers.conv2d(inputs_7, filters_num[8],[splice_dim, filters_width[8]],activation_fn=activation_fn,
140 |                             normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,weights_initializer=xavier_initializer(),
141 |                             weights_regularizer=weights_regularizer,biases_initializer=tf.zeros_initializer())
142 | 
143 |             inputs_9 = tf.contrib.layers.conv2d(inputs_8, filters_num[9],[splice_dim, filters_width[9]],activation_fn=activation_fn,
144 |                             normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,weights_initializer=xavier_initializer(),
145 |                             weights_regularizer=weights_regularizer,biases_initializer=tf.zeros_initializer())
146 |             #inputs_9=tf.layers.max_pooling2d(inputs=inputs_9, pool_size=[2, 2], strides=2)
147 |             #inputs_9 = inputs_9 + inputs_8
148 |             print("***********shaper---------------------")
149 |             print(np.shape(inputs_9))
150 | 
151 | #            name_I = "inputs_"+str(len(filters_num)+1)
152 | #            inputs = name_I
153 |             # Linear output
154 |             # inputs = tf.reshape(inputs, [rced.batch_size, -1])
155 |             inputs_D = tf.reshape(inputs_9, [-1, 11 * 257 * filters_num[-1]])
156 |             print("***********reshaper------------after---------")
157 |             print(np.shape(inputs_D))
158 | 
159 |             inputs_D = tf.concat([inputs_D, inputs_O],1)
160 |             y = fully_connected(inputs_D, 257,
161 |                                 activation_fn=None,
162 |                                 weights_initializer=xavier_initializer(),
163 |                                 weights_regularizer=weights_regularizer,
164 |                                 biases_initializer=tf.zeros_initializer())
165 |             if not reuse:
166 |                 print("G output shape: {}".format(y.get_shape()))
167 |                 sys.stdout.flush()
168 | 
169 |         return y
170 | 
171 |     def nnet_info(self, batch_norm, keep_prob, weights_regularizer):
172 |         if batch_norm is not None:
173 |             print("use batch normalization", end=" *** ")
174 |         if keep_prob != 1.0:
175 |             print("keep prob is {}".format(keep_prob),
176 |                   end=" *** ")
177 |         if weights_regularizer is not None:
178 |             print("L2 regularizer scale is {}".format(self.rced.l2_scale),
179 |                   end=" *** ")
180 | 
181 |         print()
182 | 


--------------------------------------------------------------------------------
/pesq:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/pesq


--------------------------------------------------------------------------------
/pre_process_data.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2019.7    Nan Lee
  5 | 
  6 | import os
  7 | import soundfile
  8 | import numpy as np
  9 | import argparse
 10 | import csv
 11 | import time
 12 | #import matplotlib.pyplot as plt
 13 | from scipy import signal
 14 | import pickle
 15 | import cPickle
 16 | import h5py
 17 | from sklearn import preprocessing
 18 | import fnmatch
 19 | # import prepare_data as pp_data
 20 | import config as cfg
 21 | 
 22 | def create_folder(fd):
 23 |     if not os.path.exists(fd):
 24 |         os.makedirs(fd)
 25 |         
 26 | def read_audio(path, target_fs=None):
 27 |     (audio, fs) = soundfile.read(path)
 28 |     if audio.ndim > 1:
 29 |         audio = np.mean(audio, axis=1)
 30 |     if target_fs is not None and fs != target_fs:
 31 |         audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
 32 |         fs = target_fs
 33 |     return audio, fs
 34 |     
 35 | def write_audio(path, audio, sample_rate):
 36 |     soundfile.write(file=path, data=audio, samplerate=sample_rate)
 37 | 	
 38 | def calculate_train_features(args):
 39 |     """Calculate spectrogram for mixed, speech and noise audio. Then write the 
 40 |     features to disk. 
 41 |     
 42 |     Args:
 43 |       workspace: str, path of workspace. 
 44 |       data_type: str, 'train' | 'test'. 
 45 |       speech_path:str, noisy_speech_dir clean_speech_dir
 46 |     """
 47 |     data_type = args.data_type
 48 |     fs = cfg.sample_rate
 49 |     train_speech_path = args.train_speech_path
 50 |     cnt =0
 51 |     t1 = time.time()
 52 |     with open(train_speech_path,'r') as speech_org_path:
 53 |         for ii in speech_org_path:
 54 |             #read clean and noisy speech
 55 |             path_tmp = ii.split()
 56 |             noise_path = path_tmp[0]
 57 |             #out_feature_name = noise_path.split("/")[-1]
 58 |             cln_path = path_tmp[1]
 59 |             #out_feature_name = cln_path.split("/")[-1]
 60 |             out_feature_name = noise_path.split("/")[-1]
 61 |             (reverb_speech_audio, _) = read_audio(noise_path, target_fs=fs)
 62 |             (clean_speech_audio, _) = read_audio(cln_path, target_fs=fs)
 63 |             #extract logspectram feature
 64 |             mixed_complx_x = calc_sp(reverb_speech_audio, mode='magnitude')
 65 |             mixed_complx_x = np.log(mixed_complx_x + 1e-08).astype(np.float32)
 66 |             speech_x = calc_sp(clean_speech_audio, mode='magnitude')
 67 |             #print(mixed_complx_x)
 68 |             speech_x = np.log(speech_x + 1e-08).astype(np.float32)
 69 |             # the output feature path
 70 |             out_feat_path = os.path.join("workspace", "features", "spectrogram",data_type,"%s.p" % out_feature_name)
 71 |             create_folder(os.path.dirname(out_feat_path))
 72 |             data = [mixed_complx_x, speech_x]
 73 |             cPickle.dump(data, open(out_feat_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL)
 74 |             cnt += 1
 75 |             print cnt
 76 |     print("Extracting feature time: %s" % (time.time() - t1))
 77 | def rms(y):
 78 |     """Root mean square. 
 79 |     """
 80 |     return np.sqrt(np.mean(np.abs(y) ** 2, axis=0, keepdims=False))
 81 | 
 82 | def get_amplitude_scaling_factor(s, n, snr, method='rms'):
 83 |     """Given s and n, return the scaler s according to the snr. 
 84 |     
 85 |     Args:
 86 |       s: ndarray, source1. 
 87 |       n: ndarray, source2. 
 88 |       snr: float, SNR. 
 89 |       method: 'rms'. 
 90 |       
 91 |     Outputs:
 92 |       float, scaler. 
 93 |     """
 94 |     original_sn_rms_ratio = rms(s) / rms(n)
 95 |     target_sn_rms_ratio =  10. ** (float(snr) / 20.)    # snr = 20 * lg(rms(s) / rms(n))
 96 |     signal_scaling_factor = target_sn_rms_ratio / original_sn_rms_ratio
 97 |     return signal_scaling_factor
 98 | 
 99 | def calc_sp(audio, mode):
100 |     """Calculate spectrogram. 
101 |     
102 |     Args:
103 |       audio: 1darray. 
104 |       mode: string, 'magnitude' | 'complex'
105 |     
106 |     Returns:
107 |       spectrogram: 2darray, (n_time, n_freq). 
108 |     """
109 |     n_window = cfg.n_window
110 |     n_overlap = cfg.n_overlap
111 |     ham_win = np.hamming(n_window)
112 |     [f, t, x] = signal.spectral.spectrogram(
113 |                     audio, 
114 |                     window=ham_win,
115 |                     nperseg=n_window, 
116 |                     noverlap=n_overlap, 
117 |                     detrend=False, 
118 |                     return_onesided=True, 
119 |                     mode=mode) 
120 |     x = x.T
121 |     if mode == 'magnitude':
122 |         x = x.astype(np.float32)
123 |     elif mode == 'complex':
124 |         x = x.astype(np.complex64)
125 |     else:
126 |         raise Exception("Incorrect mode!")
127 |     return x
128 | def log_sp(x):
129 |     return np.log(x + 1e-08)
130 | if __name__ == '__main__':
131 |     parser = argparse.ArgumentParser()
132 |     subparsers = parser.add_subparsers(dest='mode')
133 | 
134 |     parser_calculate_train_features = subparsers.add_parser('calculate_train_features')
135 |     parser_calculate_train_features.add_argument('--train_speech_path', type=str, required=True)
136 |     parser_calculate_train_features.add_argument('--data_type', type=str, required=True)
137 | 
138 |     args = parser.parse_args()
139 |     if args.mode == 'create_mixture_csv':
140 |         create_mixture_csv(args)
141 |     elif args.mode == 'calculate_train_features':
142 |         calculate_train_features(args)
143 |     elif args.mode == 'calculate_test_features':
144 |         calculate_test_features(args)
145 |     elif args.mode == 'pack_features':
146 |         pack_features(args)       
147 |     elif args.mode == 'compute_scaler':
148 |         compute_scaler(args)
149 |     else:
150 |         raise Exception("Error!")
151 | 


--------------------------------------------------------------------------------
/pre_process_test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2019.7    Nan Lee
  5 | 
  6 | import os
  7 | import soundfile
  8 | import numpy as np
  9 | import argparse
 10 | import csv
 11 | import time
 12 | #import matplotlib.pyplot as plt
 13 | from scipy import signal
 14 | import pickle
 15 | import cPickle
 16 | import h5py
 17 | from sklearn import preprocessing
 18 | import fnmatch
 19 | # import prepare_data as pp_data
 20 | import config as cfg
 21 | 
 22 | def create_folder(fd):
 23 |     if not os.path.exists(fd):
 24 |         os.makedirs(fd)
 25 |         
 26 | def read_audio(path, target_fs=None):
 27 |     (audio, fs) = soundfile.read(path)
 28 |     if audio.ndim > 1:
 29 |         audio = np.mean(audio, axis=1)
 30 |     if target_fs is not None and fs != target_fs:
 31 |         audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
 32 |         fs = target_fs
 33 |     return audio, fs
 34 |     
 35 | def write_audio(path, audio, sample_rate):
 36 |     soundfile.write(file=path, data=audio, samplerate=sample_rate)
 37 | 	
 38 | def calculate_train_features(args):
 39 |     """Calculate spectrogram for mixed, speech and noise audio. Then write the 
 40 |     features to disk. 
 41 |     
 42 |     Args:
 43 |       workspace: str, path of workspace. 
 44 |       data_type: str, 'train' | 'test'. 
 45 |       speech_path:str, noisy_speech_dir clean_speech_dir
 46 |     """
 47 |     data_type = args.data_type
 48 |     fs = cfg.sample_rate
 49 |     train_speech_path = args.train_speech_path
 50 |     cnt =0
 51 |     t1 = time.time()
 52 |     with open(train_speech_path,'r') as speech_org_path:
 53 |         for ii in speech_org_path:
 54 |             #read clean and noisy speech
 55 |             path_tmp = ii.split()
 56 |             noise_path = path_tmp[0]
 57 |             #out_feature_name = noise_path.split("/")[-1]
 58 |             out_feature_name = noise_path.split("/")[-1]
 59 |             (reverb_speech_audio, _) = read_audio(noise_path, target_fs=fs)
 60 |             #extract logspectram feature
 61 |             mixed_complx_x = calc_sp(reverb_speech_audio, mode='complex')
 62 |             #mixed_complx_x = np.log(mixed_complx_x + 1e-08).astype(np.float32)
 63 |             # the output feature path
 64 |             out_feat_path = os.path.join("workspace", "features", "spectrogram",data_type,"%s.p" % out_feature_name)
 65 |             create_folder(os.path.dirname(out_feat_path))
 66 |             data = [mixed_complx_x]
 67 |             cPickle.dump(data, open(out_feat_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL)
 68 |             cnt += 1
 69 |             print cnt
 70 |     print("Extracting feature time: %s" % (time.time() - t1))
 71 | def rms(y):
 72 |     """Root mean square. 
 73 |     """
 74 |     return np.sqrt(np.mean(np.abs(y) ** 2, axis=0, keepdims=False))
 75 | 
 76 | def get_amplitude_scaling_factor(s, n, snr, method='rms'):
 77 |     """Given s and n, return the scaler s according to the snr. 
 78 |     
 79 |     Args:
 80 |       s: ndarray, source1. 
 81 |       n: ndarray, source2. 
 82 |       snr: float, SNR. 
 83 |       method: 'rms'. 
 84 |       
 85 |     Outputs:
 86 |       float, scaler. 
 87 |     """
 88 |     original_sn_rms_ratio = rms(s) / rms(n)
 89 |     target_sn_rms_ratio =  10. ** (float(snr) / 20.)    # snr = 20 * lg(rms(s) / rms(n))
 90 |     signal_scaling_factor = target_sn_rms_ratio / original_sn_rms_ratio
 91 |     return signal_scaling_factor
 92 | 
 93 | def calc_sp(audio, mode):
 94 |     """Calculate spectrogram. 
 95 |     
 96 |     Args:
 97 |       audio: 1darray. 
 98 |       mode: string, 'magnitude' | 'complex'
 99 |     
100 |     Returns:
101 |       spectrogram: 2darray, (n_time, n_freq). 
102 |     """
103 |     n_window = cfg.n_window
104 |     n_overlap = cfg.n_overlap
105 |     ham_win = np.hamming(n_window)
106 |     [f, t, x] = signal.spectral.spectrogram(
107 |                     audio, 
108 |                     window=ham_win,
109 |                     nperseg=n_window, 
110 |                     noverlap=n_overlap, 
111 |                     detrend=False, 
112 |                     return_onesided=True, 
113 |                     mode=mode) 
114 |     x = x.T
115 |     if mode == 'magnitude':
116 |         x = x.astype(np.float32)
117 |     elif mode == 'complex':
118 |         x = x.astype(np.complex64)
119 |     else:
120 |         raise Exception("Incorrect mode!")
121 |     return x
122 | def log_sp(x):
123 |     return np.log(x + 1e-08)
124 | if __name__ == '__main__':
125 |     parser = argparse.ArgumentParser()
126 |     subparsers = parser.add_subparsers(dest='mode')
127 | 
128 |     parser_calculate_train_features = subparsers.add_parser('calculate_train_features')
129 |     parser_calculate_train_features.add_argument('--train_speech_path', type=str, required=True)
130 |     parser_calculate_train_features.add_argument('--data_type', type=str, required=True)
131 | 
132 |     args = parser.parse_args()
133 |     if args.mode == 'create_mixture_csv':
134 |         create_mixture_csv(args)
135 |     elif args.mode == 'calculate_train_features':
136 |         calculate_train_features(args)
137 |     elif args.mode == 'calculate_test_features':
138 |         calculate_test_features(args)
139 |     elif args.mode == 'pack_features':
140 |         pack_features(args)       
141 |     elif args.mode == 'compute_scaler':
142 |         compute_scaler(args)
143 |     else:
144 |         raise Exception("Error!")
145 | 


--------------------------------------------------------------------------------
/scripts/audio_utilities.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/scripts/audio_utilities.pyc


--------------------------------------------------------------------------------
/scripts/config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Summary:  Config file. 
 3 | Author:   Qiuqiang Kong
 4 | Created:  2017.12.21
 5 | Modified: -
 6 | """
 7 | 
 8 | sample_rate = 16000
 9 | n_window = 512      # windows size for FFT
10 | n_overlap = 256     # overlap of window
11 | 


--------------------------------------------------------------------------------
/scripts/config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/scripts/config.pyc


--------------------------------------------------------------------------------
/scripts/dataset_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2017    Ke Wang
 4 | 
 5 | set -euo pipefail
 6 | 
 7 | stage=2
 8 | 
 9 | train_dir=data/train/io_test
10 | #train_dir=data/train/train_100h
11 | 
12 | if [ $stage -le 0 ]; then
13 |   python io_funcs/convert_cmvn_to_numpy.py \
14 |     --inputs=$train_dir/inputs.cmvn \
15 |     --labels=$train_dir/labels.cmvn \
16 |     --save_dir=$train_dir
17 | fi
18 | 
19 | if [ $stage -le 1 ]; then
20 |   nj=1
21 |   logdir=exp/
22 |   if [ ! -d $logdir ]; then
23 |     mkdir -p $logdir
24 |   fi
25 | 
26 |   rm -rf $logdir/.error || exit 1;
27 |   bash scripts/split_scp.sh --nj $nj $train_dir
28 |   for i in $(seq $nj); do
29 |   (
30 |     python io_funcs/make_tfrecords.py \
31 |       --inputs=$train_dir/split${nj}/inputs${i}.scp \
32 |       --labels=$train_dir/split${nj}/labels${i}.scp \
33 |       --cmvn_dir=$train_dir \
34 |       --apply_cmvn=true \
35 |       --output_dir=$train_dir/tfrecords \
36 |       --name="train${i}"
37 |   ) || touch $logdir/.error &
38 |   done
39 |   wait
40 |   [ -f $logdir/.error ] && \
41 |     echo "$0: there was a problem while making TFRecords" && exit 1
42 |   echo "Making TFRecords done."
43 | fi
44 | 
45 | if [ $stage -le 2 ]; then
46 |   CUDA_VISIBLE_DEVICES="3" python io_funcs/tfrecords_dataset_test.py \
47 |     --batch_size=128 \
48 |     --input_dim=257 \
49 |     --output_dim=40 \
50 |     --num_threads=32 \
51 |     --num_epochs=1 \
52 |     --data_dir=$train_dir/tfrecords
53 | fi
54 | 
55 | 


--------------------------------------------------------------------------------
/scripts/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | #


--------------------------------------------------------------------------------
/scripts/datasets/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/scripts/datasets/__init__.pyc


--------------------------------------------------------------------------------
/scripts/datasets/audio.py:
--------------------------------------------------------------------------------
  1 | import librosa
  2 | import librosa.filters
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | from scipy import signal
  6 | from scipy.io import wavfile
  7 | 
  8 | 
  9 | def load_wav(path, sr):
 10 | 	return librosa.core.load(path, sr=sr)[0]
 11 | 
 12 | def save_wav(wav, path, sr):
 13 | 	wav *= 32767 / max(0.01, np.max(np.abs(wav)))
 14 | 	#proposed by @dsmiller
 15 | 	wavfile.write(path, sr, wav.astype(np.int16))
 16 | 
 17 | def save_wavenet_wav(wav, path, sr):
 18 | 	librosa.output.write_wav(path, wav, sr=sr)
 19 | 
 20 | def preemphasis(wav, k, preemphasize=True):
 21 | 	if preemphasize:
 22 | 		return signal.lfilter([1, -k], [1], wav)
 23 | 	return wav
 24 | 
 25 | def inv_preemphasis(wav, k, inv_preemphasize=True):
 26 | 	if inv_preemphasize:
 27 | 		return signal.lfilter([1], [1, -k], wav)
 28 | 	return wav
 29 | 
 30 | #From https://github.com/r9y9/wavenet_vocoder/blob/master/audio.py
 31 | def start_and_end_indices(quantized, silence_threshold=2):
 32 | 	for start in range(quantized.size):
 33 | 		if abs(quantized[start] - 127) > silence_threshold:
 34 | 			break
 35 | 	for end in range(quantized.size - 1, 1, -1):
 36 | 		if abs(quantized[end] - 127) > silence_threshold:
 37 | 			break
 38 | 
 39 | 	assert abs(quantized[start] - 127) > silence_threshold
 40 | 	assert abs(quantized[end] - 127) > silence_threshold
 41 | 
 42 | 	return start, end
 43 | 
 44 | def trim_silence(wav, hparams):
 45 | 	'''Trim leading and trailing silence
 46 | 
 47 | 	Useful for M-AILABS dataset if we choose to trim the extra 0.5 silence at beginning and end.
 48 | 	'''
 49 | 	#Thanks @begeekmyfriend and @lautjy for pointing out the params contradiction. These params are separate and tunable per dataset.
 50 | 	return librosa.effects.trim(wav, top_db= hparams.trim_top_db, frame_length=hparams.trim_fft_size, hop_length=hparams.trim_hop_size)[0]
 51 | 
 52 | def get_hop_size(hparams):
 53 | 	hop_size = hparams.hop_size
 54 | 	if hop_size is None:
 55 | 		assert hparams.frame_shift_ms is not None
 56 | 		hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
 57 | 	return hop_size
 58 | 
 59 | def linearspectrogram(wav, hparams):
 60 | 	D = _stft(preemphasis(wav, hparams.preemphasis, hparams.preemphasize), hparams)
 61 | 	S = _amp_to_db(np.abs(D), hparams) - hparams.ref_level_db
 62 | 
 63 | 	if hparams.signal_normalization:
 64 | 		return _normalize(S, hparams)
 65 | 	return S
 66 | 
 67 | def melspectrogram(wav, hparams):
 68 | 	D = _stft(preemphasis(wav, hparams.preemphasis, hparams.preemphasize), hparams)
 69 | 	S = _amp_to_db(_linear_to_mel(np.abs(D), hparams), hparams) - hparams.ref_level_db
 70 | 
 71 | 	if hparams.signal_normalization:
 72 | 		return _normalize(S, hparams)
 73 | 	return S
 74 | 
 75 | def inv_linear_spectrogram(linear_spectrogram, hparams):
 76 | 	'''Converts linear spectrogram to waveform using librosa'''
 77 | 	if hparams.signal_normalization:
 78 | 		D = _denormalize(linear_spectrogram, hparams)
 79 | 	else:
 80 | 		D = linear_spectrogram
 81 | 
 82 | 	S = _db_to_amp(D + hparams.ref_level_db) #Convert back to linear
 83 | 
 84 | 	if hparams.use_lws:
 85 | 		processor = _lws_processor(hparams)
 86 | 		D = processor.run_lws(S.astype(np.float64).T ** hparams.power)
 87 | 		y = processor.istft(D).astype(np.float32)
 88 | 		return inv_preemphasis(y, hparams.preemphasis, hparams.preemphasize)
 89 | 	else:
 90 | 		return inv_preemphasis(_griffin_lim(S ** hparams.power, hparams), hparams.preemphasis, hparams.preemphasize)
 91 | 
 92 | 
 93 | def inv_mel_spectrogram(mel_spectrogram, hparams):
 94 | 	'''Converts mel spectrogram to waveform using librosa'''
 95 | 	if hparams.signal_normalization:
 96 | 		D = _denormalize(mel_spectrogram, hparams)
 97 | 	else:
 98 | 		D = mel_spectrogram
 99 | 
100 | 	S = _mel_to_linear(_db_to_amp(D + hparams.ref_level_db), hparams)  # Convert back to linear
101 | 
102 | 	if hparams.use_lws:
103 | 		processor = _lws_processor(hparams)
104 | 		D = processor.run_lws(S.astype(np.float64).T ** hparams.power)
105 | 		y = processor.istft(D).astype(np.float32)
106 | 		return inv_preemphasis(y, hparams.preemphasis, hparams.preemphasize)
107 | 	else:
108 | 		return inv_preemphasis(_griffin_lim(S ** hparams.power, hparams), hparams.preemphasis, hparams.preemphasize)
109 | 
110 | def _lws_processor(hparams):
111 | 	import lws
112 | 	return lws.lws(hparams.n_fft, get_hop_size(hparams), fftsize=hparams.win_size, mode="speech")
113 | 
114 | def _griffin_lim(S):
115 | 	'''librosa implementation of Griffin-Lim
116 | 	Based on https://github.com/librosa/librosa/issues/434
117 | 	'''
118 | 	#angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
119 | 	S_complex = np.abs(S)
120 | 	#y = _istft(S_complex * angles)
121 | 	y = signal.spectral.istft(S,nperseg=512,noverlap=256)
122 | 	for i in range(100):
123 |                 theta = np.angle(signal.spectral.stft(y,nperseg=512,noverlap=256))
124 | 		tmp = S_complex * np.exp(1j * theta)
125 | 		y = signal.spectral.istft(tmp)
126 | 	return y
127 | 	#angles = np.exp(2j * np.pi * np.random.rand(*S.shape)
128 | 	#S_complex = np.abs(S).astype(np.complex)
129 | 	#y = _istft(S_complex * angles)
130 | 	#for i in range(30):
131 | #		angles = np.exp(1j * np.angle(_stft(y)))
132 | #		y = _istft(S_complex * angles)
133 | #	return y
134 | 
135 | def _stft(y):
136 |         
137 | 	#if hparams.use_lws:
138 | 	#	return _lws_processor(hparams).stft(y).T
139 | 	#else:
140 | 	return librosa.stft(y=y, hop_length=256, win_length=512)
141 | 
142 | def _istft(y):
143 | 	return librosa.istft(y, hop_length=256, win_length=512)
144 | 
145 | ##########################################################
146 | #Those are only correct when using lws!!! (This was messing with Wavenet quality for a long time!)
147 | def num_frames(length, fsize, fshift):
148 | 	"""Compute number of time frames of spectrogram
149 | 	"""
150 | 	pad = (fsize - fshift)
151 | 	if length % fshift == 0:
152 | 		M = (length + pad * 2 - fsize) // fshift + 1
153 | 	else:
154 | 		M = (length + pad * 2 - fsize) // fshift + 2
155 | 	return M
156 | 
157 | 
158 | def pad_lr(x, fsize, fshift):
159 | 	"""Compute left and right padding
160 | 	"""
161 | 	M = num_frames(len(x), fsize, fshift)
162 | 	pad = (fsize - fshift)
163 | 	T = len(x) + 2 * pad
164 | 	r = (M - 1) * fshift + fsize - T
165 | 	return pad, pad + r
166 | ##########################################################
167 | #Librosa correct padding
168 | def librosa_pad_lr(x, fsize, fshift):
169 | 	'''compute right padding (final frame)
170 | 	'''
171 | 	return int(fsize // 2)
172 | 
173 | 
174 | # Conversions
175 | _mel_basis = None
176 | _inv_mel_basis = None
177 | 
178 | def _linear_to_mel(spectogram, hparams):
179 | 	global _mel_basis
180 | 	if _mel_basis is None:
181 | 		_mel_basis = _build_mel_basis(hparams)
182 | 	return np.dot(_mel_basis, spectogram)
183 | 
184 | def _mel_to_linear(mel_spectrogram, hparams):
185 | 	global _inv_mel_basis
186 | 	if _inv_mel_basis is None:
187 | 		_inv_mel_basis = np.linalg.pinv(_build_mel_basis(hparams))
188 | 	return np.maximum(1e-10, np.dot(_inv_mel_basis, mel_spectrogram))
189 | 
190 | def _build_mel_basis(hparams):
191 | 	assert hparams.fmax <= hparams.sample_rate // 2
192 | 	return librosa.filters.mel(hparams.sample_rate, hparams.n_fft, n_mels=hparams.num_mels,
193 | 							   fmin=hparams.fmin, fmax=hparams.fmax)
194 | 
195 | def _amp_to_db(x, hparams):
196 | 	min_level = np.exp(hparams.min_level_db / 20 * np.log(10))
197 | 	return 20 * np.log10(np.maximum(min_level, x))
198 | 
199 | def _db_to_amp(x):
200 | 	return np.power(10.0, (x) * 0.05)
201 | 
202 | def _normalize(S, hparams):
203 | 	if hparams.allow_clipping_in_normalization:
204 | 		if hparams.symmetric_mels:
205 | 			return np.clip((2 * hparams.max_abs_value) * ((S - hparams.min_level_db) / (-hparams.min_level_db)) - hparams.max_abs_value,
206 | 			 -hparams.max_abs_value, hparams.max_abs_value)
207 | 		else:
208 | 			return np.clip(hparams.max_abs_value * ((S - hparams.min_level_db) / (-hparams.min_level_db)), 0, hparams.max_abs_value)
209 | 
210 | 	assert S.max() <= 0 and S.min() - hparams.min_level_db >= 0
211 | 	if hparams.symmetric_mels:
212 | 		return (2 * hparams.max_abs_value) * ((S - hparams.min_level_db) / (-hparams.min_level_db)) - hparams.max_abs_value
213 | 	else:
214 | 		return hparams.max_abs_value * ((S - hparams.min_level_db) / (-hparams.min_level_db))
215 | 
216 | def _denormalize(D, hparams):
217 | 	if hparams.allow_clipping_in_normalization:
218 | 		if hparams.symmetric_mels:
219 | 			return (((np.clip(D, -hparams.max_abs_value,
220 | 				hparams.max_abs_value) + hparams.max_abs_value) * -hparams.min_level_db / (2 * hparams.max_abs_value))
221 | 				+ hparams.min_level_db)
222 | 		else:
223 | 			return ((np.clip(D, 0, hparams.max_abs_value) * -hparams.min_level_db / hparams.max_abs_value) + hparams.min_level_db)
224 | 
225 | 	if hparams.symmetric_mels:
226 | 		return (((D + hparams.max_abs_value) * -hparams.min_level_db / (2 * hparams.max_abs_value)) + hparams.min_level_db)
227 | 	else:
228 | 		return ((D * -hparams.min_level_db / hparams.max_abs_value) + hparams.min_level_db)
229 | 


--------------------------------------------------------------------------------
/scripts/datasets/audio.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/scripts/datasets/audio.pyc


--------------------------------------------------------------------------------
/scripts/datasets/preprocessor.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from concurrent.futures import ProcessPoolExecutor
  3 | from functools import partial
  4 | 
  5 | import numpy as np
  6 | from datasets import audio
  7 | from wavenet_vocoder.util import is_mulaw, is_mulaw_quantize, mulaw, mulaw_quantize
  8 | 
  9 | 
 10 | def build_from_path(hparams, input_dirs, mel_dir, linear_dir, wav_dir, n_jobs=12, tqdm=lambda x: x):
 11 | 	"""
 12 | 	Preprocesses the speech dataset from a gven input path to given output directories
 13 | 
 14 | 	Args:
 15 | 		- hparams: hyper parameters
 16 | 		- input_dir: input directory that contains the files to prerocess
 17 | 		- mel_dir: output directory of the preprocessed speech mel-spectrogram dataset
 18 | 		- linear_dir: output directory of the preprocessed speech linear-spectrogram dataset
 19 | 		- wav_dir: output directory of the preprocessed speech audio dataset
 20 | 		- n_jobs: Optional, number of worker process to parallelize across
 21 | 		- tqdm: Optional, provides a nice progress bar
 22 | 
 23 | 	Returns:
 24 | 		- A list of tuple describing the train examples. this should be written to train.txt
 25 | 	"""
 26 | 
 27 | 	# We use ProcessPoolExecutor to parallelize across processes, this is just for
 28 | 	# optimization purposes and it can be omited
 29 | 	executor = ProcessPoolExecutor(max_workers=n_jobs)
 30 | 	futures = []
 31 | 	index = 1
 32 | 	for input_dir in input_dirs:
 33 | 		with open(os.path.join(input_dir, 'metadata.csv'), encoding='utf-8') as f:
 34 | 			for line in f:
 35 | 				parts = line.strip().split('|')
 36 | 				basename = parts[0]
 37 | 				wav_path = os.path.join(input_dir, 'wavs', '{}.wav'.format(basename))
 38 | 				text = parts[2]
 39 | 				futures.append(executor.submit(partial(_process_utterance, mel_dir, linear_dir, wav_dir, basename, wav_path, text, hparams)))
 40 | 				index += 1
 41 | 
 42 | 	return [future.result() for future in tqdm(futures) if future.result() is not None]
 43 | 
 44 | 
 45 | def _process_utterance(mel_dir, linear_dir, wav_dir, index, wav_path, text, hparams):
 46 | 	"""
 47 | 	Preprocesses a single utterance wav/text pair
 48 | 
 49 | 	this writes the mel scale spectogram to disk and return a tuple to write
 50 | 	to the train.txt file
 51 | 
 52 | 	Args:
 53 | 		- mel_dir: the directory to write the mel spectograms into
 54 | 		- linear_dir: the directory to write the linear spectrograms into
 55 | 		- wav_dir: the directory to write the preprocessed wav into
 56 | 		- index: the numeric index to use in the spectogram filename
 57 | 		- wav_path: path to the audio file containing the speech input
 58 | 		- text: text spoken in the input audio file
 59 | 		- hparams: hyper parameters
 60 | 
 61 | 	Returns:
 62 | 		- A tuple: (audio_filename, mel_filename, linear_filename, time_steps, mel_frames, linear_frames, text)
 63 | 	"""
 64 | 	try:
 65 | 		# Load the audio as numpy array
 66 | 		wav = audio.load_wav(wav_path, sr=hparams.sample_rate)
 67 | 	except FileNotFoundError: #catch missing wav exception
 68 | 		print('file {} present in csv metadata is not present in wav folder. skipping!'.format(
 69 | 			wav_path))
 70 | 		return None
 71 | 
 72 | 	#rescale wav
 73 | 	if hparams.rescale:
 74 | 		wav = wav / np.abs(wav).max() * hparams.rescaling_max
 75 | 
 76 | 	#M-AILABS extra silence specific
 77 | 	if hparams.trim_silence:
 78 | 		wav = audio.trim_silence(wav, hparams)
 79 | 
 80 | 	#Mu-law quantize
 81 | 	if is_mulaw_quantize(hparams.input_type):
 82 | 		#[0, quantize_channels)
 83 | 		out = mulaw_quantize(wav, hparams.quantize_channels)
 84 | 
 85 | 		#Trim silences
 86 | 		start, end = audio.start_and_end_indices(out, hparams.silence_threshold)
 87 | 		wav = wav[start: end]
 88 | 		out = out[start: end]
 89 | 
 90 | 		constant_values = mulaw_quantize(0, hparams.quantize_channels)
 91 | 		out_dtype = np.int16
 92 | 
 93 | 	elif is_mulaw(hparams.input_type):
 94 | 		#[-1, 1]
 95 | 		out = mulaw(wav, hparams.quantize_channels)
 96 | 		constant_values = mulaw(0., hparams.quantize_channels)
 97 | 		out_dtype = np.float32
 98 | 
 99 | 	else:
100 | 		#[-1, 1]
101 | 		out = wav
102 | 		constant_values = 0.
103 | 		out_dtype = np.float32
104 | 
105 | 	# Compute the mel scale spectrogram from the wav
106 | 	mel_spectrogram = audio.melspectrogram(wav, hparams).astype(np.float32)
107 | 	mel_frames = mel_spectrogram.shape[1]
108 | 
109 | 	if mel_frames > hparams.max_mel_frames and hparams.clip_mels_length:
110 | 		return None
111 | 
112 | 	#Compute the linear scale spectrogram from the wav
113 | 	linear_spectrogram = audio.linearspectrogram(wav, hparams).astype(np.float32)
114 | 	linear_frames = linear_spectrogram.shape[1]
115 | 
116 | 	#sanity check
117 | 	assert linear_frames == mel_frames
118 | 
119 | 	if hparams.use_lws:
120 | 		#Ensure time resolution adjustement between audio and mel-spectrogram
121 | 		fft_size = hparams.n_fft if hparams.win_size is None else hparams.win_size
122 | 		l, r = audio.pad_lr(wav, fft_size, audio.get_hop_size(hparams))
123 | 
124 | 		#Zero pad audio signal
125 | 		out = np.pad(out, (l, r), mode='constant', constant_values=constant_values)
126 | 	else:
127 | 		#Ensure time resolution adjustement between audio and mel-spectrogram
128 | 		pad = audio.librosa_pad_lr(wav, hparams.n_fft, audio.get_hop_size(hparams))
129 | 
130 | 		#Reflect pad audio signal (Just like it's done in Librosa to avoid frame inconsistency)
131 | 		out = np.pad(out, pad, mode='reflect')
132 | 
133 | 	assert len(out) >= mel_frames * audio.get_hop_size(hparams)
134 | 
135 | 	#time resolution adjustement
136 | 	#ensure length of raw audio is multiple of hop size so that we can use
137 | 	#transposed convolution to upsample
138 | 	out = out[:mel_frames * audio.get_hop_size(hparams)]
139 | 	assert len(out) % audio.get_hop_size(hparams) == 0
140 | 	time_steps = len(out)
141 | 
142 | 	# Write the spectrogram and audio to disk
143 | 	audio_filename = 'audio-{}.npy'.format(index)
144 | 	mel_filename = 'mel-{}.npy'.format(index)
145 | 	linear_filename = 'linear-{}.npy'.format(index)
146 | 	np.save(os.path.join(wav_dir, audio_filename), out.astype(out_dtype), allow_pickle=False)
147 | 	np.save(os.path.join(mel_dir, mel_filename), mel_spectrogram.T, allow_pickle=False)
148 | 	np.save(os.path.join(linear_dir, linear_filename), linear_spectrogram.T, allow_pickle=False)
149 | 
150 | 	# Return a tuple describing this training example
151 | 	return (audio_filename, mel_filename, linear_filename, time_steps, mel_frames, text)
152 | 


--------------------------------------------------------------------------------
/scripts/datasets/wavenet_preprocessor.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from concurrent.futures import ProcessPoolExecutor
  3 | from functools import partial
  4 | 
  5 | import numpy as np
  6 | from datasets import audio
  7 | from wavenet_vocoder.util import is_mulaw, is_mulaw_quantize, mulaw, mulaw_quantize
  8 | 
  9 | 
 10 | def build_from_path(hparams, input_dir, mel_dir, wav_dir, n_jobs=12, tqdm=lambda x: x):
 11 | 	"""
 12 | 	Preprocesses the speech dataset from a gven input path to given output directories
 13 | 
 14 | 	Args:
 15 | 		- hparams: hyper parameters
 16 | 		- input_dir: input directory that contains the files to prerocess
 17 | 		- mel_dir: output directory of the preprocessed speech mel-spectrogram dataset
 18 | 		- linear_dir: output directory of the preprocessed speech linear-spectrogram dataset
 19 | 		- wav_dir: output directory of the preprocessed speech audio dataset
 20 | 		- n_jobs: Optional, number of worker process to parallelize across
 21 | 		- tqdm: Optional, provides a nice progress bar
 22 | 
 23 | 	Returns:
 24 | 		- A list of tuple describing the train examples. this should be written to train.txt
 25 | 	"""
 26 | 
 27 | 	# We use ProcessPoolExecutor to parallelize across processes, this is just for
 28 | 	# optimization purposes and it can be omited
 29 | 	executor = ProcessPoolExecutor(max_workers=n_jobs)
 30 | 	futures = []
 31 | 	for file in os.listdir(input_dir):
 32 | 		wav_path = os.path.join(input_dir, file)
 33 | 		basename = os.path.basename(wav_path).replace('.wav', '')
 34 | 		futures.append(executor.submit(partial(_process_utterance, mel_dir, wav_dir, basename, wav_path, hparams)))
 35 | 
 36 | 	return [future.result() for future in tqdm(futures) if future.result() is not None]
 37 | 
 38 | 
 39 | def _process_utterance(mel_dir, wav_dir, index, wav_path, hparams):
 40 | 	"""
 41 | 	Preprocesses a single utterance wav/text pair
 42 | 
 43 | 	this writes the mel scale spectogram to disk and return a tuple to write
 44 | 	to the train.txt file
 45 | 
 46 | 	Args:
 47 | 		- mel_dir: the directory to write the mel spectograms into
 48 | 		- linear_dir: the directory to write the linear spectrograms into
 49 | 		- wav_dir: the directory to write the preprocessed wav into
 50 | 		- index: the numeric index to use in the spectrogram filename
 51 | 		- wav_path: path to the audio file containing the speech input
 52 | 		- text: text spoken in the input audio file
 53 | 		- hparams: hyper parameters
 54 | 
 55 | 	Returns:
 56 | 		- A tuple: (audio_filename, mel_filename, linear_filename, time_steps, mel_frames, linear_frames, text)
 57 | 	"""
 58 | 	try:
 59 | 		# Load the audio as numpy array
 60 | 		wav = audio.load_wav(wav_path, sr=hparams.sample_rate)
 61 | 	except FileNotFoundError: #catch missing wav exception
 62 | 		print('file {} present in csv metadata is not present in wav folder. skipping!'.format(
 63 | 			wav_path))
 64 | 		return None
 65 | 
 66 | 	#rescale wav
 67 | 	if hparams.rescale:
 68 | 		wav = wav / np.abs(wav).max() * hparams.rescaling_max
 69 | 
 70 | 	#M-AILABS extra silence specific
 71 | 	if hparams.trim_silence:
 72 | 		wav = audio.trim_silence(wav, hparams)
 73 | 
 74 | 	#Mu-law quantize
 75 | 	if is_mulaw_quantize(hparams.input_type):
 76 | 		#[0, quantize_channels)
 77 | 		out = mulaw_quantize(wav, hparams.quantize_channels)
 78 | 
 79 | 		#Trim silences
 80 | 		start, end = audio.start_and_end_indices(out, hparams.silence_threshold)
 81 | 		wav = wav[start: end]
 82 | 		out = out[start: end]
 83 | 
 84 | 		constant_values = mulaw_quantize(0, hparams.quantize_channels)
 85 | 		out_dtype = np.int16
 86 | 
 87 | 	elif is_mulaw(hparams.input_type):
 88 | 		#[-1, 1]
 89 | 		out = mulaw(wav, hparams.quantize_channels)
 90 | 		constant_values = mulaw(0., hparams.quantize_channels)
 91 | 		out_dtype = np.float32
 92 | 
 93 | 	else:
 94 | 		#[-1, 1]
 95 | 		out = wav
 96 | 		constant_values = 0.
 97 | 		out_dtype = np.float32
 98 | 
 99 | 	# Compute the mel scale spectrogram from the wav
100 | 	mel_spectrogram = audio.melspectrogram(wav, hparams).astype(np.float32)
101 | 	mel_frames = mel_spectrogram.shape[1]
102 | 
103 | 	if mel_frames > hparams.max_mel_frames and hparams.clip_mels_length:
104 | 		return None
105 | 
106 | 	if hparams.use_lws:
107 | 		#Ensure time resolution adjustement between audio and mel-spectrogram
108 | 		fft_size = hparams.n_fft if hparams.win_size is None else hparams.win_size
109 | 		l, r = audio.pad_lr(wav, fft_size, audio.get_hop_size(hparams))
110 | 
111 | 		#Zero pad audio signal
112 | 		out = np.pad(out, (l, r), mode='constant', constant_values=constant_values)
113 | 	else:
114 | 		#Ensure time resolution adjustement between audio and mel-spectrogram
115 | 		pad = audio.librosa_pad_lr(wav, hparams.n_fft, audio.get_hop_size(hparams))
116 | 
117 | 		#Reflect pad audio signal (Just like it's done in Librosa to avoid frame inconsistency)
118 | 		out = np.pad(out, pad, mode='reflect')
119 | 
120 | 	assert len(out) >= mel_frames * audio.get_hop_size(hparams)
121 | 
122 | 	#time resolution adjustement
123 | 	#ensure length of raw audio is multiple of hop size so that we can use
124 | 	#transposed convolution to upsample
125 | 	out = out[:mel_frames * audio.get_hop_size(hparams)]
126 | 	assert len(out) % audio.get_hop_size(hparams) == 0
127 | 	time_steps = len(out)
128 | 
129 | 	# Write the spectrogram and audio to disk
130 | 	audio_filename = os.path.join(wav_dir, 'audio-{}.npy'.format(index))
131 | 	mel_filename = os.path.join(mel_dir, 'mel-{}.npy'.format(index))
132 | 	np.save(audio_filename, out.astype(out_dtype), allow_pickle=False)
133 | 	np.save(mel_filename, mel_spectrogram.T, allow_pickle=False)
134 | 
135 | 	#global condition features
136 | 	if hparams.gin_channels > 0:
137 | 		raise RuntimeError('When activating global conditions, please set your speaker_id rules in line 129 of datasets/wavenet_preprocessor.py to use them during training')
138 | 		speaker_id = '<no_g>' #put the rule to determine how to assign speaker ids (using file names maybe? file basenames are available in "index" variable)
139 | 	else:
140 | 		speaker_id = '<no_g>'
141 | 
142 | 	# Return a tuple describing this training example
143 | 	return (audio_filename, mel_filename, '_', speaker_id, time_steps, mel_frames)
144 | 


--------------------------------------------------------------------------------
/scripts/get_train_val_scp.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # Copyright 2017    Ke Wang
 5 | 
 6 | """Get train and validation set."""
 7 | 
 8 | from __future__ import absolute_import
 9 | from __future__ import print_function
10 | 
11 | import argparse
12 | import os
13 | import pprint
14 | import random
15 | import sys
16 | 
17 | 
18 | def main():
19 |     inputs_scp = os.path.join(FLAGS.data_dir, "all_wav.txt")
20 |     tr_dir = os.path.join(FLAGS.data_dir, "tr")
21 |     cv_dir = os.path.join(FLAGS.data_dir, "cv")
22 |     tr_inputs_scp = os.path.join(tr_dir, "inputs.txt")
23 |     cv_inputs_scp = os.path.join(cv_dir, "inputs.txt")
24 | 
25 |     print("Split to %s and %s" % (tr_dir, cv_dir))
26 | 
27 |     if not os.path.exists(tr_dir):
28 |         os.makedirs(tr_dir)
29 |     if not os.path.exists(cv_dir):
30 |         os.makedirs(cv_dir)
31 | 
32 |     with open(inputs_scp, 'r') as fr_inputs, \
33 |             open(tr_inputs_scp, 'w') as fw_tr_inputs, \
34 |             open(cv_inputs_scp, 'w') as fw_cv_inputs:
35 |         lists_inputs = fr_inputs.readlines()
36 |         if len(lists_inputs) <= FLAGS.val_size:
37 |             print(("Validation size %s is bigger than inputs scp length %s."
38 |                    " Please reduce validation size.") % (
39 |                        FLAGS.val_size, len(lists_inputs)))
40 | 
41 |         lists = range(len(lists_inputs))
42 |         random.shuffle(lists)
43 |         # print(lists)
44 |         for i in lists:
45 |             line_input = lists_inputs[i]
46 |             print(line_input)
47 |             if i < FLAGS.val_size:
48 |                 fw_cv_inputs.write(line_input)
49 |             else:
50 |                 fw_tr_inputs.write(line_input)
51 | 
52 | 
53 | if __name__ == '__main__':
54 |     parser = argparse.ArgumentParser()
55 |     parser.add_argument(
56 |         '--data_dir',
57 |         type=str,
58 |         required=True,
59 |         help="Directory name of data to spliting."
60 |              "(Note: inputs.scp and labels.scp)"
61 |     )
62 |     parser.add_argument(
63 |         '--val_size',
64 |         type=int,
65 |         default=361,
66 |         help="Validation set size."
67 |     )
68 | 
69 |     FLAGS, unparsed = parser.parse_known_args()
70 |     # pp = pprint.PrettyPrinter()
71 |     # pp.pprint(FLAGS.__dict__)
72 |     main()
73 | 


--------------------------------------------------------------------------------
/scripts/io_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2017    Ke Wang
 4 | 
 5 | set -euo pipefail
 6 | 
 7 | stage=2
 8 | 
 9 | # train_dir=data/train/io_test
10 | train_dir=data/train/train_100h
11 | 
12 | if [ $stage -le 0 ]; then
13 |   python io_funcs/convert_cmvn_to_numpy.py \
14 |     --inputs=$train_dir/inputs.cmvn \
15 |     --labels=$train_dir/labels.cmvn \
16 |     --save_dir=$train_dir
17 | fi
18 | 
19 | if [ $stage -le 1 ]; then
20 |   nj=1
21 |   logdir=exp/
22 |   if [ ! -d $logdir ]; then
23 |     mkdir -p $logdir
24 |   fi
25 | 
26 |   rm -rf $logdir/.error || exit 1;
27 |   bash scripts/split_scp.sh --nj $nj $train_dir
28 |   for i in $(seq $nj); do
29 |   (
30 |     python io_funcs/make_tfrecords.py \
31 |       --inputs=$train_dir/split${nj}/inputs${i}.scp \
32 |       --labels=$train_dir/split${nj}/labels${i}.scp \
33 |       --cmvn_dir=$train_dir \
34 |       --apply_cmvn=True \
35 |       --output_dir=$train_dir/tfrecords \
36 |       --name="train${i}"
37 |   ) || touch $logdir/.error &
38 |   done
39 |   wait
40 |   [ -f $logdir/.error ] && \
41 |     echo "$0: there was a problem while making TFRecords" && exit 1
42 |   echo "Making TFRecords done."
43 | fi
44 | 
45 | if [ $stage -le 2 ]; then
46 |   CUDA_VISIBLE_DEVICES="3" python io_funcs/tfrecords_io_test.py \
47 |     --batch_size=128 \
48 |     --input_dim=257 \
49 |     --output_dim=40 \
50 |     --num_threads=32 \
51 |     --num_epochs=1 \
52 |     --data_dir=$train_dir/tfrecords
53 | fi
54 | 
55 | 


--------------------------------------------------------------------------------
/scripts/parse_options.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
 4 | #                 Arnab Ghoshal, Karel Vesely
 5 | 
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #  http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
15 | # MERCHANTABLITY OR NON-INFRINGEMENT.
16 | # See the Apache 2 License for the specific language governing permissions and
17 | # limitations under the License.
18 | 
19 | 
20 | # Parse command-line options.
21 | # To be sourced by another script (as in ". parse_options.sh").
22 | # Option format is: --option-name arg
23 | # and shell variable "option_name" gets set to value "arg."
24 | # The exception is --help, which takes no arguments, but prints the
25 | # $help_message variable (if defined).
26 | 
27 | 
28 | ###
29 | ### The --config file options have lower priority to command line
30 | ### options, so we need to import them first...
31 | ###
32 | 
33 | # Now import all the configs specified by command-line, in left-to-right order
34 | for ((argpos=1; argpos<$#; argpos++)); do
35 |   if [ "${!argpos}" == "--config" ]; then
36 |     argpos_plus1=$((argpos+1))
37 |     config=${!argpos_plus1}
38 |     [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
39 |     . $config  # source the config file.
40 |   fi
41 | done
42 | 
43 | 
44 | ###
45 | ### No we process the command line options
46 | ###
47 | while true; do
48 |   [ -z "${1:-}" ] && break;  # break if there are no arguments
49 |   case "$1" in
50 |     # If the enclosing script is called with --help option, print the help
51 |     # message and exit.  Scripts should put help messages in $help_message
52 |   --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
53 | 	  else printf "$help_message\n" 1>&2 ; fi;
54 | 	  exit 0 ;;
55 |   --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
56 |        exit 1 ;;
57 |     # If the first command-line argument begins with "--" (e.g. --foo-bar),
58 |     # then work out the variable name as $name, which will equal "foo_bar".
59 |   --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
60 |     # Next we test whether the variable in question is undefned-- if so it's
61 |     # an invalid option and we die.  Note: $0 evaluates to the name of the
62 |     # enclosing script.
63 |     # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
64 |     # is undefined.  We then have to wrap this test inside "eval" because
65 |     # foo_bar is itself inside a variable ($name).
66 |       eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
67 | 
68 |       oldval="`eval echo \\$$name`";
69 |     # Work out whether we seem to be expecting a Boolean argument.
70 |       if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
71 | 	was_bool=true;
72 |       else
73 | 	was_bool=false;
74 |       fi
75 | 
76 |     # Set the variable to the right value-- the escaped quotes make it work if
77 |     # the option had spaces, like --cmd "queue.pl -sync y"
78 |       eval $name=\"$2\";
79 | 
80 |     # Check that Boolean-valued arguments are really Boolean.
81 |       if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
82 |         echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
83 |         exit 1;
84 |       fi
85 |       shift 2;
86 |       ;;
87 |   *) break;
88 |   esac
89 | done
90 | 
91 | 
92 | # Check for an empty argument to the --cmd option, which can easily occur as a
93 | # result of scripting errors.
94 | [ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
95 | 
96 | 
97 | true; # so this script returns exit code 0.
98 | 


--------------------------------------------------------------------------------
/scripts/prepare_data.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/scripts/prepare_data.pyc


--------------------------------------------------------------------------------
/scripts/spectrogram_to_wave.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Summary:  Recover spectrogram to wave. 
  3 | Author:   Qiuqiang Kong
  4 | Created:  2017.09
  5 | Modified: -
  6 | """
  7 | import numpy as np
  8 | import numpy
  9 | import decimal
 10 |     
 11 | def recover_wav(pd_abs_x, gt_x, n_overlap, winfunc, wav_len=None):
 12 |     """Recover wave from spectrogram. 
 13 |     If you are using scipy.signal.spectrogram, you may need to multipy a scaler
 14 |     to the recovered audio after using this function. For example, 
 15 |     recover_scaler = np.sqrt((ham_win**2).sum())
 16 |     
 17 |     Args:
 18 |       pd_abs_x: 2d array, (n_time, n_freq)
 19 |       gt_x: 2d complex array, (n_time, n_freq)
 20 |       n_overlap: integar. 
 21 |       winfunc: func, the analysis window to apply to each frame.
 22 |       wav_len: integer. Pad or trunc to wav_len with zero. 
 23 |       
 24 |     Returns:
 25 |       1d array. 
 26 |     """
 27 |     x = real_to_complex(pd_abs_x, gt_x)
 28 |     x = half_to_whole(x)
 29 |     frames = ifft_to_wav(x)
 30 |     (n_frames, n_window) = frames.shape
 31 |     s = deframesig(frames=frames, siglen=0, frame_len=n_window, 
 32 |                    frame_step=n_window-n_overlap, winfunc=winfunc)
 33 |     if wav_len:
 34 |         s = pad_or_trunc(s, wav_len)
 35 |     return s
 36 |     
 37 | def real_to_complex(pd_abs_x, gt_x):
 38 |     """Recover pred spectrogram's phase from ground truth's phase. 
 39 |     
 40 |     Args:
 41 |       pd_abs_x: 2d array, (n_time, n_freq)
 42 |       gt_x: 2d complex array, (n_time, n_freq)
 43 |       
 44 |     Returns:
 45 |       2d complex array, (n_time, n_freq)
 46 |     """
 47 |     theta = np.angle(gt_x)
 48 |     cmplx = pd_abs_x * np.exp(1j * theta)
 49 |     return cmplx
 50 |     
 51 | def half_to_whole(x):
 52 |     """Recover whole spectrogram from half spectrogram. 
 53 |     """
 54 |     return np.concatenate((x, np.fliplr(np.conj(x[:, 1:-1]))), axis=1)
 55 | 
 56 | def ifft_to_wav(x):
 57 |     """Recover wav from whole spectrogram"""
 58 |     return np.real(np.fft.ifft(x))
 59 | 
 60 | def pad_or_trunc(s, wav_len):
 61 |     if len(s) >= wav_len:
 62 |         s = s[0 : wav_len]
 63 |     else:
 64 |         s = np.concatenate((s, np.zeros(wav_len - len(s))))
 65 |     return s
 66 | 
 67 | def recover_gt_wav(x, n_overlap, winfunc, wav_len=None):
 68 |     """Recover ground truth wav. 
 69 |     """
 70 |     x = half_to_whole(x)
 71 |     frames = ifft_to_wav(x)
 72 |     (n_frames, n_window) = frames.shape
 73 |     s = deframesig(frames=frames, siglen=0, frame_len=n_window, 
 74 |                    frame_step=n_window-n_overlap, winfunc=winfunc)
 75 |     if wav_len:
 76 |         s = pad_or_trunc(s, wav_len)
 77 |     return s
 78 | 
 79 | def deframesig(frames,siglen,frame_len,frame_step,winfunc=lambda x:numpy.ones((x,))):    
 80 |     """Does overlap-add procedure to undo the action of framesig.
 81 |     Ref: From https://github.com/jameslyons/python_speech_features
 82 |     
 83 |     :param frames: the array of frames.
 84 |     :param siglen: the length of the desired signal, use 0 if unknown. Output will be truncated to siglen samples.
 85 |     :param frame_len: length of each frame measured in samples.
 86 |     :param frame_step: number of samples after the start of the previous frame that the next frame should begin.
 87 |     :param winfunc: the analysis window to apply to each frame. By default no window is applied.
 88 |     :returns: a 1-D signal.
 89 |     """
 90 |     frame_len = round_half_up(frame_len)
 91 |     frame_step = round_half_up(frame_step)
 92 |     numframes = numpy.shape(frames)[0]
 93 |     assert numpy.shape(frames)[1] == frame_len, '"frames" matrix is wrong size, 2nd dim is not equal to frame_len'
 94 |  
 95 |     indices = numpy.tile(numpy.arange(0,frame_len),(numframes,1)) + numpy.tile(numpy.arange(0,numframes*frame_step,frame_step),(frame_len,1)).T
 96 |     indices = numpy.array(indices,dtype=numpy.int32)
 97 |     padlen = (numframes-1)*frame_step + frame_len   
 98 |     
 99 |     if siglen <= 0: siglen = padlen
100 |     
101 |     rec_signal = numpy.zeros((padlen,))
102 |     window_correction = numpy.zeros((padlen,))
103 |     win = winfunc(frame_len)
104 |     
105 |     for i in range(0,numframes):
106 |         window_correction[indices[i,:]] = window_correction[indices[i,:]] + win + 1e-15 #add a little bit so it is never zero
107 |         rec_signal[indices[i,:]] = rec_signal[indices[i,:]] + frames[i,:]
108 |         
109 |     rec_signal = rec_signal/window_correction
110 |     return rec_signal[0:siglen]
111 |     
112 | def round_half_up(number):
113 |     return int(decimal.Decimal(number).quantize(decimal.Decimal('1'), rounding=decimal.ROUND_HALF_UP))


--------------------------------------------------------------------------------
/scripts/spectrogram_to_wave.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/scripts/spectrogram_to_wave.pyc


--------------------------------------------------------------------------------
/train.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # Copyright 2019.7    Nan LEE
  4 | 
  5 | set -euo pipefail
  6 | 
  7 | stage=0
  8 | 
  9 | nj=1
 10 | val_size=500
 11 | train_dir=data
 12 | test_dir=data/test
 13 | logdir=exp
 14 | tr_list=$train_dir/tr.list
 15 | cv_list=$train_dir/cv.list
 16 | test_list=$test_dir/test.list
 17 | save_dir=exp/dnn
 18 | 
 19 | # Data prepare
 20 | if [ $stage -le 0 ]; then
 21 |   # Make TFRecords file
 22 |   echo "Begin making TFRecords files ..."
 23 |   if [ ! -d $logdir ]; then
 24 |     mkdir -p $logdir || exit 1;
 25 |   fi
 26 | 
 27 |   # cv set
 28 |   declare -i verbose=30
 29 |   [ -d $train_dir/tfrecords ] && (rm -rf $train_dir/tfrecords || exit 1;)
 30 |   mkdir -p $train_dir/tfrecords || exit 1;
 31 |   
 32 |   TF_CPP_MIN_LOG_LEVEL=1 python io_funcs/make_setf.py --inputs=$train_dir/cv/inputs_feat.txt --name="cv"
 33 |   echo "$train_dir/tfrecords/cv.tfrecords" > $cv_list
 34 |  wait
 35 |  date
 36 | 
 37 |  TF_CPP_MIN_LOG_LEVEL=1 python io_funcs/make_setf.py --inputs=$train_dir/tr/inputs_feat.txt --name="tr"
 38 |  echo "$train_dir/tfrecords/tr.tfrecords" > $tr_list
 39 |   wait
 40 |   date
 41 | 
 42 |   [ -f $train_dir/batch_num.txt ] && rm $train_dir/batch_num.txt
 43 |   echo "Make train TFRecords files sucessed."
 44 |   echo ""
 45 | fi
 46 | #exit 0;
 47 | # Train model
 48 | if [ $stage -le 2 ]; then
 49 |   echo "$(date): $(hostname)"
 50 |  CUDA_VISIBLE_DEVICES="1,2,3" TF_CPP_MIN_LOG_LEVEL=2 \
 51 |     python scripts/train_dnn.py \
 52 |       --data_dir=$train_dir \
 53 |       --tr_list_file=$tr_list \
 54 |       --cv_list_file=$cv_list \
 55 |       --g_type="res_rced" \
 56 |       --save_dir=$save_dir \
 57 |       --batch_size=64 \
 58 |       --g_learning_rate=0.001 \
 59 |       --keep_lr=2 \
 60 |       --batch_norm=true \
 61 |       --keep_prob=1 \
 62 |       --l2_scale=0 \
 63 |       --input_dim=257 \
 64 |       --output_dim=257 \
 65 |       --left_context=5 \
 66 |       --right_context=5 \
 67 |       --min_epoches=30 \
 68 |       --max_epoches=35 \
 69 |       --decay_factor=0.8 \
 70 |       --start_halving_impr=0.01 \
 71 |       --end_halving_impr=0.001 \
 72 |       --num_threads=1 \
 73 |       --num_gpu=1 || exit 1;
 74 | 
 75 |   echo "Finished training successfully on $(date)"
 76 |   echo ""
 77 | fi
 78 | # exit 0;
 79 | 
 80 | # Decode
 81 | 
 82 | if [ $stage -le 4 ]; then
 83 |   echo "Prepare test data"
 84 |   if [ -f $logdir/.test.error ]; then
 85 |     rm -rf $logdir/.test.error || exit 1;
 86 |   fi
 87 |   declare -i verbose=30
 88 |   # [ -d $test_dir/tfrecords ] && (rm -rf $test_dir/tfrecords || exit 1;)
 89 |   # mkdir -p $test_dir/tfrecords || exit 1;
 90 |  for datase in data/test/*;do
 91 |  # for datase in data/simusi;do
 92 |   rm -rf $datase/tfrecords 
 93 |   TF_CPP_MIN_LOG_LEVEL=1 python io_funcs/make_sete.py \
 94 |     --inputs=$datase/inputs.txt \
 95 |     --output_dir=$datase/tfrecords \
 96 |     --name="test" || touch $logdir/.test.error &
 97 |   echo "$datase/tfrecords/test.tfrecords" > $datase/test.list
 98 |   # exit 0;
 99 |   wait
100 |  done
101 | fi
102 | # Decode
103 | if [ $stage -le 5 ]; then
104 | 
105 |   echo "Start decoding test data"
106 | for datase in data/test/*;do
107 | # for datase in data/simusi;do
108 |     CUDA_VISIBLE_DEVICES="1" TF_CPP_MIN_LOG_LEVEL=2 python scripts/train_dnn.py \
109 |       --decode \
110 |       --data_dir=$train_dir \
111 |       --test_list_file=$datase/test.list \
112 |       --g_type="res_rced" \
113 |       --save_dir=$save_dir \
114 | 	  --g_learning_rate=0.001 \
115 |       --batch_norm=true \
116 |       --input_dim=257 \
117 |       --output_dim=257 \
118 |       --left_context=5 \
119 |       --right_context=5 \
120 |       --batch_size=1 \
121 |       --keep_prob=1 \
122 |       --l2_scale=0 \
123 |       --num_threads=1 \
124 | 	  --savetestdir=$datase || exit 1;
125 |   echo "Decoding done"
126 | wait
127 | done
128 | fi
129 | 
130 | exit 0
131 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | # Copyright 2017    Ke Wang
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/utils/__init__.pyc


--------------------------------------------------------------------------------
/utils/add_additive_noise.py:
--------------------------------------------------------------------------------
  1 | """
  2 | LI, Nan
  3 | 2019.08
  4 | """
  5 | import os
  6 | import soundfile
  7 | import numpy as np
  8 | import argparse
  9 | import csv
 10 | import time
 11 | #import matplotlib.pyplot as plt
 12 | from scipy import signal
 13 | #import pickle
 14 | #import cPickle
 15 | import h5py
 16 | from sklearn import preprocessing
 17 | import librosa
 18 | import prepare_data as pp_data
 19 | import config as cfg
 20 | import math
 21 | from utils.tools import *
 22 | import random
 23 | def create_folder(fd):
 24 |     if not os.path.exists(fd):
 25 |         os.makedirs(fd)
 26 |         
 27 | def read_audio(path, target_fs=None):
 28 |     (audio, fs) = soundfile.read(path)
 29 |     if audio.ndim > 1:
 30 |         audio = np.mean(audio, axis=1)
 31 |     if target_fs is not None and fs != target_fs:
 32 |         audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
 33 |         fs = target_fs
 34 |     print(fs)
 35 |     return audio, fs
 36 |     
 37 | def write_audio(path, audio, sample_rate):
 38 |     soundfile.write(file=path, data=audio, samplerate=sample_rate)
 39 | 
 40 | ###
 41 | def create_mixture_csv(args):
 42 |     """Create csv containing mixture information. 
 43 |     Each line in the .csv file contains [speech_name, noise_name, noise_onset, noise_offset]
 44 |     
 45 |     Args:
 46 |       workspace: str, path of workspace. 
 47 |       speech_dir: str, path of speech data. 
 48 |       noise_dir: str, path of noise data. 
 49 |       data_type: str, 'train' | 'test'. 
 50 |       magnification: int, only used when data_type='train', number of noise 
 51 |           selected to mix with a speech. E.g., when magnication=3, then 4620
 52 |           speech with create 4620*3 mixtures. magnification should not larger 
 53 |           than the species of noises. 
 54 |     """
 55 |     workspace = args.workspace
 56 |     speech_dir = args.speech_dir
 57 |     noise_dir = args.noise_dir
 58 |     data_type = args.data_type
 59 |     magnification = args.magnification
 60 |     fs = cfg.sample_rate
 61 |     
 62 |     speech_names = [na for na in os.listdir(speech_dir) if na.lower().endswith(".wav")]
 63 |     noise_names = [na for na in os.listdir(noise_dir) if na.lower().endswith(".wav")]
 64 |     
 65 |     rs = np.random.RandomState(0)
 66 |     out_csv_path = os.path.join(workspace, "mixture_csvs", "%s.csv" % data_type)
 67 |     pp_data.create_folder(os.path.dirname(out_csv_path))
 68 |     
 69 |     cnt = 0
 70 |     f = open(out_csv_path, 'w')
 71 |     f.write("%s\t%s\t%s\t%s\n" % ("speech_name", "noise_name", "noise_onset", "noise_offset"))
 72 |     for speech_na in speech_names:
 73 |         # Read speech. 
 74 |         speech_path = os.path.join(speech_dir, speech_na)
 75 |         (speech_audio, _) = read_audio(speech_path)
 76 |         len_speech = len(speech_audio)
 77 |         
 78 |         # For training data, mix each speech with randomly picked #magnification noises. 
 79 |         if data_type == 'train':
 80 |             selected_noise_names = rs.choice(noise_names, size=magnification, replace=False)
 81 |         # For test data, mix each speech with all noises. 
 82 |         elif data_type == 'test':
 83 |             selected_noise_names = noise_names
 84 |         else:
 85 |             raise Exception("data_type must be train | test!")
 86 | 
 87 |         # Mix one speech with different noises many times. 
 88 |         for noise_na in selected_noise_names:
 89 |             noise_path = os.path.join(noise_dir, noise_na)
 90 |             (noise_audio, _) = read_audio(noise_path)
 91 |             
 92 |             len_noise = len(noise_audio)
 93 | 
 94 |             if len_noise <= len_speech:
 95 |                 noise_onset = 0
 96 |                 nosie_offset = len_speech
 97 |             # If noise longer than speech then randomly select a segment of noise. 
 98 |             else:
 99 |                 noise_onset = rs.randint(0, len_noise - len_speech, size=1)[0]
100 |                 nosie_offset = noise_onset + len_speech
101 |             
102 |             if cnt % 100 == 0:
103 |                 print cnt
104 |                 
105 |             cnt += 1
106 |             f.write("%s\t%s\t%d\t%d\n" % (speech_na, noise_na, noise_onset, nosie_offset))
107 |     f.close()
108 |     print(out_csv_path)
109 |     print("Create %s mixture csv finished!" % data_type)
110 |     
111 | ###
112 | def calculate_mixture_features(args):
113 |     mixture_csv_path = os.path.join("mini_data_bak","train_speech/cleandata.txt")
114 |     out_dir = "/Work18/2017/linan/ASR/data/aur/train"
115 |     with open(mixture_csv_path, 'rb') as f:
116 |         lis = list(f)
117 |         for x in lis:
118 |             x.replace("\n", "")
119 |             print(x)
120 |     print("finish read")
121 |     noise_dir = "mini_data/Noise"
122 |     all_noise_na = ["Babble2.wav", "F162.wav", "Factory2.wav", "Pink2.wav", "Volvo2.wav", "White2.wav"]
123 |     all_snr = [-10, -5, 0, 5, 10, 15, 20]
124 |     t1 = time.time()
125 |     cnt = 0
126 |     fs = 8000
127 |     for i1 in xrange(0, len(lis)):
128 |         speech_path = lis[i1].replace("\n", "")
129 |         
130 |         # Read speech audio. 
131 |         (speech_audio, _) = read_audio(speech_path, target_fs=fs)
132 |         name = speech_path.split("/")[-1]
133 |         # Read noise audio. 
134 |         rrr = random.randint(0,5)
135 |         noise_na = all_noise_na[rrr]
136 |         noise_path = os.path.join(noise_dir, noise_na)
137 |         (noise_audio, _) = read_audio(noise_path, target_fs=fs)
138 | 
139 |         noise_len = np.shape(noise_audio)[0]
140 |         speech_len = np.shape(speech_audio)[0]
141 | 
142 |         rdm = random.randint(0,noise_len-speech_len)
143 |         noise_audio = noise_audio[rdm:(rdm+speech_len)]
144 |         rrr2 = random.randint(0,6)
145 |         snr = all_snr[rrr2]
146 |         print("all_snr:",all_snr[rrr2])
147 |         # Scale speech to given snr. 
148 |         scaler = get_amplitude_scaling_factor(speech_audio, noise_audio, snr=snr)
149 |         #speech_audio /= scaler
150 |         
151 |         noise_audio*=scaler
152 |         # Get normalized mixture, speech, noise. 
153 |         print("speech audio shape:",np.shape(speech_audio))
154 |         print("noise audio shape:",np.shape(noise_audio))
155 |         (mixed_audio, speech_audio, noise_audio, alpha) = additive_mixing(speech_audio, noise_audio)
156 |         print(np.shape(speech_audio))
157 |         print(np.shape(mixed_audio))
158 |         tmp1 = np.sum(speech_audio**2)
159 |         tmp2 = np.sum((mixed_audio-speech_audio)**2)
160 | 
161 |         noise2 = "noise" + str(snr)
162 |         if snr < 0:
163 |             snr2 = -snr
164 |             noise2 = "noise_" + str(snr2)
165 |         out_noise_path = os.path.join(out_dir,"noise",name)
166 |         #audiowrite('test_speech.wav', speech_audio, samp_rate=16000)
167 |         audiowrite(out_noise_path, noise_audio, samp_rate=fs)
168 |         snr_bi = tmp1/tmp2
169 |         labels = 10*np.log10(snr_bi)
170 |         print("cacu:snr",labels)
171 |         snr2 = "snr" + str(snr)
172 |         if snr < 0:
173 |             snr2 = -snr
174 |             snr2 = "snr_" + str(snr2)
175 |         out_put_path = os.path.join(out_dir,snr2,name)
176 |         print(out_put_path)
177 |         audiowrite(out_put_path, mixed_audio, samp_rate=fs)
178 | def rms(y):
179 |     """Root mean square. 
180 |     """
181 |     return np.sum(y**2)
182 |     #return np.sqrt(sum(np.abs(y) ** 2, axis=0, keepdims=False))
183 | 
184 | def get_amplitude_scaling_factor(s, n, snr, method='rms'):
185 |     """Given s and n, return the scaler s according to the snr. 
186 |     
187 |     Args:
188 |       s: ndarray, source1. 
189 |       n: ndarray, source2. 
190 |       snr: float, SNR. 
191 |       method: 'rms'. 
192 |       
193 |     Outputs:
194 |       float, scaler. 
195 |     """
196 |     original_sn_rms_ratio = rms(s) / rms(n)
197 |     target_sn_rms_ratio =  10. ** (float(snr) / 10.)    # snr = 10 * lg(rms(s) / rms(n))
198 |     signal_scaling_factor = np.sqrt(original_sn_rms_ratio/target_sn_rms_ratio)
199 |     return signal_scaling_factor
200 | 
201 | def additive_mixing(s, n):
202 |     """Mix normalized source1 and source2. 
203 |     
204 |     Args:
205 |       s: ndarray, source1. 
206 |       n: ndarray, source2. 
207 |       
208 |     Returns:
209 |       mix_audio: ndarray, mixed audio. 
210 |       s: ndarray, pad or truncated and scalered source1. 
211 |       n: ndarray, scaled source2. 
212 |       alpha: float, normalize coefficient. 
213 |     """
214 |     mixed_audio = s + n
215 |         
216 |     alpha = 1. / np.max(np.abs(mixed_audio))
217 |     mixed_audio *= alpha
218 |     s *= alpha
219 |     n *= alpha
220 |     return mixed_audio, s, n, alpha
221 |     
222 | def calc_sp(audio, mode):
223 |     """Calculate spectrogram. 
224 |     
225 |     Args:
226 |       audio: 1darray. 
227 |       mode: string, 'magnitude' | 'complex'
228 |     
229 |     Returns:
230 |       spectrogram: 2darray, (n_time, n_freq). 
231 |     """
232 |     n_window = cfg.n_window
233 |     n_overlap = cfg.n_overlap
234 |     ham_win = np.hamming(n_window)
235 |     [f, t, x] = signal.spectral.spectrogram(
236 |                     audio, 
237 |                     window=ham_win,
238 |                     nperseg=n_window, 
239 |                     noverlap=n_overlap, 
240 |                     detrend=False, 
241 |                     return_onesided=True, 
242 |                     mode=mode) 
243 |     x = x.T
244 |     if mode == 'magnitude':
245 |         x = x.astype(np.float32)
246 |     elif mode == 'complex':
247 |         x = x.astype(np.complex64)
248 |     else:
249 |         raise Exception("Incorrect mode!")
250 |     return x
251 |     
252 | ###
253 |     
254 | def log_sp(x):
255 |     return np.log(x + 1e-08)
256 |     
257 | ###
258 | def load_hdf5(hdf5_path):
259 |     """Load hdf5 data. 
260 |     """
261 |     with h5py.File(hdf5_path, 'r') as hf:
262 |         x = hf.get('x')
263 |         y = hf.get('y')
264 |         x = np.array(x)     # (n_segs, n_concat, n_freq)
265 |         y = np.array(y)     # (n_segs, n_freq)        
266 |     return x, y
267 | 
268 | def np_mean_absolute_error(y_true, y_pred):
269 |     return np.mean(np.abs(y_pred - y_true))
270 |     
271 | ###
272 | if __name__ == '__main__':
273 |     parser = argparse.ArgumentParser()
274 |     subparsers = parser.add_subparsers(dest='mode')
275 | 
276 |     parser_create_mixture_csv = subparsers.add_parser('create_mixture_csv')
277 |     parser_create_mixture_csv.add_argument('--workspace', type=str, required=True)
278 |     parser_create_mixture_csv.add_argument('--speech_dir', type=str, required=True)
279 |     parser_create_mixture_csv.add_argument('--noise_dir', type=str, required=True)
280 |     parser_create_mixture_csv.add_argument('--data_type', type=str, required=True)
281 |     parser_create_mixture_csv.add_argument('--magnification', type=int, default=1)
282 | 
283 |     parser_calculate_mixture_features = subparsers.add_parser('calculate_mixture_features')
284 |     parser_calculate_mixture_features.add_argument('--workspace', type=str, required=True)
285 |     parser_calculate_mixture_features.add_argument('--speech_dir', type=str, required=True)
286 |     parser_calculate_mixture_features.add_argument('--noise_dir', type=str, required=True)
287 |     parser_calculate_mixture_features.add_argument('--data_type', type=str, required=True)
288 |     parser_calculate_mixture_features.add_argument('--snr', type=float, required=True)
289 |     
290 |     parser_pack_features = subparsers.add_parser('pack_features')
291 |     parser_pack_features.add_argument('--workspace', type=str, required=True)
292 |     parser_pack_features.add_argument('--data_type', type=str, required=True)
293 |     parser_pack_features.add_argument('--snr', type=float, required=True)
294 |     parser_pack_features.add_argument('--n_concat', type=int, required=True)
295 |     parser_pack_features.add_argument('--n_hop', type=int, required=True)
296 |     
297 |     parser_compute_scaler = subparsers.add_parser('compute_scaler')
298 |     parser_compute_scaler.add_argument('--workspace', type=str, required=True)
299 |     parser_compute_scaler.add_argument('--data_type', type=str, required=True)
300 |     parser_compute_scaler.add_argument('--snr', type=float, required=True)
301 |     
302 |     args = parser.parse_args()
303 |     if args.mode == 'create_mixture_csv':
304 |         create_mixture_csv(args)
305 |     elif args.mode == 'calculate_mixture_features':
306 |         calculate_mixture_features(args)
307 |     elif args.mode == 'pack_features':
308 |         pack_features(args)       
309 |     elif args.mode == 'compute_scaler':
310 |         compute_scaler(args)
311 |     else:
312 |         raise Exception("Error!")
313 | 


--------------------------------------------------------------------------------
/utils/bnorm.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from __future__ import absolute_import
 5 | from __future__ import division
 6 | from __future__ import print_function
 7 | 
 8 | import tensorflow as tf
 9 | 
10 | 
11 | class VBN(object):
12 |   """
13 |   Virtual Batch Normalization
14 |   (modified from https://github.com/openai/improved-gan/ definition)
15 |   """
16 | 
17 |   def __init__(self, x, name, epsilon=1e-5):
18 |     """
19 |     x is the reference batch
20 |     """
21 |     assert isinstance(epsilon, float)
22 | 
23 |     shape = x.get_shape().as_list()
24 |     assert len(shape) == 3, shape
25 |     with tf.variable_scope(name) as scope:
26 |       assert name.startswith("d_") or name.startswith("g_")
27 |       self.epsilon = epsilon
28 |       self.name = name
29 |       self.mean = tf.reduce_mean(x, [0, 1], keep_dims=True)
30 |       self.mean_sq = tf.reduce_mean(tf.square(x), [0, 1], keep_dims=True)
31 |       self.batch_size = int(x.get_shape()[0])
32 |       assert x is not None
33 |       assert self.mean is not None
34 |       assert self.mean_sq is not None
35 |       out = self._normalize(x, self.mean, self.mean_sq, "reference")
36 |       self.reference_output = out
37 | 
38 |   def __call__(self, x):
39 | 
40 |     shape = x.get_shape().as_list()
41 |     with tf.variable_scope(self.name) as scope:
42 |       new_coeff = 1. / (self.batch_size + 1.)
43 |       old_coeff = 1. - new_coeff
44 |       new_mean = tf.reduce_mean(x, [0, 1], keep_dims=True)
45 |       new_mean_sq = tf.reduce_mean(tf.square(x), [0, 1], keep_dims=True)
46 |       mean = new_coeff * new_mean + old_coeff * self.mean
47 |       mean_sq = new_coeff * new_mean_sq + old_coeff * self.mean_sq
48 |       out = self._normalize(x, mean, mean_sq, "live")
49 |       return out
50 | 
51 |   def _normalize(self, x, mean, mean_sq, message):
52 |     # make sure this is called with a variable scope
53 |     shape = x.get_shape().as_list()
54 |     assert len(shape) == 3
55 |     self.gamma = tf.get_variable("gamma", [shape[-1]],
56 |                             initializer=tf.random_normal_initializer(1., 0.02))
57 |     gamma = tf.reshape(self.gamma, [1, 1, -1])
58 |     self.beta = tf.get_variable("beta", [shape[-1]],
59 |                             initializer=tf.constant_initializer(0.))
60 |     beta = tf.reshape(self.beta, [1, 1, -1])
61 |     assert self.epsilon is not None
62 |     assert mean_sq is not None
63 |     assert mean is not None
64 |     std = tf.sqrt(self.epsilon + mean_sq - tf.square(mean))
65 |     out = x - mean
66 |     out = out / std
67 |     out = out * gamma
68 |     out = out + beta
69 |     return out
70 | 
71 | def vbn(self, tensor, name):
72 |   if self.disable_vbn:
73 |     class Dummy(object):
74 |       # Do nothing here, no bnorm
75 |       def __init__(self, tensor, ignored):
76 |         self.reference_output=tensor
77 |       def __call__(self, x):
78 |         return x
79 |     VBN_cls = Dummy
80 |   else:
81 |     VBN_cls = VBN
82 |   if not hasattr(self, name):
83 |     vbn = VBN_cls(tensor, name)
84 |     setattr(self, name, vbn)
85 |     return vbn.reference_output
86 |   vbn = getattr(self, name)
87 |   return vbn(tensor)
88 | 


--------------------------------------------------------------------------------
/utils/bnorm.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/utils/bnorm.pyc


--------------------------------------------------------------------------------
/utils/common.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # Copyright 2017    Ke Wang
 5 | 
 6 | """ This module contains several utility functions and classes that are
 7 | commonly used in every scripts.
 8 | https://github.com/kaldi-asr/kaldi/blob/master/egs/wsj/s5/steps/libs/common.py
 9 | """
10 | 
11 | from __future__ import absolute_import
12 | from __future__ import division
13 | from __future__ import print_function
14 | 
15 | import logging
16 | import subprocess
17 | 
18 | logger = logging.getLogger(__name__)
19 | logger.addHandler(logging.NullHandler())
20 | 
21 | 
22 | def execute_command(command):
23 |     """ Runs a job in the foreground and waits for it to complete; raises an
24 |         exception if its return status is nonzero.  The command is executed in
25 |         'shell' mode so 'command' can involve things like pipes.
26 |         See also: get_command_stdout
27 |     """
28 |     p = subprocess.Popen(command, shell=True)
29 |     p.communicate()
30 |     if p.returncode is not 0:
31 |         raise Exception("Command exited with status {0}: {1}".format(
32 |                 p.returncode, command))
33 | 
34 | 
35 | def get_command_stdout(command, require_zero_status = True):
36 |     """ Executes a command and returns its stdout output as a string. The
37 |         command is executed with shell=True, so it may contain pipes and
38 |         other shell constructs.
39 |         If require_zero_stats is True, this function will raise an exception if
40 |         the command has nonzero exit status. If False, it just prints a warning
41 |         if the exit status is nonzero.
42 |         See also: execute_command
43 |     """
44 |     p = subprocess.Popen(command, shell=True,
45 |                          stdout=subprocess.PIPE)
46 | 
47 |     stdout = p.communicate()[0]
48 |     if p.returncode is not 0:
49 |         output = "Command exited with status {0}: {1}".format(
50 |             p.returncode, command)
51 |         if require_zero_status:
52 |             raise Exception(output)
53 |         else:
54 |             logger.warning(output)
55 |     return stdout if type(stdout) is str else stdout.decode()
56 | 


--------------------------------------------------------------------------------
/utils/generate_plots.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # Copyright 2017    Ke Wang
  5 | 
  6 | from __future__ import absolute_import
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | import argparse
 11 | import errno
 12 | import logging
 13 | import os
 14 | import sys
 15 | import warnings
 16 | 
 17 | sys.path.append(os.path.dirname(sys.path[0]))
 18 | import common as common
 19 | 
 20 | 
 21 | try:
 22 |     import matplotlib as mpl
 23 |     mpl.use('Agg')
 24 |     import matplotlib.pyplot as plt
 25 |     import numpy as np
 26 |     from matplotlib.patches import Rectangle
 27 |     g_plot = True
 28 | except ImportError:
 29 |     warnings.warn(
 30 |         """This script requires matplotlib and numpy.
 31 |         Please install them to generate plots.
 32 |         Proceeding with generation of tables.
 33 |         If you are on a cluster where you do not have admin rights you could
 34 |         try using virtualenv.""")
 35 |     g_plot = False
 36 | 
 37 | 
 38 | logger = logging.getLogger('utils')
 39 | logger.setLevel(logging.INFO)
 40 | handler = logging.StreamHandler()
 41 | handler.setLevel(logging.INFO)
 42 | formatter = logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - "
 43 |                               "%(funcName)s - %(levelname)s ] %(message)s")
 44 | handler.setFormatter(formatter)
 45 | logger.addHandler(handler)
 46 | logger.info('Generating plots')
 47 | 
 48 | 
 49 | def get_args():
 50 |     parser = argparse.ArgumentParser(
 51 |         description="""Parses the training logs and generates a variety of
 52 |         plots.
 53 |         e.g. utils/generate_plots.py train_dnn.log exp/train_dnn.
 54 |         Look for the report.pdf in the output (report) directory.""")
 55 |     parser.add_argument("--adversarial",
 56 |                         default=False,
 57 |                         action="store_true",
 58 |                         help="Flag indicating parse adversarial model or not."
 59 |     )
 60 |     parser.add_argument("log_file",
 61 |                         # required=True,
 62 |                         help="name of log file."
 63 |     )
 64 |     parser.add_argument("output_dir",
 65 |                         # required=True,
 66 |                         help="report directory."
 67 |     )
 68 |     args = parser.parse_args()
 69 |     return args
 70 | 
 71 | 
 72 | g_plot_colors = ['red', 'blue', 'green', 'black', 'magenta', 'yellow', 'cyan']
 73 | 
 74 | 
 75 | def generate_loss_plots(adversarial, log_file, output_dir, plot):
 76 |     train_key = "TRAIN"
 77 |     valid_key = "CROSS"
 78 |     try:
 79 |         os.makedirs(output_dir)
 80 |     except OSError as e:
 81 |         if e.errno == errno.EEXIST and os.path.isdir(output_dir):
 82 |             pass
 83 |         else:
 84 |             raise e
 85 |     logger.info("Generating loss plots")
 86 |     if adversarial:
 87 |         tr_losses = parse_loss_log_adversarial(log_file, train_key)
 88 |         cv_losses = parse_loss_log_adversarial(log_file, valid_key)
 89 |     else:
 90 |         tr_losses = parse_loss_log(log_file, train_key)
 91 |         cv_losses = parse_loss_log(log_file, valid_key)
 92 | 
 93 |     if plot:
 94 |         fig = plt.figure()
 95 |         plots = []
 96 | 
 97 |     for key_word in sorted(tr_losses.keys()):
 98 |         name = key_word
 99 |         tr_data = tr_losses[key_word]
100 |         tr_data = np.array(tr_data)
101 |         tr_iters = np.arange(1, tr_data.size+1)
102 |         color_val = g_plot_colors[0]
103 |         plot_handle, = plt.plot(tr_iters[:], tr_data[:], color=color_val,
104 |                                linestyle="--", label="train")
105 |         plots.append(plot_handle)
106 |         color_val = g_plot_colors[1]
107 |         cv_data = cv_losses[key_word]
108 |         cv_data = np.array(cv_data)
109 |         cv_iters = np.linspace(0, tr_data.size, num=cv_data.size, dtype=int)
110 |         plot_handle, = plt.plot(cv_iters[:], cv_data[:], color=color_val,
111 |                                 label="valid")
112 |         plots.append(plot_handle)
113 |         if plot:
114 |             plt.xlabel("Iteration")
115 |             plt.ylabel("Loss")
116 |             lgd = plt.legend(handles=plots, loc="upper right",
117 |                              ncol=1, borderaxespad=0.)
118 |             plt.grid(True)
119 |             plt.title(key_word)
120 |             figfile_name = "{0}/{1}.pdf".format(output_dir, key_word)
121 |             plt.savefig(figfile_name, bbox_extra_artists=(lgd,),
122 |                         bbox_inches="tight")
123 |             fig = plt.figure()
124 |             plots = []
125 | 
126 | 
127 | def parse_loss_log_adversarial(log_file, key):
128 |     """Parse adversarial model loss log file.
129 |     train_loss_string format:
130 |       1/821 (TRAIN AVG.LOSS): d_rl_loss = 0.32810, d_fk_loss = 0.32194, d_loss = 0.65004, g_adv_loss = 0.50822, g_mse_loss = 7.11048, g_l2_loss = 0.00000, g_loss = 36.06060
131 |     valid_loss_string format:
132 |       1/821 (CROSS AVG.LOSS): d_rl_loss = 0.34894, d_fk_loss = 0.17205, d_loss = 0.52099, g_adv_loss = 0.39619, g_mse_loss = 8.70989, g_l2_loss = 0.00000, g_loss = 43.94563
133 |     """
134 |     d_rl_losses = []
135 |     d_fk_losses = []
136 |     d_losses = []
137 |     g_adv_losses = []
138 |     g_mse_losses = []
139 |     g_l2_losses = []
140 |     g_losses = []
141 |     key_word = ["d_rl_loss", "d_fk_loss", "d_loss",
142 |                 "g_adv_loss", "g_mse_loss", "g_l2_loss", "g_loss"]
143 |     losses = {key_word[0]: d_rl_losses,
144 |               key_word[1]: d_fk_losses,
145 |               key_word[2]: d_losses,
146 |               key_word[3]: g_adv_losses,
147 |               key_word[4]: g_mse_losses,
148 |               key_word[5]: g_l2_losses,
149 |               key_word[6]: g_losses}
150 | 
151 |     train_loss_strings = common.get_command_stdout(
152 |         "grep -e {} {}".format(key, log_file))
153 |     for line in train_loss_strings.strip().split("\n"):
154 |         line = line.split(",")
155 |         assert len(line) == 7
156 |         for i in range(7):
157 |             sub_line = line[i].split()
158 |             assert key_word[i] in sub_line
159 |             losses[key_word[i]].append(float(sub_line[-1]))
160 | 
161 |     return losses
162 | 
163 | 
164 | def parse_loss_log(log_file, key):
165 |     """Parse loss log file.
166 |     train_loss_string format:
167 |       1/178 (TRAIN AVG.LOSS): g_mse_loss = 12.76571, g_l2_loss = 0.00000, g_loss = 12.76571, learning_rate= 1.200e-03
168 |     valid_loss_string format:
169 |       1/178 (CROSS AVG.LOSS): g_mse_loss = 9.99273, g_l2_loss = 0.00000, g_loss = 9.99273, time = 3.52 min
170 |     """
171 |     g_mse_losses = []
172 |     g_l2_losses = []
173 |     g_losses = []
174 |     key_word = ["g_mse_loss", "g_l2_loss", "g_loss"]
175 |     losses = {key_word[0]: g_mse_losses,
176 |               key_word[1]: g_l2_losses,
177 |               key_word[2]: g_losses}
178 | 
179 |     train_loss_strings = common.get_command_stdout(
180 |         "grep -e {} {}".format(key, log_file))
181 |     for line in train_loss_strings.strip().split("\n"):
182 |         line = line.split(",")
183 |         assert len(line) == 4
184 |         for i in range(3):
185 |             sub_line = line[i].split()
186 |             assert key_word[i] in sub_line
187 |             losses[key_word[i]].append(float(sub_line[-1]))
188 | 
189 |     return losses
190 | 
191 | 
192 | def main():
193 |     args = get_args()
194 |     generate_loss_plots(args.adversarial, args.log_file, args.output_dir, g_plot)
195 |     logger.info("Generating loss plots sucessfully.")
196 | 
197 | 
198 | if __name__ == "__main__":
199 |     main()
200 | 


--------------------------------------------------------------------------------
/utils/misc.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # Copyright 2017    Ke Wang     Xiaomi
 5 | 
 6 | from __future__ import absolute_import
 7 | from __future__ import division
 8 | from __future__ import print_function
 9 | 
10 | import argparse
11 | import sys
12 | import pprint
13 | 
14 | import tensorflow as tf
15 | import tensorflow.contrib.slim as slim
16 | 
17 | 
18 | pp = pprint.PrettyPrinter()
19 | 
20 | def check_tensorflow_version():
21 |     if tf.__version__ < "1.3.0":
22 |         raise EnvironmentError("Tensorflow version must >= 1.3.0")
23 |     else:
24 |         print(tf.__version__)
25 | 
26 | 
27 | def read_list(filename):
28 |     data_list = []
29 |     with open(filename, 'r') as fr:
30 |         lines = fr.readlines()
31 |         for line in lines:
32 |             line = line.strip()
33 |             data_list.append(line)
34 |     return data_list
35 | 
36 | 
37 | def show_all_variables():
38 |     model_vars = tf.trainable_variables()
39 |     slim.model_analyzer.analyze_vars(model_vars, print_info=True)
40 |     sys.stdout.flush()
41 | 
42 | 
43 | def str2bool(v):
44 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
45 |         return True
46 |     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
47 |         return False
48 |     else:
49 |         raise argparse.ArgumentTypeError('Boolean value expected.')
50 | 


--------------------------------------------------------------------------------
/utils/misc.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/utils/misc.pyc


--------------------------------------------------------------------------------
/utils/ops.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/linan2/TensorFlow-speech-enhancement-Chinese/7033215c086efea8bf0fb56319f4185d7fdb5754/utils/ops.pyc


--------------------------------------------------------------------------------
/utils/select_data.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # Copyright 2017    Ke Wang
 5 | 
 6 | from __future__ import absolute_import
 7 | from __future__ import division
 8 | from __future__ import print_function
 9 | 
10 | import sys
11 | 
12 | if __name__ == "__main__":
13 |     if len(sys.argv) != 4:
14 |         print("Error parameter numbers.")
15 |         print("Usage: python select_data.py infile1(key) ", end='')
16 |         print("infile2(text_raw) outfile(text)")
17 |         sys.exit(1)
18 |     file_key = open(sys.argv[1], 'r')
19 |     file_raw = open(sys.argv[2], 'r')
20 |     file_text = open(sys.argv[3], 'w')
21 | 
22 |     key = []
23 | 
24 |     key_lines = file_key.readlines()
25 |     for line in key_lines:
26 |         line = line.decode('utf-8').strip()
27 |         key.append(line)
28 | 
29 |     raw_lines = file_raw.readlines()
30 |     line_num = 0
31 |     line_total = len(key)
32 |     for line in raw_lines:
33 |         line_back = line.decode('utf-8').strip()
34 |         line = line_back.split()
35 |         if line[0] == key[line_num]:
36 |             file_text.write(line_back.encode('utf-8'))
37 |             file_text.write('\n')
38 |             line_num += 1
39 |             if line_num >= line_total:
40 |                 break
41 | 


--------------------------------------------------------------------------------