├── After_DTW ├── S_DTW_sentence_1.wav ├── S_DTW_sentence_10.wav ├── S_DTW_sentence_11.wav ├── S_DTW_sentence_12.wav ├── S_DTW_sentence_13.wav ├── S_DTW_sentence_14.wav ├── S_DTW_sentence_15.wav ├── S_DTW_sentence_16.wav ├── S_DTW_sentence_17.wav ├── S_DTW_sentence_18.wav ├── S_DTW_sentence_19.wav ├── S_DTW_sentence_2.wav ├── S_DTW_sentence_20.wav ├── S_DTW_sentence_3.wav ├── S_DTW_sentence_4.wav ├── S_DTW_sentence_5.wav ├── S_DTW_sentence_6.wav ├── S_DTW_sentence_7.wav ├── S_DTW_sentence_8.wav ├── S_DTW_sentence_9.wav ├── T_DTW_sentence_1.wav ├── T_DTW_sentence_10.wav ├── T_DTW_sentence_11.wav ├── T_DTW_sentence_12.wav ├── T_DTW_sentence_13.wav ├── T_DTW_sentence_14.wav ├── T_DTW_sentence_15.wav ├── T_DTW_sentence_16.wav ├── T_DTW_sentence_17.wav ├── T_DTW_sentence_18.wav ├── T_DTW_sentence_19.wav ├── T_DTW_sentence_2.wav ├── T_DTW_sentence_20.wav ├── T_DTW_sentence_3.wav ├── T_DTW_sentence_4.wav ├── T_DTW_sentence_5.wav ├── T_DTW_sentence_6.wav ├── T_DTW_sentence_7.wav ├── T_DTW_sentence_8.wav └── T_DTW_sentence_9.wav ├── Converted_speech ├── 1.wav ├── 2.wav ├── 3.wav ├── 4.wav └── 5.wav ├── Converted_speech_STRAIGHT ├── 1.wav ├── 2.wav ├── 3.wav ├── 4.wav └── 5.wav ├── Gitsource.list ├── Gitsource_Test.list ├── Gittarget.list ├── JDNMF.m ├── JDNMF_STRAIGHT.m ├── README.md ├── Source ├── 1.wav ├── 10.wav ├── 11.wav ├── 12.wav ├── 13.wav ├── 14.wav ├── 15.wav ├── 16.wav ├── 17.wav ├── 18.wav ├── 19.wav ├── 2.wav ├── 20.wav ├── 3.wav ├── 4.wav ├── 5.wav ├── 6.wav ├── 7.wav ├── 8.wav └── 9.wav ├── Source_Test ├── 1.wav ├── 2.wav ├── 3.wav ├── 4.wav └── 5.wav ├── Target ├── 1.wav ├── 10.wav ├── 11.wav ├── 12.wav ├── 13.wav ├── 14.wav ├── 15.wav ├── 16.wav ├── 17.wav ├── 18.wav ├── 19.wav ├── 2.wav ├── 20.wav ├── 3.wav ├── 4.wav ├── 5.wav ├── 6.wav ├── 7.wav ├── 8.wav └── 9.wav └── utils ├── Buildspectrogram.m ├── Convert2Speech.m ├── DTW.m ├── GetFileNames.m ├── Get_Mel_Spectrum.m ├── Implement_DTW.m ├── MakeMelCoef.m ├── MelSpectrum2PowerSpectrum.m ├── 
Mel_Spectrum_FromX.m ├── OverlapAddVC.m ├── PowerSpectrum2WaveVC.m ├── Spectrum.m ├── fft2melmx.m ├── multiframes2.m ├── nmfdiv.m ├── nmfmse.m └── singleframe2.m /After_DTW/S_DTW_sentence_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_1.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_10.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_11.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_11.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_12.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_12.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_13.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_13.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_14.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_14.wav 
-------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_15.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_15.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_16.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_16.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_17.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_17.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_18.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_18.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_19.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_19.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_2.wav -------------------------------------------------------------------------------- 
/After_DTW/S_DTW_sentence_20.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_20.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_3.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_4.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_5.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_6.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_7.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_8.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_8.wav -------------------------------------------------------------------------------- /After_DTW/S_DTW_sentence_9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/S_DTW_sentence_9.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_1.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_10.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_11.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_11.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_12.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_12.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_13.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_13.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_14.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_14.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_15.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_15.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_16.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_16.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_17.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_17.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_18.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_18.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_19.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_19.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_2.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_20.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_20.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_3.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_4.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_5.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_6.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_6.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_7.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_8.wav -------------------------------------------------------------------------------- /After_DTW/T_DTW_sentence_9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/After_DTW/T_DTW_sentence_9.wav -------------------------------------------------------------------------------- /Converted_speech/1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Converted_speech/1.wav -------------------------------------------------------------------------------- /Converted_speech/2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Converted_speech/2.wav -------------------------------------------------------------------------------- /Converted_speech/3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Converted_speech/3.wav 
-------------------------------------------------------------------------------- /Converted_speech/4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Converted_speech/4.wav -------------------------------------------------------------------------------- /Converted_speech/5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Converted_speech/5.wav -------------------------------------------------------------------------------- /Converted_speech_STRAIGHT/1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Converted_speech_STRAIGHT/1.wav -------------------------------------------------------------------------------- /Converted_speech_STRAIGHT/2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Converted_speech_STRAIGHT/2.wav -------------------------------------------------------------------------------- /Converted_speech_STRAIGHT/3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Converted_speech_STRAIGHT/3.wav -------------------------------------------------------------------------------- /Converted_speech_STRAIGHT/4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Converted_speech_STRAIGHT/4.wav -------------------------------------------------------------------------------- /Converted_speech_STRAIGHT/5.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Converted_speech_STRAIGHT/5.wav -------------------------------------------------------------------------------- /Gitsource.list: -------------------------------------------------------------------------------- 1 | ./Source/1.wav 2 | ./Source/2.wav 3 | ./Source/3.wav 4 | ./Source/4.wav 5 | ./Source/5.wav 6 | ./Source/6.wav 7 | ./Source/7.wav 8 | ./Source/8.wav 9 | ./Source/9.wav 10 | ./Source/10.wav 11 | ./Source/11.wav 12 | ./Source/12.wav 13 | ./Source/13.wav 14 | ./Source/14.wav 15 | ./Source/15.wav 16 | ./Source/16.wav 17 | ./Source/17.wav 18 | ./Source/18.wav 19 | ./Source/19.wav 20 | ./Source/20.wav -------------------------------------------------------------------------------- /Gitsource_Test.list: -------------------------------------------------------------------------------- 1 | ./Source_Test/1.wav 2 | ./Source_Test/2.wav 3 | ./Source_Test/3.wav 4 | ./Source_Test/4.wav 5 | ./Source_Test/5.wav 6 | -------------------------------------------------------------------------------- /Gittarget.list: -------------------------------------------------------------------------------- 1 | ./Target/1.wav 2 | ./Target/2.wav 3 | ./Target/3.wav 4 | ./Target/4.wav 5 | ./Target/5.wav 6 | ./Target/6.wav 7 | ./Target/7.wav 8 | ./Target/8.wav 9 | ./Target/9.wav 10 | ./Target/10.wav 11 | ./Target/11.wav 12 | ./Target/12.wav 13 | ./Target/13.wav 14 | ./Target/14.wav 15 | ./Target/15.wav 16 | ./Target/16.wav 17 | ./Target/17.wav 18 | ./Target/18.wav 19 | ./Target/19.wav 20 | ./Target/20.wav -------------------------------------------------------------------------------- /JDNMF.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/JDNMF.m 
-------------------------------------------------------------------------------- /JDNMF_STRAIGHT.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/JDNMF_STRAIGHT.m -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Joint Dictionary Learning-based Non-Negative Matrix Factorization for Voice Conversion to Improve Speech Intelligibility After Oral Surgery (TBME 2016) 2 | 3 | 4 | IEEE Transactions on Biomedical Engineering, 2016 5 | 6 | 7 | ### Introduction 8 | The Joint Dictionary Learning-based Non-Negative Matrix Factorization (JD-NMF) is used for training a joint dictionary (source & target) for voice conversion. But this method can also be used in other applications where the two dictionaries have to be aligned. The basic idea is that if two signals are first aligned by some method (e.g., DTW in speech processing), to reconstruct the coupled training data with a shared activation matrix, the learned dictionaries are automatically forced to couple with each other to minimize the distance (e.g., KL divergence). 9 | 10 | 11 | For more details and evaluation results, please check out our [paper](http://ieeexplore.ieee.org/document/7797132/). 12 | 13 | ![teaser](https://jasonswfu.github.io/JasonFu.github.io/images/Joint_NMF.png) 14 | 15 | ### Usage 16 | 17 | `Gitsource.list` is the list of source speech files used for training JD-NMF. 18 | `Gittarget.list` is the list of target speech files used for training JD-NMF. 19 | `Gitsource_Test.list` is the list of source speech files used for testing (conversion). 20 | 21 | `JDNMF.m`: Convert the source speech files listed in `Gitsource_Test.list` (with spectrogram features) to the `Converted_speech` folder.
22 | 23 | 24 | `JDNMF_STRAIGHT.m`: Convert the source speech files listed in `Gitsource_Test.list` (with STRAIGHT features) to the `Converted_speech_STRAIGHT` folder. This may perform better, but you have to request the STRAIGHT code from [here](http://www.wakayama-u.ac.jp/~kawahara/index-e.html). 25 | 26 | 27 | ### Citation 28 | 29 | If you find the code and datasets useful in your research, please cite: 30 | 31 | @article{fu2016joint, 32 | title={Joint Dictionary Learning-based Non-Negative Matrix Factorization for Voice Conversion to Improve Speech Intelligibility After Oral Surgery}, 33 | author={Fu, Szu-Wei and Li, Pei-Chun and Lai, Ying-Hui and Yang, Cheng-Chien and Hsieh, Li-Chun and Tsao, Yu}, 34 | journal={IEEE Transactions on Biomedical Engineering}, 35 | year={2016}, 36 | publisher={IEEE} 37 | } 38 | 39 | ### Contact 40 | 41 | e-mail: jasonfu@iis.sinica.edu.tw or d04922007@ntu.edu.tw 42 | -------------------------------------------------------------------------------- /Source/1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/1.wav -------------------------------------------------------------------------------- /Source/10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/10.wav -------------------------------------------------------------------------------- /Source/11.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/11.wav -------------------------------------------------------------------------------- /Source/12.wav: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/12.wav -------------------------------------------------------------------------------- /Source/13.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/13.wav -------------------------------------------------------------------------------- /Source/14.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/14.wav -------------------------------------------------------------------------------- /Source/15.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/15.wav -------------------------------------------------------------------------------- /Source/16.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/16.wav -------------------------------------------------------------------------------- /Source/17.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/17.wav -------------------------------------------------------------------------------- /Source/18.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/18.wav -------------------------------------------------------------------------------- /Source/19.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/19.wav -------------------------------------------------------------------------------- /Source/2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/2.wav -------------------------------------------------------------------------------- /Source/20.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/20.wav -------------------------------------------------------------------------------- /Source/3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/3.wav -------------------------------------------------------------------------------- /Source/4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/4.wav -------------------------------------------------------------------------------- /Source/5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/5.wav -------------------------------------------------------------------------------- /Source/6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/6.wav -------------------------------------------------------------------------------- /Source/7.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/7.wav -------------------------------------------------------------------------------- /Source/8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/8.wav -------------------------------------------------------------------------------- /Source/9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source/9.wav -------------------------------------------------------------------------------- /Source_Test/1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source_Test/1.wav -------------------------------------------------------------------------------- /Source_Test/2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source_Test/2.wav -------------------------------------------------------------------------------- /Source_Test/3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source_Test/3.wav -------------------------------------------------------------------------------- /Source_Test/4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source_Test/4.wav -------------------------------------------------------------------------------- /Source_Test/5.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Source_Test/5.wav -------------------------------------------------------------------------------- /Target/1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/1.wav -------------------------------------------------------------------------------- /Target/10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/10.wav -------------------------------------------------------------------------------- /Target/11.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/11.wav -------------------------------------------------------------------------------- /Target/12.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/12.wav -------------------------------------------------------------------------------- /Target/13.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/13.wav -------------------------------------------------------------------------------- /Target/14.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/14.wav -------------------------------------------------------------------------------- /Target/15.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/15.wav -------------------------------------------------------------------------------- /Target/16.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/16.wav -------------------------------------------------------------------------------- /Target/17.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/17.wav -------------------------------------------------------------------------------- /Target/18.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/18.wav -------------------------------------------------------------------------------- /Target/19.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/19.wav -------------------------------------------------------------------------------- /Target/2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/2.wav -------------------------------------------------------------------------------- /Target/20.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/20.wav -------------------------------------------------------------------------------- /Target/3.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/3.wav -------------------------------------------------------------------------------- /Target/4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/4.wav -------------------------------------------------------------------------------- /Target/5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/5.wav -------------------------------------------------------------------------------- /Target/6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/6.wav -------------------------------------------------------------------------------- /Target/7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/7.wav -------------------------------------------------------------------------------- /Target/8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/8.wav -------------------------------------------------------------------------------- /Target/9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/Target/9.wav -------------------------------------------------------------------------------- /utils/Buildspectrogram.m: 
--------------------------------------------------------------------------------
function [abs_spectrum,phse_spectrum,Nframes,resdiuePoint]=Buildspectrogram(x,Srate)
%BUILDSPECTROGRAM Short-time magnitude and phase spectra of a waveform.
%   [abs_spectrum,phse_spectrum,Nframes,resdiuePoint]=Buildspectrogram(x,Srate)
%
%   x      : input waveform (any vector; processed as a column)
%   Srate  : sampling rate in Hz
%
%   abs_spectrum  : nFFT-by-Nframes magnitude of the FFT of every frame
%   phse_spectrum : nFFT-by-Nframes phase angle (radians, wrapped) of the same FFTs
%   Nframes       : number of analysed frames (0 when x is too short)
%   resdiuePoint  : samples of x from one hop past the start of the last
%                   analysed frame up to the end of x (all of x when Nframes==0)
%
%   Frames are 20 ms long (made even), Hamming windowed, with 50% overlap.
%   nFFT is twice the next power of two >= the frame length.

x=x(:);                                 % windowing below needs a column vector

len=floor(20*Srate/1000);               % Frame size in samples
if rem(len,2)==1, len=len+1; end
PERC=50;                                % window overlap in percent of frame size
len1=floor(len*PERC/100);
len2=len-len1;                          % hop size
win=hamming(len);                       % analysis window

nFFT=2*2^nextpow2(len);
Nframes=max(floor(length(x)/len2)-1,0); % never negative, even for very short x
abs_spectrum=zeros(nFFT,Nframes);
phse_spectrum=zeros(nFFT,Nframes);

k=1;
for n=1:Nframes
    spec=fft(win.*x(k:k+len-1),nFFT);   % windowed frame -> spectrum
    abs_spectrum(:,n)=abs(spec);
    % angle() is core MATLAB; phase() requires an extra toolbox and differs
    % only by multiples of 2*pi, which exp(1i*theta) ignores.
    phse_spectrum(:,n)=angle(spec);
    k=k+len2;
end
% Assigned unconditionally so the output exists even when no frame was analysed.
resdiuePoint=x(k:end);
--------------------------------------------------------------------------------
/utils/Convert2Speech.m:
--------------------------------------------------------------------------------
function originalSpeech=Convert2Speech(spectrum,theta,Srate,Nframes,residuepoint)
%CONVERT2SPEECH Overlap-add resynthesis from magnitude and phase spectra.
%   originalSpeech=Convert2Speech(spectrum,theta,Srate,Nframes,residuepoint)
%
%   spectrum     : magnitude spectra, one column per frame
%   theta        : phase in radians, same size as spectrum
%   Srate        : sampling rate in Hz (fixes the 20 ms frame length)
%   Nframes      : number of frames (columns) to synthesise
%   residuepoint : samples appended unchanged after the synthesised part
%
%   Returns a column vector: the gain-normalised overlap-add signal
%   followed by residuepoint.

len=floor(20*Srate/1000);               % Frame size in samples
if rem(len,2)==1, len=len+1; end
PERC=50;                                % window overlap in percent of frame size
len1=floor(len*PERC/100);
len2=len-len1;
win=hamming(len);
winGain=len2/sum(win);                  % normalization gain for overlap+add with 50% overlap

x_old=zeros(len1,1);                    % tail of the previous frame
xfinal=zeros(Nframes*len2,1);

% 1i instead of j: j may be shadowed by a workspace/loop variable.
new_speech=real(ifft(spectrum.*exp(1i*theta)));

k=1;
for cc=1:Nframes
    ind_frame=new_speech(:,cc);
    xfinal(k:k+len2-1)=x_old+ind_frame(1:len1);
    x_old=ind_frame(1+len1:len);
    k=k+len2;
end
originalSpeech=[winGain*xfinal;residuepoint(:)];
--------------------------------------------------------------------------------
/utils/DTW.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JasonSWFu/JD-NMF/6f570d0e7dade563e3c61e30c0daa5964a577c03/utils/DTW.m
--------------------------------------------------------------------------------
/utils/GetFileNames.m:
--------------------------------------------------------------------------------
function FileName=GetFileNames(TestList)
%GETFILENAMES Read a list of file names from a text file, one per line.
%   FileName=GetFileNames(TestList)
%
%   TestList : path of the text file
%   FileName : 1-by-N cell array of the non-blank lines, in file order,
%              with surrounding whitespace (including a trailing CR) removed
%
%   Raises an error when TestList cannot be opened.
%2003, Nov, 20
%Lu Xugang, JAIST

Fid1=fopen(TestList,'r');
if Fid1==-1
    error(['cannot open to Read ',TestList]);
end

FileName=cell(1,0);
while true
    textLine=fgetl(Fid1);
    if ~ischar(textLine)                % fgetl returns -1 (numeric) at end of file
        break;
    end
    textLine=strtrim(textLine);
    if ~isempty(textLine)               % a blank line is not a file name
        FileName{end+1}=textLine; %#ok<AGROW>
    end
end
fclose(Fid1);

return;
--------------------------------------------------------------------------------
/utils/Get_Mel_Spectrum.m:
--------------------------------------------------------------------------------
function MFCSpectrum=Get_Mel_Spectrum(PowerSpectrum,MelFilt)
%GET_MEL_SPECTRUM Apply a Mel filterbank matrix to a power spectrum.
%   MFCSpectrum=Get_Mel_Spectrum(PowerSpectrum,MelFilt)
%
%   PowerSpectrum : bins-by-frames input power spectrum
%   MelFilt       : filters-by-bins weight matrix (triangle filters, Mel scale)
%
%   When the row count of PowerSpectrum differs from the column count of
%   MelFilt, rows (len2-len1):(len2-1) of PowerSpectrum are used, i.e. the
%   last row is always dropped in that case.
%Xugang Lu, @ATR/NICT
%May, 15, 2009

len1=size(MelFilt,2);
len2=size(PowerSpectrum,1);
if len2<len1
    error('Get_Mel_Spectrum:dimMismatch', ...
        'PowerSpectrum has %d rows but MelFilt needs at least %d.',len2,len1);
end

if len2~=len1
    % Adjust the dimension for MelFilt
    MFCSpectrum=MelFilt*PowerSpectrum(len2-len1:len2-1,:);
else
    MFCSpectrum=MelFilt*PowerSpectrum;
end

return;
--------------------------------------------------------------------------------
/utils/Implement_DTW.m:
--------------------------------------------------------------------------------
% Get the result of DTW
function [AfterDTW_TargetWavFile,AfterDTW_SourceWavFile]=Implement_DTW(TargetList,SourceList)
%IMPLEMENT_DTW Time-align paired source/target utterances and write them as wav.
%   [AfterDTW_TargetWavFile,AfterDTW_SourceWavFile]=Implement_DTW(TargetList,SourceList)
%
%   TargetList, SourceList : text files with one wav path per line; line i
%                            of both lists must be the same sentence.
%
%   For every pair: log-Mel spectra are extracted, DTW() gives the warping
%   path, path columns that repeat a source or target frame are dropped,
%   and both aligned utterances are resynthesised into
%   ./After_DTW/S_DTW_sentence_<i>.wav and ./After_DTW/T_DTW_sentence_<i>.wav.
%
%   Returns two filenum-by-1 cell arrays with the written file names.

nfft=256;                               % frame length and FFT size
frameShift=128;
sr=16000;                               % 16k Hz, sampling rate used for resynthesis
minfreq=120;
maxfreq=sr/2;
sumpower=1;
bwidth=1;
FeaDim=80;                              % number of Mel bands
outDir=fullfile('.','After_DTW');       % portable replacement for '.\After_DTW\'

TargetWavFile=GetFileNames(TargetList);
SourceWavFile=GetFileNames(SourceList);
filenum=size(TargetWavFile,2);
if size(SourceWavFile,2)<filenum
    error('Implement_DTW:listMismatch', ...
        'Source list has %d files but target list has %d.', ...
        size(SourceWavFile,2),filenum);
end

%%% Target Mel spectrum extraction
TargetMFCSpectrum=cell(1,filenum);
yphase1=cell(1,filenum);
for i=1:filenum
    [x,FS]=localReadMono(TargetWavFile{i});
    localCheckRate(FS,sr,TargetWavFile{i});
    [TargetMFCSpectrum{i},yphase1{i}]=Mel_Spectrum_FromX(x*1000,2,nfft,frameShift,nfft,FS,minfreq,FeaDim);
end

%%% Source Mel spectrum extraction
SourceMFCSpectrum=cell(1,filenum);
yphase2=cell(1,filenum);
for i=1:filenum
    % SourceWavFile{i} is the source wav file corresponding to TargetWavFile{i}.
    [x,FS]=localReadMono(SourceWavFile{i});
    localCheckRate(FS,sr,SourceWavFile{i});
    [SourceMFCSpectrum{i},yphase2{i}]=Mel_Spectrum_FromX(x*1000,2,nfft,frameShift,nfft,FS,minfreq,FeaDim);
end

% DTW
disp('DTW...')
for i=1:filenum
    tgtSpec=TargetMFCSpectrum{i}; tgtPhase=yphase1{i};
    srcSpec=SourceMFCSpectrum{i}; srcPhase=yphase2{i};

    % Row 1 indexes source frames, row 2 target frames.
    warpPath=DTW(srcSpec,tgtSpec);

    % Remove repeated frames: drop every path column whose source index or
    % target index equals that of the following column.
    dupCols=[find(diff(warpPath(1,:))==0),find(diff(warpPath(2,:))==0)];
    warpPath(:,dupCols)=[];

    SourceMFCSpectrum{i}=srcSpec(:,warpPath(1,:)); yphase2{i}=srcPhase(:,warpPath(1,:));
    TargetMFCSpectrum{i}=tgtSpec(:,warpPath(2,:)); yphase1{i}=tgtPhase(:,warpPath(2,:));
end

% F domain -> T domain
if ~exist(outDir,'dir')
    mkdir(outDir);
end
AfterDTW_TargetWavFile=cell(filenum,1);
AfterDTW_SourceWavFile=cell(filenum,1);
for i=1:filenum
    source_name=fullfile(outDir,['S_DTW_sentence_',num2str(i),'.wav']);
    localMelToWav(SourceMFCSpectrum{i},yphase2{i},source_name,sr,nfft,minfreq,maxfreq,sumpower,bwidth);

    target_name=fullfile(outDir,['T_DTW_sentence_',num2str(i),'.wav']);
    localMelToWav(TargetMFCSpectrum{i},yphase1{i},target_name,sr,nfft,minfreq,maxfreq,sumpower,bwidth);

    AfterDTW_TargetWavFile{i}=target_name;
    AfterDTW_SourceWavFile{i}=source_name;
end
end

function [x,fs]=localReadMono(fname)
% Read a wav file and average its channels into one column.
if exist('audioread','file')
    [tp,fs]=audioread(fname);
else
    [tp,fs]=wavread(fname);             % fallback for old MATLAB releases
end
x=mean(tp,2);
end

function localCheckRate(fs,sr,fname)
% Warn when a file's sampling rate differs from the rate used for resynthesis.
if fs~=sr
    warning('Implement_DTW:sampleRate', ...
        '%s is sampled at %d Hz, but resynthesis assumes %d Hz.',fname,fs,sr);
end
end

function localMelToWav(logMel,phaseMat,fname,sr,nfft,minfreq,maxfreq,sumpower,bwidth)
% Invert a log10 Mel spectrum to a peak-normalised waveform and write it.
MelSpec=power(10,logMel);
spec=MelSpectrum2PowerSpectrum(MelSpec,sr,nfft,'htkmel',minfreq,maxfreq,sumpower,bwidth); % for 16KHz
sig=PowerSpectrum2WaveVC(log10(spec),phaseMat);
peak=max(abs(sig));
if peak>0                               % avoid 0/0 on a silent signal
    sig=sig/peak;
end
if exist('audiowrite','file')
    audiowrite(fname,sig,sr);
else
    wavwrite(sig,sr,fname);             % fallback for old MATLAB releases
end
end
--------------------------------------------------------------------------------
/utils/MakeMelCoef.m:
--------------------------------------------------------------------------------
function wts = MakeMelCoef(sr, nfilts, fbtype, minfreq, maxfreq, bwidth,FFT_SIZE)
%MAKEMELCOEF Build the filterbank weight matrix used for analysis.
%   wts = MakeMelCoef(sr, nfilts, fbtype, minfreq, maxfreq, bwidth, FFT_SIZE)
%
%   fbtype : 'bark', 'mel', 'htkmel' or 'fcmel'
%   wts    : nfilts-by-(FFT_SIZE/2) weight matrix
%
%   The filterbank is generated for an FFT length of FFT_SIZE-2 and then
%   truncated to its first FFT_SIZE/2 columns.
%   NOTE(review): 'bark' calls fft2barkmx, which is not among the files
%   listed for this repository - confirm it is on the path before use.

nfft = (FFT_SIZE/2-1)*2;

%BARK,MEL, HTKMEL, FCMEL
switch fbtype
    case 'bark'
        wts = fft2barkmx(nfft, sr, nfilts, bwidth, minfreq, maxfreq);
    case 'mel'
        wts = fft2melmx(nfft, sr, nfilts, bwidth, minfreq, maxfreq);
    case 'htkmel'
        wts = fft2melmx(nfft, sr, nfilts, bwidth, minfreq, maxfreq, 1, 1);
    case 'fcmel'
        wts = fft2melmx(nfft, sr, nfilts, bwidth, minfreq, maxfreq, 1, 0);
    otherwise
        error(['fbtype ', fbtype, ' not recognized']);
end

wts = wts(:, 1:FFT_SIZE/2);
--------------------------------------------------------------------------------
/utils/MelSpectrum2PowerSpectrum.m:
--------------------------------------------------------------------------------
function [spec,wts,iwts] = MelSpectrum2PowerSpectrum(MelSpectrum, sr, nfft, fbtype, minfreq, maxfreq, sumpower, bwidth)
%MELSPECTRUM2POWERSPECTRUM Estimate an FFT power spectrum from a Mel spectrum.
%   [spec,wts,iwts] = MelSpectrum2PowerSpectrum(MelSpectrum, sr, nfft, fbtype,
%                                               minfreq, maxfreq, sumpower, bwidth)
%
%   MelSpectrum : nfilts-by-frames Mel spectrum, NOT log compressed
%   sr          : sampling rate (default 16000)
%   nfft        : FFT size (default 512)
%   fbtype      : 'bark' (default), 'mel', 'htkmel' or 'fcmel'
%   minfreq     : lowest band edge in Hz (default 0)
%   maxfreq     : highest band edge in Hz (default sr/2)
%   sumpower    : 1 -> weights applied in the power domain (default),
%                 0 -> applied to sqrt(MelSpectrum) and squared afterwards
%   bwidth      : relative band width (default 1.0)
%
%   spec : (nfft/2+1)-by-frames estimated power spectrum
%   wts  : forward filterbank, nfilts-by-(nfft/2+1)
%   iwts : approximate inverse weights, (nfft/2+1)-by-nfilts
%Xugang Lu @NICT
%Feb.8, 2013

if nargin < 2; sr = 16000;     end
if nargin < 3; nfft = 512;     end
if nargin < 4; fbtype = 'bark'; end
if nargin < 5; minfreq = 0;    end
if nargin < 6; maxfreq = sr/2; end
if nargin < 7; sumpower = 1;   end
if nargin < 8; bwidth = 1.0;   end

nfilts = size(MelSpectrum,1);

switch fbtype
    case 'bark'
        wts = fft2barkmx(nfft, sr, nfilts, bwidth, minfreq, maxfreq);
    case 'mel'
        wts = fft2melmx(nfft, sr, nfilts, bwidth, minfreq, maxfreq);
    case 'htkmel'
        wts = fft2melmx(nfft, sr, nfilts, bwidth, minfreq, maxfreq, 1, 1);
    case 'fcmel'
        % constamp left at its default (0), now spelled out explicitly
        wts = fft2melmx(nfft, sr, nfilts, bwidth, minfreq, maxfreq, 1, 0);
    otherwise
        error(['fbtype ', fbtype, ' not recognized']);
end

% Cut off 2nd half
wts = wts(:,1:((nfft/2)+1));

% Just transpose, fix up
ww = wts'*wts;
iwts = wts'./(repmat(max(mean(diag(ww))/100, sum(ww))',1,nfilts));

% Apply weights
if (sumpower)
    spec = iwts * MelSpectrum;
else
    spec = (iwts * sqrt(MelSpectrum)).^2;
end
return
--------------------------------------------------------------------------------
/utils/Mel_Spectrum_FromX.m:
--------------------------------------------------------------------------------
function [Log_MFCSpectrum,yphase]=Mel_Spectrum_FromX(x,AmFlag,FrameSize,FrameRate,FFT_SIZE,sr,minfreq,nfilts)
%MEL_SPECTRUM_FROMX log10 Mel spectrum and frame phases of a waveform.
%   [Log_MFCSpectrum,yphase]=Mel_Spectrum_FromX(x,AmFlag,FrameSize,FrameRate,
%                                               FFT_SIZE,sr,minfreq,nfilts)
%
%   x         : input waveform
%   AmFlag    : 2 -> power spectrum, 1 -> amplitude (passed on to Spectrum)
%   FrameSize : frame length in samples
%   FrameRate : frame shift in samples
%   FFT_SIZE  : FFT size
%   sr        : sampling rate in Hz; the top band edge is sr/2
%   minfreq   : lowest band edge in Hz
%   nfilts    : number of Mel bands
%
%   Log_MFCSpectrum : log10(eps + Mel spectrum), nfilts-by-frames
%   yphase          : phase returned by Spectrum for every frame

fbtype='htkmel';

% For power spectrum extraction; the windowed frames are not needed here.
[powspectrum,unusedFrames,yphase]=Spectrum(x,FrameSize,FrameRate,FFT_SIZE,AmFlag); %#ok<ASGLU>

maxfreq=sr/2;                           % half of sr
MelCoef=MakeMelCoef(sr,nfilts,fbtype,minfreq,maxfreq,1,FFT_SIZE);
MFCSpectrum=Get_Mel_Spectrum(powspectrum,MelCoef);
Log_MFCSpectrum=log10(eps+MFCSpectrum); % eps keeps log10 finite for empty bands
return;
--------------------------------------------------------------------------------
/utils/OverlapAddVC.m:
--------------------------------------------------------------------------------
function sig=OverlapAddVC(X,yphase)
%OVERLAPADDVC Reconstruct a signal from its half spectrogram by overlap-add.
%   sig=OverlapAddVC(X,yphase)
%
%   X      : matrix with one column per frame holding the non-negative
%            frequency half of the frame spectrum (magnitude when yphase is given)
%   yphase : phase angle in radians, same size as X. When omitted, X is
%            used as-is, i.e. its own phase (zero for real values).
%
%   sig    : column vector of length (frames-1)*128+256
%
%   Window length (256) and shift (128) are fixed.
%   NOTE(review): X presumably needs 129 rows so the mirrored spectrum has
%   256 rows; ifft pads or truncates silently otherwise - confirm at callers.
%Xugang Lu @NICT

windowLen=256;
ShiftLen=128;

FrameNum=size(X,2);

if nargin<2
    Spec=X;
else
    Spec=X.*exp(1i*yphase);             % 1i: immune to a variable named j
end

% Rebuild the negative-frequency half by conjugate symmetry.
if mod(windowLen,2)
    Spec=[Spec;flipud(conj(Spec(2:end,:)))];
else
    Spec=[Spec;flipud(conj(Spec(2:end-1,:)))];
end

sig=zeros((FrameNum-1)*ShiftLen+windowLen,1);
for i=1:FrameNum
    start=(i-1)*ShiftLen+1;
    span=start:start+windowLen-1;
    sig(span)=sig(span)+real(ifft(Spec(:,i),windowLen));
end
return
--------------------------------------------------------------------------------
/utils/PowerSpectrum2WaveVC.m:
--------------------------------------------------------------------------------
function sig=PowerSpectrum2WaveVC(log10powerspectrum,yphase)
%POWERSPECTRUM2WAVEVC Waveform from a log10 power spectrum and a phase matrix.
%   sig=PowerSpectrum2WaveVC(log10powerspectrum,yphase)
%
%   log10powerspectrum : log10-compressed power spectrum, must be 129*frames
%   yphase             : phase information, must be a matrix as 256*frames;
%                        only its first floor(rows/2)+1 rows are used
%Xugang Lu @NICT

% sqrt(10.^x) written directly; the former log/exp round trip was redundant.
magnitude=10.^(log10powerspectrum/2);
yphase=yphase(1:floor(size(yphase,1)/2)+1,:);   %For Odd sample
sig=OverlapAddVC(magnitude,yphase);

return;
--------------------------------------------------------------------------------
/utils/Spectrum.m:
--------------------------------------------------------------------------------
function [Spec,x_seg,yphase] = Spectrum(y,FrameLength,FrameRate,FFT_SIZE, flag)
%SPECTRUM Framewise FFT analysis of a waveform.
%   [Spec,x_seg,yphase] = Spectrum(y,FrameLength,FrameRate,FFT_SIZE,flag)
%
%   y           : input wave data (any vector; processed as a column)
%   FrameLength : frame window length (256)
%   FrameRate   : frame shift (128)
%   FFT_SIZE    : fft size (256)
%   flag        : 2 -> power spectrum, 1 -> amplitude, otherwise raw
%                 (complex) fft spectrum
%
%   Spec   : (FFT_SIZE/2+1)-by-frames spectrum selected by flag
%   x_seg  : FrameLength-by-frames Hamming-windowed frames
%   yphase : FFT_SIZE-by-frames phase angle of the full FFT
%
%   Frames start at 1:FrameRate:(length(y)-FrameLength); all outputs have
%   zero columns when no frame fits.
%Xugang Lu
%May 15, 2009, @ATR/NICT

y=y(:);
Len=length(y);
starts=1:FrameRate:Len-FrameLength;
ncols=numel(starts);                    % exact frame count, so nothing grows in the loop

fftspectrum=zeros(FFT_SIZE,ncols);
x_seg=zeros(FrameLength,ncols);
yphase=zeros(FFT_SIZE,ncols);
wind=hamming(FrameLength);

for i=1:ncols
    t=starts(i);
    x_seg(:,i)=wind.*y(t:(t+FrameLength-1));
    fftspectrum(:,i)=fft(x_seg(:,i),FFT_SIZE);
    yphase(:,i)=angle(fftspectrum(:,i));
end

halfSpec=fftspectrum(1:FFT_SIZE/2+1,:); %For Odd sample
if flag==2
    Spec=abs(halfSpec).^2;
elseif flag==1
    Spec=abs(halfSpec);
else
    Spec=halfSpec;
end

return;
--------------------------------------------------------------------------------
/utils/fft2melmx.m:
--------------------------------------------------------------------------------
function [wts,binfrqs] = fft2melmx(nfft, sr, nfilts, width, minfrq, maxfrq, htkmel, constamp)
% wts = fft2melmx(nfft, sr, nfilts, width, minfrq, maxfrq, htkmel, constamp)
%      Generate a matrix of weights to combine FFT bins into Mel
%      bins.  nfft defines the source FFT size at sampling rate sr.
%      Optional nfilts specifies the number of output bands required
%      (else one per bark), and width is the constant width of each
%      band relative to standard Mel (default 1).
%      While wts has nfft columns, the second half are all zero.
%      Hence, Mel spectrum is fft2melmx(nfft,sr)*abs(fft(xincols,nfft));
%      minfrq is the frequency (in Hz) of the lowest band edge;
%      default is 0, but 133.33 is a common standard (to skip LF).
%      maxfrq is frequency in Hz of upper edge; default sr/2.
%      You can exactly duplicate the mel matrix in Slaney's mfcc.m
%      as fft2melmx(512, 8000, 40, 1, 133.33, 6855.5, 0);
%      htkmel=1 means use HTK's version of the mel curve, not Slaney's.
%      constamp=1 means make integration windows peak at 1, not sum to 1.
%      binfrqs (second output) holds the nfilts+2 band edge/centre
%      frequencies in Hz.
% 2004-09-05  dpwe@ee.columbia.edu  based on fft2barkmx

if nargin < 2;     sr = 8000;      end
if nargin < 3;     nfilts = 40;    end
if nargin < 4;     width = 1.0;    end
if nargin < 5;     minfrq = 0;     end  % default bottom edge at 0
if nargin < 6;     maxfrq = sr/2;  end  % default top edge at nyquist
if nargin < 7;     htkmel = 0;     end
if nargin < 8;     constamp = 0;   end

wts = zeros(nfilts, nfft);

% Center freqs of each FFT bin
fftfrqs = [0:nfft-1]/nfft*sr;

% 'Center freqs' of mel bands - uniformly spaced between limits
minmel = hz2mel(minfrq, htkmel);
maxmel = hz2mel(maxfrq, htkmel);
binfrqs = mel2hz(minmel+[0:(nfilts+1)]/(nfilts+1)*(maxmel-minmel), htkmel);

for i = 1:nfilts
  fs = binfrqs(i+[0 1 2]);
  % scale by width
  fs = fs(2)+width*(fs - fs(2));
  % lower and upper slopes for all bins
  loslope = (fftfrqs - fs(1))/(fs(2) - fs(1));
  hislope = (fs(3) - fftfrqs)/(fs(3) - fs(2));
  % .. then intersect them with each other and zero
  wts(i,:) = max(0,min(loslope, hislope));
end

if (constamp == 0)
  % Slaney-style mel is scaled to be approx constant E per channel
  wts = diag(2./(binfrqs(2+[1:nfilts])-binfrqs(1:nfilts)))*wts;
end

% Make sure 2nd half of FFT is zero
wts(:,(nfft/2+1):nfft) = 0;
% seems like a good idea to avoid aliasing


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function f = mel2hz(z, htk)
%   f = mel2hz(z, htk)
%   Convert 'mel scale' frequencies into Hz
%   Optional htk = 1 means use the HTK formula
%   else use the formula from Slaney's mfcc.m
% 2005-04-19 dpwe@ee.columbia.edu

if nargin < 2
  htk = 0;
end

if htk == 1
  f = 700*(10.^(z/2595)-1);
else

  f_0 = 0; % 133.33333;
  f_sp = 200/3; % 66.66667;
  brkfrq = 1000;
  brkpt  = (brkfrq - f_0)/f_sp;  % starting mel value for log region
  % the magic 1.0711703 which is the ratio needed to get from 1000 Hz to
  % 6400 Hz in 27 steps, and is *almost* the ratio between 1000 Hz and the
  % preceding linear filter center at 933.33333 Hz (actually
  % 1000/933.33333 = 1.07142857142857 and exp(log(6.4)/27) = 1.07117028749447)
  logstep = exp(log(6.4)/27);

  linpts = (z < brkpt);

  f = 0*z;

  % fill in parts separately
  f(linpts) = f_0 + f_sp*z(linpts);
  f(~linpts) = brkfrq*exp(log(logstep)*(z(~linpts)-brkpt));

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function z = hz2mel(f,htk)
%  z = hz2mel(f,htk)
%  Convert frequencies f (in Hz) to mel 'scale'.
%  Optional htk = 1 uses the mel axis defined in the HTKBook
%  otherwise use Slaney's formula
% 2005-04-19 dpwe@ee.columbia.edu

if nargin < 2
  htk = 0;
end

if htk == 1
  z = 2595 * log10(1+f/700);
else
  % Mel fn to match Slaney's Auditory Toolbox mfcc.m

  f_0 = 0; % 133.33333;
  f_sp = 200/3; % 66.66667;
  brkfrq = 1000;
  brkpt  = (brkfrq - f_0)/f_sp;  % starting mel value for log region
  % same constant as in mel2hz: ratio to get from 1000 Hz to 6400 Hz in 27 steps
  logstep = exp(log(6.4)/27);

  linpts = (f < brkfrq);

  z = 0*f;

  % fill in parts separately
  z(linpts) = (f(linpts) - f_0)/f_sp;
  z(~linpts) = brkpt+(log(f(~linpts)/brkfrq))./log(logstep);

end
--------------------------------------------------------------------------------
/utils/multiframes2.m:
--------------------------------------------------------------------------------
% Create Multiframes2
function O=multiframes2(I)
%MULTIFRAMES2 Stack each frame with its two left and two right neighbours.
%   O=multiframes2(I)
%
%   I : s1-by-s2 matrix, one frame per column
%   O : (5*s1)-by-s2 matrix; column i is
%       [I(:,i-2);I(:,i-1);I(:,i);I(:,i+1);I(:,i+2)]
%       where neighbour indices outside 1..s2 are clamped to the first or
%       last column (edge frames are repeated).
%
%   Works for any number of columns, including fewer than four.

[s1,s2]=size(I);
O=zeros(5*s1,s2);
cols=1:s2;
for offset=-2:2
    src=min(max(cols+offset,1),s2);     % clamp neighbour index at both edges
    O((offset+2)*s1+(1:s1),:)=I(:,src);
end
--------------------------------------------------------------------------------
/utils/nmfdiv.m:
--------------------------------------------------------------------------------
function [W,H]=nmfdiv( V, rdim,iter_num ,showflag)
%NMFDIV Non-negative matrix factorisation with the divergence objective.
%   [W,H]=nmfdiv(V,rdim,iter_num,showflag)
%
%   divergence objective = sum(sum((V.*log(V./(W*H))) - V + W*H));
%
%   V        : non-negative data matrix (vdim-by-samples)
%   rdim     : number of basis vectors
%   iter_num : number of multiplicative update iterations
%   showflag : true to plot statistics every 5 iterations (default false)
%
%   W : vdim-by-rdim basis, H : rdim-by-samples activations.
%   V is divided by its maximum before factorising, so W*H approximates
%   the rescaled data, not the original V.
%   Initialisation is random (abs(randn)), so results differ between runs.

if nargin<4, showflag=false; end

% Check that we have non-negative data
if min(V(:))<0, error('Negative values in data!'); end

% Globally rescale data to avoid potential overflow/underflow
vmax=max(V(:));
if vmax>0                               % an all-zero V would otherwise become NaN
    V=V/vmax;
end

% Dimensions
vdim=size(V,1);
samples=size(V,2);

% Create initial matrices
W=abs(randn(vdim,rdim));
H=abs(randn(rdim,samples));

% Initialize displays
if showflag
    figure(1); clf; % this will show the energies and sparsenesses
end

% Start iteration
for iter=1:iter_num
    % Show stats
    if showflag && (rem(iter,5)==0)
        figure(1);
        cursW=(sqrt(vdim)-(sum(W,1)./sqrt(sum(W.^2,1))))/(sqrt(vdim)-1);
        cursH=(sqrt(samples)-(sum(H,2)'./sqrt(sum(H.^2,2)')))/(sqrt(samples)-1);
        subplot(3,1,1); bar(sqrt(sum(W.^2,1)));
        subplot(3,1,2); bar(cursW);
        subplot(3,1,3); bar(cursH);
        drawnow;
    end

    % Compute new W and H (Lee and Seung; NIPS*2000)
    % Explicit sum dimensions keep this correct when vdim or samples is 1.
    H=H.*(W'*(V./(W*H+1e-9)))./(sum(W,1)'*ones(1,samples));
    W=W.*((V./(W*H+1e-9))*H')./(ones(vdim,1)*sum(H,2)');
end
--------------------------------------------------------------------------------
/utils/nmfmse.m:
--------------------------------------------------------------------------------
function [W,H]=nmfmse( V, rdim,iter_num ,showflag )
%NMFMSE Non-negative matrix factorisation with the squared-error objective.
%   [W,H]=nmfmse(V,rdim,iter_num,showflag)
%
%   MSE objective = 0.5*sum(sum((V-W*H).^2))
%
%   V        : non-negative data matrix (vdim-by-samples)
%   rdim     : number of basis vectors
%   iter_num : number of multiplicative update iterations
%   showflag : true to plot statistics every 5 iterations (default false)
%
%   W : vdim-by-rdim basis, H : rdim-by-samples activations whose rows are
%   renormalised to unit energy after every iteration.
%   V is divided by its maximum before factorising, so W*H approximates
%   the rescaled data, not the original V.
%   Initialisation is random (abs(randn)), so results differ between runs.

if nargin<4, showflag=false; end

% Check that we have non-negative data
if min(V(:))<0, error('Negative values in data!'); end

% Globally rescale data to avoid potential overflow/underflow
vmax=max(V(:));
if vmax>0                               % an all-zero V would otherwise become NaN
    V=V/vmax;
end

% Dimensions
vdim=size(V,1);
samples=size(V,2);

% Create initial matrices
W=abs(randn(vdim,rdim));
H=abs(randn(rdim,samples));

% Initialize displays
objhistory=[];
if showflag
    figure(1); clf; % this will show the energies and sparsenesses
    figure(2); clf; % this will show the objective function
    drawnow;
    % Calculate initial objective (only needed for the plot)
    objhistory=0.5*sum(sum((V-W*H).^2));
end

% Start iteration
for iter=1:iter_num
    % Show stats
    if showflag && (rem(iter,5)==0)
        figure(1);
        cursW=(sqrt(vdim)-(sum(W,1)./sqrt(sum(W.^2,1))))/(sqrt(vdim)-1);
        cursH=(sqrt(samples)-(sum(H,2)'./sqrt(sum(H.^2,2)')))/(sqrt(samples)-1);
        subplot(3,1,1); bar(sqrt(sum(W.^2,1))); title('Energy of W');
        subplot(3,1,2); bar(cursW); title('sparsenesses of W'); ylabel('Sparsenesses');
        subplot(3,1,3); bar(cursH); title('sparsenesses of H'); ylabel('Sparsenesses');
        if iter>1
            figure(2);
            plot(objhistory(2:end)); title('Objective'); xlabel('number of iterations');
        end
        drawnow;
    end

    % Compute new W and H (Lee and Seung; NIPS*2000)
    H=H.*(W'*V)./(W'*W*H+1e-9);
    W=W.*(V*H')./(W*H*H'+1e-9);

    % Renormalize so rows of H have constant energy
    norms=sqrt(sum(H.^2,2))';
    H=H./(norms'*ones(1,samples));
    W=W.*(ones(vdim,1)*norms);

    % Calculate objective (it is only ever plotted, never returned)
    if showflag
        objhistory=[objhistory 0.5*sum(sum((V-W*H).^2))]; %#ok<AGROW>
    end
end
--------------------------------------------------------------------------------
/utils/singleframe2.m:
--------------------------------------------------------------------------------
% Create singleframe2
function O=singleframe2(I)
%SINGLEFRAME2 Collapse 5-frame stacked columns back to single frames.
%   O=singleframe2(I)
%
%   I : s1-by-s2 matrix whose columns are five stacked blocks of s1/5 rows
%       (the layout produced by multiframes2)
%   O : (s1/5)-by-s2 matrix
%
%   The first two and last two columns take the centre block of their own
%   column. Every other column i is the mean of the five blocks that
%   describe frame i: block 5 of column i-2, block 4 of i-1, block 3 of i,
%   block 2 of i+1 and block 1 of i+2.
%
%   Works for any number of columns, including fewer than four.

[s1,s2]=size(I);
if rem(s1,5)~=0
    error('singleframe2:badRows','Row count (%d) must be a multiple of 5.',s1);
end
b=s1/5;                                 % rows per block

% Centre block everywhere; interior columns are overwritten below.
O=I(2*b+1:3*b,:);

inner=3:s2-2;
if ~isempty(inner)
    O(:,inner)=(I(4*b+1:5*b,inner-2)+I(3*b+1:4*b,inner-1)+I(2*b+1:3*b,inner) ...
        +I(b+1:2*b,inner+1)+I(1:b,inner+2))/5;
end
--------------------------------------------------------------------------------