├── idiaptts ├── misc │ ├── tfcompat │ │ ├── __init__.py │ │ └── README.md │ ├── __init__.py │ ├── alignment │ │ ├── __init__.py │ │ └── state_align │ │ │ ├── __init__.py │ │ │ ├── binary_io.py │ │ │ └── license.txt │ ├── normalisation │ │ ├── __init__.py │ │ └── NormParamsExtractor.py │ └── get_audio_length.sh ├── src │ ├── neural_networks │ │ ├── pytorch │ │ │ ├── layers │ │ │ │ ├── __init__.py │ │ │ │ └── AlwaysDropout.py │ │ │ ├── models │ │ │ │ ├── enc_dec_dyn │ │ │ │ │ ├── attention │ │ │ │ │ │ ├── DotProductAttention.py │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── FixedAttention.py │ │ │ │ │ │ └── Attention.py │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── SubModule.py │ │ │ │ │ └── EncDecDyn.py │ │ │ │ ├── __init__.py │ │ │ │ ├── rnn_dyn │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── ApplyFunction.py │ │ │ │ │ ├── Mask.py │ │ │ │ │ ├── Norm.py │ │ │ │ │ ├── VAE.py │ │ │ │ │ ├── TransposingWrapper.py │ │ │ │ │ ├── CustomWrapper.py │ │ │ │ │ ├── Pooling.py │ │ │ │ │ └── CNNWrapper.py │ │ │ │ ├── ModelConfig.py │ │ │ │ ├── PhraseNeuralFilters.py │ │ │ │ ├── NamedForwardSplitter.py │ │ │ │ └── NamedForwardCombiner.py │ │ │ ├── __init__.py │ │ │ ├── loss │ │ │ │ ├── __init__.py │ │ │ │ ├── OneHotCrossEntropyLoss.py │ │ │ │ ├── UnWeightedAccuracy.py │ │ │ │ ├── VAEKLDLoss.py │ │ │ │ └── L1WeightedVUVMSELoss.py │ │ │ ├── GradientScaling.py │ │ │ ├── ExponentialMovingAverage.py │ │ │ └── ExtendedExponentialLR.py │ │ ├── __init__.py │ │ └── EmbeddingConfig.py │ ├── __init__.py │ ├── model_trainers │ │ ├── __init__.py │ │ ├── vtln │ │ │ ├── __init__.py │ │ │ └── VTLNMonphoneSpeakerAdaptionModelTrainer.py │ │ ├── wcad │ │ │ └── __init__.py │ │ └── ClassificationTrainer.py │ └── data_preparation │ │ ├── audio │ │ ├── __init__.py │ │ ├── down_sampling.py │ │ ├── normalize_loudness.py │ │ ├── single_channel_noise_reduction.py │ │ └── high_pass_filter.py │ │ ├── wcad │ │ └── __init__.py │ │ ├── world │ │ ├── __init__.py │ │ └── FlatLF0LabelGen.py │ │ ├── phonemes │ │ └── __init__.py │ │ 
├── questions │ │ └── __init__.py │ │ ├── convert_to_npz.py │ │ ├── __init__.py │ │ ├── CategoryDataReader.py │ │ └── IntercrossDataReaderConfig.py ├── scripts │ ├── noise_reduction │ │ ├── requiredMCRProducts.txt │ │ ├── ZetaRiemannTable.bin │ │ ├── betaOrderGivenPhase.m │ │ ├── mccExcludedFiles.log │ │ ├── runme.m │ │ ├── run_runme.sh │ │ ├── selectConfig.m │ │ └── betaOrder.m │ ├── tts_frontend │ │ ├── lab_format.pdf │ │ ├── English │ │ │ ├── festival_files.tar.gz │ │ │ ├── Text2FestivalReadyAm.pl │ │ │ ├── Text2FestivalReadyBr.pl │ │ │ ├── Text2FestivalReady.pl │ │ │ └── example_English_prompts.txt │ │ ├── README.md │ │ ├── LICENSE │ │ └── install │ ├── remove_durations.sh │ └── copy_to_speaker_subdirs.sh └── __init__.py ├── conda_requirements.txt ├── test ├── integration │ ├── fixtures │ │ ├── database │ │ │ ├── wcad_.txt │ │ │ ├── wav │ │ │ │ ├── LJ001-0001.wav │ │ │ │ ├── LJ001-0002.wav │ │ │ │ ├── LJ001-0003.wav │ │ │ │ ├── LJ001-0004.wav │ │ │ │ ├── LJ001-0005.wav │ │ │ │ ├── LJ001-0006.wav │ │ │ │ ├── LJ001-0007.wav │ │ │ │ ├── LJ001-0008.wav │ │ │ │ └── LJ001-0009.wav │ │ │ ├── wav48 │ │ │ │ ├── p225_001.wav │ │ │ │ ├── p225_002.wav │ │ │ │ ├── p225_003.wav │ │ │ │ ├── p225_004.wav │ │ │ │ ├── p225_005.wav │ │ │ │ ├── p225_006.wav │ │ │ │ ├── p225_007.wav │ │ │ │ ├── p225_008.wav │ │ │ │ └── p225_009.wav │ │ │ ├── file_id_list.txt │ │ │ └── utts.data │ │ ├── dur │ │ │ ├── stats.bin │ │ │ ├── LJ001-0001.dur │ │ │ ├── LJ001-0002.dur │ │ │ ├── LJ001-0003.dur │ │ │ ├── LJ001-0004.dur │ │ │ ├── LJ001-0005.dur │ │ │ ├── LJ001-0006.dur │ │ │ ├── LJ001-0007.dur │ │ │ ├── LJ001-0008.dur │ │ │ ├── LJ001-0009.dur │ │ │ └── mean-std_dev.bin │ │ ├── WORLD │ │ │ ├── bap │ │ │ │ ├── stats.bin │ │ │ │ ├── LJ001-0001.bap │ │ │ │ ├── LJ001-0002.bap │ │ │ │ ├── LJ001-0003.bap │ │ │ │ ├── LJ001-0004.bap │ │ │ │ ├── LJ001-0005.bap │ │ │ │ ├── LJ001-0006.bap │ │ │ │ ├── LJ001-0007.bap │ │ │ │ ├── LJ001-0008.bap │ │ │ │ ├── LJ001-0009.bap │ │ │ │ └── mean-std_dev.bin │ │ │ ├── 
lf0 │ │ │ │ ├── stats.bin │ │ │ │ ├── LJ001-0001.lf0 │ │ │ │ ├── LJ001-0002.lf0 │ │ │ │ ├── LJ001-0003.lf0 │ │ │ │ ├── LJ001-0004.lf0 │ │ │ │ ├── LJ001-0005.lf0 │ │ │ │ ├── LJ001-0006.lf0 │ │ │ │ ├── LJ001-0007.lf0 │ │ │ │ ├── LJ001-0008.lf0 │ │ │ │ ├── LJ001-0009.lf0 │ │ │ │ └── mean-std_dev.bin │ │ │ ├── mcep20 │ │ │ │ ├── stats.bin │ │ │ │ ├── LJ001-0001.mcep │ │ │ │ ├── LJ001-0002.mcep │ │ │ │ ├── LJ001-0003.mcep │ │ │ │ ├── LJ001-0004.mcep │ │ │ │ ├── LJ001-0005.mcep │ │ │ │ ├── LJ001-0006.mcep │ │ │ │ ├── LJ001-0007.mcep │ │ │ │ ├── LJ001-0008.mcep │ │ │ │ ├── LJ001-0009.mcep │ │ │ │ └── mean-std_dev.bin │ │ │ ├── vuv │ │ │ │ ├── LJ001-0001.vuv │ │ │ │ ├── LJ001-0002.vuv │ │ │ │ ├── LJ001-0003.vuv │ │ │ │ ├── LJ001-0004.vuv │ │ │ │ ├── LJ001-0005.vuv │ │ │ │ ├── LJ001-0006.vuv │ │ │ │ ├── LJ001-0007.vuv │ │ │ │ ├── LJ001-0008.vuv │ │ │ │ └── LJ001-0009.vuv │ │ │ └── cmp_mcep20 │ │ │ │ ├── bap-stats.bin │ │ │ │ ├── lf0-stats.bin │ │ │ │ ├── stats_bap.bin │ │ │ │ ├── stats_lf0.bin │ │ │ │ ├── LJ001-0001.cmp │ │ │ │ ├── LJ001-0002.cmp │ │ │ │ ├── LJ001-0003.cmp │ │ │ │ ├── LJ001-0004.cmp │ │ │ │ ├── LJ001-0005.cmp │ │ │ │ ├── LJ001-0006.cmp │ │ │ │ ├── LJ001-0007.cmp │ │ │ │ ├── LJ001-0008.cmp │ │ │ │ ├── LJ001-0009.cmp │ │ │ │ ├── mcep20-stats.bin │ │ │ │ ├── stats_mcep20.bin │ │ │ │ ├── bap-mean-covariance.bin │ │ │ │ ├── lf0-mean-covariance.bin │ │ │ │ ├── mean-covariance_bap.bin │ │ │ │ ├── mean-covariance_lf0.bin │ │ │ │ ├── mcep20-mean-covariance.bin │ │ │ │ └── mean-covariance_mcep20.bin │ │ ├── questions │ │ │ ├── min-max.bin │ │ │ ├── LJ001-0001.questions │ │ │ ├── LJ001-0002.questions │ │ │ ├── LJ001-0003.questions │ │ │ ├── LJ001-0004.questions │ │ │ ├── LJ001-0005.questions │ │ │ ├── LJ001-0006.questions │ │ │ ├── LJ001-0007.questions │ │ │ ├── LJ001-0008.questions │ │ │ └── LJ001-0009.questions │ │ ├── test_model_in409_out67 │ │ │ └── nn │ │ │ │ ├── params_e0 │ │ │ │ ├── params_e1 │ │ │ │ ├── params_e2 │ │ │ │ ├── params_s0 │ │ │ │ ├── params_s4 │ │ 
│ │ ├── params_s8 │ │ │ │ ├── optimiser_e1 │ │ │ │ ├── optimiser_e2 │ │ │ │ ├── optimiser_s4 │ │ │ │ ├── optimiser_s8 │ │ │ │ ├── params_best │ │ │ │ ├── params_e-1 │ │ │ │ ├── params_s-1 │ │ │ │ ├── scheduler_e1 │ │ │ │ ├── scheduler_e2 │ │ │ │ ├── scheduler_s4 │ │ │ │ ├── scheduler_s8 │ │ │ │ ├── optimiser_best │ │ │ │ ├── scheduler_best │ │ │ │ └── config.json │ │ ├── test_model_in409_out67_tmp │ │ │ └── nn │ │ │ │ ├── params_e2 │ │ │ │ ├── params_s8 │ │ │ │ ├── optimiser_e2 │ │ │ │ ├── optimiser_s8 │ │ │ │ ├── scheduler_e2 │ │ │ │ ├── scheduler_s8 │ │ │ │ └── config.json │ │ ├── file_id_list.txt │ │ ├── wcad-0.030_0.060_0.090_0.120_0.150 │ │ │ ├── stats.bin │ │ │ ├── min-max.bin │ │ │ ├── LJ001-0001.atoms │ │ │ ├── LJ001-0001.phrase │ │ │ ├── LJ001-0002.atoms │ │ │ ├── LJ001-0002.phrase │ │ │ ├── LJ001-0003.atoms │ │ │ ├── LJ001-0003.phrase │ │ │ ├── LJ001-0004.atoms │ │ │ ├── LJ001-0004.phrase │ │ │ ├── LJ001-0005.atoms │ │ │ ├── LJ001-0005.phrase │ │ │ ├── LJ001-0006.atoms │ │ │ ├── LJ001-0006.phrase │ │ │ ├── LJ001-0007.atoms │ │ │ ├── LJ001-0007.phrase │ │ │ ├── LJ001-0008.atoms │ │ │ ├── LJ001-0008.phrase │ │ │ ├── LJ001-0009.atoms │ │ │ ├── LJ001-0009.phrase │ │ │ └── mean-std_dev.bin │ │ └── labels │ │ │ ├── mono_no_align │ │ │ ├── LJ001-0008.lab │ │ │ ├── LJ001-0002.lab │ │ │ ├── LJ001-0006.lab │ │ │ ├── LJ001-0004.lab │ │ │ ├── LJ001-0009.lab │ │ │ ├── LJ001-0007.lab │ │ │ ├── LJ001-0005.lab │ │ │ ├── LJ001-0001.lab │ │ │ └── LJ001-0003.lab │ │ │ ├── mono_phone.list │ │ │ ├── phoneset_arpabet.txt │ │ │ ├── mfa │ │ │ ├── LJ001-0008.TextGrid │ │ │ └── LJ001-0002.TextGrid │ │ │ └── full │ │ │ └── LJ001-0008.lab │ ├── __init__.py │ ├── data_preparation │ │ ├── __init__.py │ │ ├── audio │ │ │ ├── __init__.py │ │ │ └── test_RawWaveformLabelGen.py │ │ ├── phonemes │ │ │ ├── __init__.py │ │ │ └── test_PhonemeDurationLabelGen.py │ │ ├── wcad │ │ │ ├── __init__.py │ │ │ ├── test_AtomVUVDistPosLabelGen.py │ │ │ └── test_AtomLabelGen.py │ │ ├── world │ │ │ └── 
__init__.py │ │ └── questions │ │ │ ├── __init__.py │ │ │ └── test_QuestionLabelGen.py │ └── model_trainers │ │ └── __init__.py ├── __init__.py ├── unit │ ├── __init__.py │ ├── model_trainers │ │ └── __init__.py │ └── neural_networks │ │ ├── __init__.py │ │ └── pytorch │ │ ├── __init__.py │ │ ├── test_GradientScaling.py │ │ ├── test_ModularModelHandlerPyTorch.py │ │ └── test_AllPassLayer.py └── cmd.sh ├── __init__.py ├── requirements.txt ├── .gitignore ├── LICENSE ├── INSTALL.md ├── tools └── compile_htk.sh ├── README.md └── setup.py /idiaptts/misc/tfcompat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conda_requirements.txt: -------------------------------------------------------------------------------- 1 | jsonpickle 2 | gitpython 3 | tensorboard -------------------------------------------------------------------------------- /test/integration/fixtures/database/wcad_.txt: -------------------------------------------------------------------------------- 1 | LJ001-0002 2 | -------------------------------------------------------------------------------- /idiaptts/scripts/noise_reduction/requiredMCRProducts.txt: -------------------------------------------------------------------------------- 1 | 35000 35010 -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/models/enc_dec_dyn/attention/DotProductAttention.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/integration/fixtures/dur/stats.bin: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/dur/stats.bin -------------------------------------------------------------------------------- /idiaptts/scripts/tts_frontend/lab_format.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/idiaptts/scripts/tts_frontend/lab_format.pdf -------------------------------------------------------------------------------- /test/integration/fixtures/dur/LJ001-0001.dur: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/dur/LJ001-0001.dur -------------------------------------------------------------------------------- /test/integration/fixtures/dur/LJ001-0002.dur: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/dur/LJ001-0002.dur -------------------------------------------------------------------------------- /test/integration/fixtures/dur/LJ001-0003.dur: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/dur/LJ001-0003.dur -------------------------------------------------------------------------------- /test/integration/fixtures/dur/LJ001-0004.dur: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/dur/LJ001-0004.dur -------------------------------------------------------------------------------- /test/integration/fixtures/dur/LJ001-0005.dur: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/dur/LJ001-0005.dur -------------------------------------------------------------------------------- /test/integration/fixtures/dur/LJ001-0006.dur: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/dur/LJ001-0006.dur -------------------------------------------------------------------------------- /test/integration/fixtures/dur/LJ001-0007.dur: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/dur/LJ001-0007.dur -------------------------------------------------------------------------------- /test/integration/fixtures/dur/LJ001-0008.dur: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/dur/LJ001-0008.dur -------------------------------------------------------------------------------- /test/integration/fixtures/dur/LJ001-0009.dur: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/dur/LJ001-0009.dur -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/bap/stats.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/bap/stats.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/lf0/stats.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/lf0/stats.bin 
-------------------------------------------------------------------------------- /test/integration/fixtures/dur/mean-std_dev.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/dur/mean-std_dev.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/mcep20/stats.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/mcep20/stats.bin -------------------------------------------------------------------------------- /test/integration/fixtures/questions/min-max.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/questions/min-max.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/bap/LJ001-0001.bap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/bap/LJ001-0001.bap -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/bap/LJ001-0002.bap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/bap/LJ001-0002.bap -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/bap/LJ001-0003.bap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/bap/LJ001-0003.bap -------------------------------------------------------------------------------- 
/test/integration/fixtures/WORLD/bap/LJ001-0004.bap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/bap/LJ001-0004.bap -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/bap/LJ001-0005.bap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/bap/LJ001-0005.bap -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/bap/LJ001-0006.bap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/bap/LJ001-0006.bap -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/bap/LJ001-0007.bap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/bap/LJ001-0007.bap -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/bap/LJ001-0008.bap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/bap/LJ001-0008.bap -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/bap/LJ001-0009.bap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/bap/LJ001-0009.bap -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/lf0/LJ001-0001.lf0: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/lf0/LJ001-0001.lf0 -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/lf0/LJ001-0002.lf0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/lf0/LJ001-0002.lf0 -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/lf0/LJ001-0003.lf0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/lf0/LJ001-0003.lf0 -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/lf0/LJ001-0004.lf0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/lf0/LJ001-0004.lf0 -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/lf0/LJ001-0005.lf0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/lf0/LJ001-0005.lf0 -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/lf0/LJ001-0006.lf0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/lf0/LJ001-0006.lf0 -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/lf0/LJ001-0007.lf0: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/lf0/LJ001-0007.lf0 -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/lf0/LJ001-0008.lf0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/lf0/LJ001-0008.lf0 -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/lf0/LJ001-0009.lf0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/lf0/LJ001-0009.lf0 -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/vuv/LJ001-0001.vuv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/vuv/LJ001-0001.vuv -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/vuv/LJ001-0002.vuv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/vuv/LJ001-0002.vuv -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/vuv/LJ001-0003.vuv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/vuv/LJ001-0003.vuv -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/vuv/LJ001-0004.vuv: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/vuv/LJ001-0004.vuv -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/vuv/LJ001-0005.vuv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/vuv/LJ001-0005.vuv -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/vuv/LJ001-0006.vuv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/vuv/LJ001-0006.vuv -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/vuv/LJ001-0007.vuv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/vuv/LJ001-0007.vuv -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/vuv/LJ001-0008.vuv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/vuv/LJ001-0008.vuv -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/vuv/LJ001-0009.vuv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/vuv/LJ001-0009.vuv -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap 
Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | -------------------------------------------------------------------------------- /idiaptts/scripts/noise_reduction/ZetaRiemannTable.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/idiaptts/scripts/noise_reduction/ZetaRiemannTable.bin -------------------------------------------------------------------------------- /idiaptts/scripts/noise_reduction/betaOrderGivenPhase.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/idiaptts/scripts/noise_reduction/betaOrderGivenPhase.m -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/bap/mean-std_dev.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/bap/mean-std_dev.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/lf0/mean-std_dev.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/lf0/mean-std_dev.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/mcep20/LJ001-0001.mcep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/mcep20/LJ001-0001.mcep -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/mcep20/LJ001-0002.mcep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/mcep20/LJ001-0002.mcep -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/mcep20/LJ001-0003.mcep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/mcep20/LJ001-0003.mcep -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/mcep20/LJ001-0004.mcep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/mcep20/LJ001-0004.mcep -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/mcep20/LJ001-0005.mcep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/mcep20/LJ001-0005.mcep -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/mcep20/LJ001-0006.mcep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/mcep20/LJ001-0006.mcep -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/mcep20/LJ001-0007.mcep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/mcep20/LJ001-0007.mcep -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/mcep20/LJ001-0008.mcep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/mcep20/LJ001-0008.mcep -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/mcep20/LJ001-0009.mcep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/mcep20/LJ001-0009.mcep -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav/LJ001-0001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav/LJ001-0001.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav/LJ001-0002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav/LJ001-0002.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav/LJ001-0003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav/LJ001-0003.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav/LJ001-0004.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav/LJ001-0004.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav/LJ001-0005.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav/LJ001-0005.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav/LJ001-0006.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav/LJ001-0006.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav/LJ001-0007.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav/LJ001-0007.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav/LJ001-0008.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav/LJ001-0008.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav/LJ001-0009.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav/LJ001-0009.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav48/p225_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav48/p225_001.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav48/p225_002.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav48/p225_002.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav48/p225_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav48/p225_003.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav48/p225_004.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav48/p225_004.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav48/p225_005.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav48/p225_005.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav48/p225_006.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav48/p225_006.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav48/p225_007.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav48/p225_007.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav48/p225_008.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav48/p225_008.wav -------------------------------------------------------------------------------- /test/integration/fixtures/database/wav48/p225_009.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/database/wav48/p225_009.wav -------------------------------------------------------------------------------- /idiaptts/scripts/noise_reduction/mccExcludedFiles.log: -------------------------------------------------------------------------------- 1 | The List of Excluded Files 2 | Excluded files Exclusion Message ID Reason For Exclusion Exclusion Rule 3 | -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/bap-stats.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/bap-stats.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/lf0-stats.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/lf0-stats.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/stats_bap.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/stats_bap.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/stats_lf0.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/stats_lf0.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/mcep20/mean-std_dev.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/mcep20/mean-std_dev.bin -------------------------------------------------------------------------------- /test/integration/fixtures/questions/LJ001-0001.questions: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/questions/LJ001-0001.questions -------------------------------------------------------------------------------- /test/integration/fixtures/questions/LJ001-0002.questions: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/questions/LJ001-0002.questions -------------------------------------------------------------------------------- /test/integration/fixtures/questions/LJ001-0003.questions: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/questions/LJ001-0003.questions -------------------------------------------------------------------------------- /test/integration/fixtures/questions/LJ001-0004.questions: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/questions/LJ001-0004.questions -------------------------------------------------------------------------------- /test/integration/fixtures/questions/LJ001-0005.questions: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/questions/LJ001-0005.questions -------------------------------------------------------------------------------- /test/integration/fixtures/questions/LJ001-0006.questions: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/questions/LJ001-0006.questions -------------------------------------------------------------------------------- /test/integration/fixtures/questions/LJ001-0007.questions: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/questions/LJ001-0007.questions -------------------------------------------------------------------------------- /test/integration/fixtures/questions/LJ001-0008.questions: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/questions/LJ001-0008.questions -------------------------------------------------------------------------------- /test/integration/fixtures/questions/LJ001-0009.questions: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/questions/LJ001-0009.questions -------------------------------------------------------------------------------- /idiaptts/scripts/tts_frontend/English/festival_files.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/idiaptts/scripts/tts_frontend/English/festival_files.tar.gz -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap 
Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0001.cmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0001.cmp -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0002.cmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0002.cmp -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0003.cmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0003.cmp -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0004.cmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0004.cmp -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0005.cmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0005.cmp -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0006.cmp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0006.cmp -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0007.cmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0007.cmp -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0008.cmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0008.cmp -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0009.cmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/LJ001-0009.cmp -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/mcep20-stats.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/mcep20-stats.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/stats_mcep20.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/stats_mcep20.bin -------------------------------------------------------------------------------- /idiaptts/src/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # 
Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/params_e0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/params_e0 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/params_e1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/params_e1 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/params_e2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/params_e2 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/params_s0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/params_s0 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/params_s4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/params_s4 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/params_s8: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/params_s8 -------------------------------------------------------------------------------- /test/unit/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/misc/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /test/integration/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/optimiser_e1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/optimiser_e1 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/optimiser_e2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/optimiser_e2 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/optimiser_s4: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/optimiser_s4 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/optimiser_s8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/optimiser_s8 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/params_best: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/params_best -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/params_e-1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/params_e-1 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/params_s-1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/params_s-1 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/scheduler_e1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/scheduler_e1 
-------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/scheduler_e2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/scheduler_e2 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/scheduler_s4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/scheduler_s4 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/scheduler_s8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/scheduler_s8 -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/bap-mean-covariance.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/bap-mean-covariance.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/lf0-mean-covariance.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/lf0-mean-covariance.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/mean-covariance_bap.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/mean-covariance_bap.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/mean-covariance_lf0.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/mean-covariance_lf0.bin -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/optimiser_best: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/optimiser_best -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/scheduler_best: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67/nn/scheduler_best -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67_tmp/nn/params_e2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67_tmp/nn/params_e2 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67_tmp/nn/params_s8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67_tmp/nn/params_s8 -------------------------------------------------------------------------------- /idiaptts/misc/alignment/__init__.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/misc/normalisation/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/src/model_trainers/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/mcep20-mean-covariance.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/mcep20-mean-covariance.bin -------------------------------------------------------------------------------- /test/integration/fixtures/WORLD/cmp_mcep20/mean-covariance_mcep20.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/WORLD/cmp_mcep20/mean-covariance_mcep20.bin -------------------------------------------------------------------------------- /test/integration/fixtures/file_id_list.txt: -------------------------------------------------------------------------------- 1 | LJ001-0001 2 | LJ001-0002 3 | LJ001-0003 4 | LJ001-0004 5 | LJ001-0005 6 | LJ001-0006 7 | LJ001-0007 8 | LJ001-0008 9 | LJ001-0009 10 | 
-------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67_tmp/nn/optimiser_e2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67_tmp/nn/optimiser_e2 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67_tmp/nn/optimiser_s8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67_tmp/nn/optimiser_s8 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67_tmp/nn/scheduler_e2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67_tmp/nn/scheduler_e2 -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67_tmp/nn/scheduler_s8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/test_model_in409_out67_tmp/nn/scheduler_s8 -------------------------------------------------------------------------------- /test/unit/model_trainers/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /test/unit/neural_networks/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research 
Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/src/model_trainers/vtln/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/src/model_trainers/wcad/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /test/integration/data_preparation/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/stats.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/stats.bin -------------------------------------------------------------------------------- /test/integration/model_trainers/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 
(c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/misc/alignment/state_align/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/src/data_preparation/audio/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/src/data_preparation/wcad/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/src/data_preparation/world/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /test/integration/data_preparation/audio/__init__.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | -------------------------------------------------------------------------------- /test/integration/data_preparation/phonemes/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | -------------------------------------------------------------------------------- /test/integration/data_preparation/wcad/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | -------------------------------------------------------------------------------- /test/integration/data_preparation/world/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | -------------------------------------------------------------------------------- /test/integration/fixtures/database/file_id_list.txt: -------------------------------------------------------------------------------- 1 | LJ001-0001 2 | LJ001-0002 3 | LJ001-0003 4 | LJ001-0004 5 | LJ001-0005 6 | LJ001-0006 7 | LJ001-0007 8 | LJ001-0008 9 | LJ001-0009 10 | -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/min-max.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/min-max.bin 
-------------------------------------------------------------------------------- /test/unit/neural_networks/pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/src/data_preparation/phonemes/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/src/data_preparation/questions/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/models/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | -------------------------------------------------------------------------------- /test/integration/data_preparation/questions/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | -------------------------------------------------------------------------------- /test/integration/fixtures/labels/mono_no_align/LJ001-0008.lab: -------------------------------------------------------------------------------- 1 | # 2 | h 3 | @ 4 | z 5 | n 6 | e 7 | v 8 | @r 9 | r 10 | b 11 | iy 12 | 
n 13 | s 14 | @r 15 | r 16 | p 17 | a 18 | s 19 | t 20 | # 21 | -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0001.atoms: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0001.atoms -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0001.phrase: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0001.phrase -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0002.atoms: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0002.atoms -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0002.phrase: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0002.phrase -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0003.atoms: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0003.atoms 
-------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0003.phrase: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0003.phrase -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0004.atoms: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0004.atoms -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0004.phrase: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0004.phrase -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0005.atoms: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0005.atoms -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0005.phrase: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0005.phrase -------------------------------------------------------------------------------- 
/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0006.atoms: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0006.atoms -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0006.phrase: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0006.phrase -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0007.atoms: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0007.atoms -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0007.phrase: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0007.phrase -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0008.atoms: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0008.atoms -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0008.phrase: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0008.phrase -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0009.atoms: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0009.atoms -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0009.phrase: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/LJ001-0009.phrase -------------------------------------------------------------------------------- /test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/mean-std_dev.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idiap/IdiapTTS/HEAD/test/integration/fixtures/wcad-0.030_0.060_0.090_0.120_0.150/mean-std_dev.bin -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/models/enc_dec_dyn/attention/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | wheel 2 | asn1crypto 3 | setuptools 4 | pydub 5 | scipy 6 | pyworld 7 | matplotlib 8 | numpy 9 | soundfile 10 | 
import os

# Walk the current working directory and select candidate files for
# conversion to .npz.  Anything that is not a regular file, or that is
# already converted, is skipped with a notice.
# BUG FIX: the original iterated os.path.listdir(), which does not exist
# (AttributeError); directory listing lives in os.listdir().  It also
# imported isfile from the private genericpath module instead of using
# the public os.path.isfile().
for file_name in os.listdir(os.curdir):
    if not os.path.isfile(file_name) or file_name.endswith(".npz"):
        print("Skipping {}".format(file_name))
        continue
/test/integration/fixtures/labels/mono_no_align/LJ001-0006.lab: -------------------------------------------------------------------------------- 1 | # 2 | @ 3 | n 4 | d 5 | i 6 | t 7 | i 8 | z 9 | w 10 | @@r1 11 | r 12 | th 13 | m 14 | e 15 | n 16 | sh 17 | n! 18 | pau 19 | i 20 | n 21 | p 22 | a 23 | s 24 | i 25 | ng 26 | dh 27 | @ 28 | t 29 | pau 30 | @ 31 | z 32 | @ 33 | n 34 | i 35 | g 36 | z 37 | a 38 | m 39 | p 40 | l! 41 | @ 42 | v 43 | f 44 | ai 45 | n 46 | t 47 | ai 48 | p 49 | aa1 50 | g 51 | r 52 | @ 53 | f 54 | iy 55 | # 56 | -------------------------------------------------------------------------------- /test/integration/fixtures/labels/mono_phone.list: -------------------------------------------------------------------------------- 1 | # 2 | p 3 | r 4 | i 5 | n 6 | t^ 7 | ng 8 | pau 9 | dh 10 | @ 11 | ou1 12 | lw 13 | iy 14 | s 15 | e 16 | w 17 | th 18 | hw 19 | ch 20 | @r 21 | t 22 | z 23 | n! 24 | k 25 | @@r1 26 | d 27 | f 28 | m 29 | aa1 30 | oo1 31 | ar 32 | a 33 | b 34 | sh 35 | v 36 | ai 37 | u 38 | l 39 | g 40 | ei1 41 | or 42 | uh 43 | uw 44 | l! 45 | jh 46 | ow 47 | h 48 | owr1 49 | y 50 | eir1 51 | ir 52 | ur1 53 | aer1 54 | zh 55 | m! 56 | oi 57 | eh 58 | ? 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
# Written by Bastian Schnell
#
IDENTIFIER = "RNNDYN"

from .Config import Config
from .RNNDyn import RNNDyn


def convert_legacy_to_config(in_dim, hparams):
    """Translate a legacy hparams model description into an RNNDyn Config.

    Thin wrapper around RNNDyn's legacy-string parser; the second argument
    (output dimension) is intentionally left as None.
    """
    legacy_config = RNNDyn._get_config_from_legacy_string(in_dim, None,
                                                          hparams)
    return legacy_config
7 | FIXED_ATTENTION = "FixedAttention" 8 | ATTENTION_GROUND_TRUTH = "ground_truth_durations" 9 | 10 | from .EncDecDyn import EncDecDyn 11 | from .Config import Config 12 | from .attention.FixedAttention import FixedAttention 13 | -------------------------------------------------------------------------------- /test/integration/fixtures/labels/mono_no_align/LJ001-0004.lab: -------------------------------------------------------------------------------- 1 | # 2 | p 3 | r 4 | @ 5 | d 6 | uw 7 | s 8 | t 9 | dh 10 | @ 11 | b 12 | l 13 | aa1 14 | k 15 | b 16 | u 17 | k 18 | s 19 | pau 20 | hw 21 | i 22 | ch 23 | w 24 | @r 25 | r 26 | dh 27 | @ 28 | i 29 | m 30 | iy 31 | t^ 32 | iy 33 | @ 34 | t 35 | p 36 | r 37 | e 38 | t^ 39 | @ 40 | s 41 | e 42 | s 43 | @r 44 | r 45 | z 46 | pau 47 | @ 48 | v 49 | dh 50 | @ 51 | t 52 | r 53 | uw 54 | p 55 | r 56 | i 57 | n 58 | t^ 59 | @ 60 | d 61 | b 62 | u 63 | k 64 | # 65 | -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/layers/AlwaysDropout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2020 Idiap Research Institute, http://www.idiap.ch/ 5 | # Written by Bastian Schnell 6 | # 7 | 8 | from torch.nn import Dropout 9 | from torch import Tensor 10 | import torch.nn.functional as F 11 | 12 | 13 | class AlwaysDropout(Dropout): 14 | 15 | def forward(self, input: Tensor) -> Tensor: 16 | return F.dropout(input, self.p, True, self.inplace) 17 | -------------------------------------------------------------------------------- /test/integration/fixtures/labels/mono_no_align/LJ001-0009.lab: -------------------------------------------------------------------------------- 1 | # 2 | p 3 | r 4 | i 5 | n 6 | t^ 7 | i 8 | ng 9 | dh 10 | e 11 | n 12 | pau 13 | f 14 | @r 15 | r 16 | owr1 17 | r 18 | p 19 | @@r1 20 | r 21 | p 22 | @ 23 | s 24 | pau 25 | m 26 | ei1 27 | 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 Idiap Research Institute, http://www.idiap.ch/
# Written by Bastian Schnell
#

from torch.nn import Dropout
from torch import Tensor
import torch.nn.functional as F


class AlwaysDropout(Dropout):
    """Dropout layer that stays active in evaluation mode.

    torch.nn.Dropout becomes a no-op once the module is switched to
    eval(); this subclass forces the dropout mask to be applied
    regardless of the module's training flag.
    """

    def forward(self, input: Tensor) -> Tensor:
        # training=True keeps the mask active even when self.training is
        # False; p and inplace are inherited from Dropout.
        return F.dropout(input, p=self.p, training=True,
                         inplace=self.inplace)
11 | -------------------------------------------------------------------------------- /test/integration/fixtures/labels/mono_no_align/LJ001-0007.lab: -------------------------------------------------------------------------------- 1 | # 2 | dh 3 | @ 4 | @@r1 5 | r 6 | lw 7 | iy 8 | i 9 | s 10 | t 11 | b 12 | u 13 | k 14 | pau 15 | p 16 | r 17 | i 18 | n 19 | t^ 20 | @ 21 | d 22 | w 23 | i 24 | th 25 | m 26 | uw 27 | v 28 | @ 29 | b 30 | l! 31 | t 32 | ai 33 | p 34 | s 35 | pau 36 | dh 37 | @ 38 | g 39 | uw 40 | t 41 | n! 42 | b 43 | @@r1 44 | r 45 | g 46 | pau 47 | @r 48 | r 49 | f 50 | or 51 | r 52 | t^ 53 | iy 54 | t 55 | uw 56 | l 57 | ai 58 | n 59 | b 60 | ai 61 | b 62 | l! 63 | pau 64 | @ 65 | v 66 | @ 67 | b 68 | ow 69 | t 70 | f 71 | or 72 | r 73 | t 74 | iy 75 | n 76 | f 77 | i 78 | f 79 | t 80 | iy 81 | f 82 | ai 83 | v 84 | # 85 | -------------------------------------------------------------------------------- /test/integration/fixtures/labels/phoneset_arpabet.txt: -------------------------------------------------------------------------------- 1 | #0 2 | #1 3 | #2 4 | #3 5 | #4 6 | #5 7 | AA0 8 | AA1 9 | AA2 10 | AE0 11 | AE1 12 | AE2 13 | AH0 14 | AH1 15 | AH2 16 | AO0 17 | AO1 18 | AO2 19 | AW0 20 | AW1 21 | AW2 22 | AY0 23 | AY1 24 | AY2 25 | B 26 | CH 27 | D 28 | DH 29 | EH0 30 | EH1 31 | EH2 32 | ER0 33 | ER1 34 | ER2 35 | EY0 36 | EY1 37 | EY2 38 | F 39 | G 40 | HH 41 | IH0 42 | IH1 43 | IH2 44 | IY0 45 | IY1 46 | IY2 47 | JH 48 | K 49 | L 50 | M 51 | N 52 | NG 53 | OW0 54 | OW1 55 | OW2 56 | OY0 57 | OY1 58 | OY2 59 | P 60 | R 61 | S 62 | SH 63 | sil 64 | sp 65 | spn 66 | T 67 | TH 68 | UH0 69 | UH1 70 | UH2 71 | UW0 72 | UW1 73 | UW2 74 | V 75 | W 76 | Y 77 | Z 78 | ZH -------------------------------------------------------------------------------- /idiaptts/scripts/tts_frontend/README.md: -------------------------------------------------------------------------------- 1 | Scripts to make HTS compatible labels from text files. 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
# Written by Bastian Schnell
#

from typing import Callable

import torch
import torch.nn as nn


class ApplyFunction(nn.Module):
    """Module wrapper that applies an arbitrary callable to its input.

    The callable is always applied during training.  During testing
    (``self.training == False``) the input is passed through unchanged
    unless ``apply_during_testing`` is set.
    """

    def __init__(self, fn: Callable, apply_during_testing: bool = True):
        super().__init__()

        self.fn = fn
        self.apply_during_testing = apply_during_testing

    def forward(self, input_: torch.Tensor):
        # Guard clause: in eval mode without apply_during_testing the
        # wrapper is an identity function.
        if not self.training and not self.apply_during_testing:
            return input_
        return self.fn(input_)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
# Written by Bastian Schnell
#

from typing import List

import torch
import torch.nn as nn


class Mask(nn.Module):
    """Fill masked positions of a tensor with a constant value.

    Expects its input as a pair ``[features, mask]``.  With the default
    ``invert_mask=True`` non-zero mask entries mark positions to KEEP,
    i.e. positions where the mask is zero are overwritten with
    ``mask_value``.
    """

    def __init__(self, invert_mask: bool = True, mask_value: float = 0.0) \
            -> None:
        super().__init__()
        self.invert_mask = invert_mask
        self.mask_value = float(mask_value)  # masked_fill needs a scalar.

    def forward(self, input_: List[torch.Tensor]):
        features, mask = input_[0], input_[1]
        fill_at = mask.bool()
        if self.invert_mask:
            fill_at = fill_at.logical_not()
        return features.masked_fill(fill_at, self.mask_value)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
# Written by Bastian Schnell
#

import torch
import torch.nn as nn


class VanillaVAE(nn.Module):
    """Plain VAE bottleneck: a single linear projection producing mean and
    log-variance, followed by the reparametrization trick."""

    def __init__(self, dim_in, latent_dim):
        super(VanillaVAE, self).__init__()
        # One projection yields mu and log_var stacked on the feature axis.
        self.linear = nn.Linear(dim_in, latent_dim * 2, bias=False)

    def forward(self, input):
        projection = self.linear(input)
        # Halve the feature axis: first part is mu, second is log_var.
        # Assumes input is (batch, time, features) -- dim=2 split.
        mu, log_var = torch.split(projection, projection.shape[2] // 2,
                                  dim=2)
        return self._reparametrize(mu, log_var), mu, log_var

    def _reparametrize(self, mu: torch.Tensor, log_var: torch.Tensor) \
            -> torch.Tensor:
        # z = mu + sigma * eps with eps ~ N(0, I).
        std = (0.5 * log_var).exp()
        eps = torch.randn_like(std)
        return mu + std * eps
26 | z 27 | f 28 | r 29 | @ 30 | m 31 | w 32 | u 33 | d 34 | b 35 | l 36 | aa1 37 | k 38 | s 39 | pau 40 | i 41 | n 42 | g 43 | r 44 | ei1 45 | v 46 | d 47 | i 48 | n 49 | r 50 | i 51 | l 52 | iy 53 | f 54 | f 55 | @r 56 | r 57 | s 58 | e 59 | n 60 | ch 61 | @r 62 | r 63 | iy 64 | z 65 | pau 66 | b 67 | i 68 | f 69 | or 70 | r 71 | dh 72 | @ 73 | w 74 | u 75 | d 76 | k 77 | uh 78 | t^ 79 | @r 80 | r 81 | z 82 | @ 83 | v 84 | dh 85 | @ 86 | n 87 | e 88 | dh 89 | @r 90 | r 91 | lw 92 | @ 93 | n 94 | d 95 | z 96 | pau 97 | b 98 | ai 99 | @ 100 | s 101 | i 102 | m 103 | @ 104 | lw 105 | @r 106 | r 107 | p 108 | r 109 | aa1 110 | s 111 | e 112 | s 113 | # 114 | -------------------------------------------------------------------------------- /idiaptts/src/model_trainers/vtln/VTLNMonphoneSpeakerAdaptionModelTrainer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ 5 | # Written by Bastian Schnell 6 | # 7 | 8 | # System imports. 9 | 10 | # Third-party imports. 11 | 12 | # Local source tree imports. 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/
# Written by Bastian Schnell
#


class EmbeddingConfig(object):
    """Configuration container for an embedding layer.

    :param f_get_emb_index: Callable mapping an utterance id to the
        embedding index used for it; must be callable.
    :param num_embeddings: Size of the embedding dictionary.
    :param embedding_dim: Dimension of each embedding vector.
    :param name: Optional human-readable name shown in ``repr``.
    :param args: Additional keyword arguments stored on the config.
    """

    def __init__(self, f_get_emb_index, num_embeddings, embedding_dim,
                 name=None, **args):
        assert callable(f_get_emb_index), "f_get_emb_index must be callable."
        self.f_get_emb_index = f_get_emb_index
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.name = name
        self.args = args

    def __repr__(self):
        if self.name is None:
            output = ""
        else:
            output = "{}: ".format(self.name)
        output += "{}x{}".format(self.num_embeddings, self.embedding_dim)
        if len(self.args) > 0:
            # BUG FIX: the original ' '.join(map(str, **self.args)) raised
            # TypeError for any extra kwargs (map() accepts no keyword
            # arguments).  List the extra arguments explicitly instead.
            output += " with " + " ".join(
                "{}={}".format(key, value)
                for key, value in self.args.items())
        return output
idiaptts.src.neural_networks.pytorch.loss.AtomLoss import atom_loss 7 | from idiaptts.src.neural_networks.pytorch.loss.DiscretizedMixturelogisticLoss import DiscretizedMixturelogisticLoss 8 | from idiaptts.src.neural_networks.pytorch.loss.L1WeightedVUVMSELoss import L1WeightedVUVMSELoss 9 | from idiaptts.src.neural_networks.pytorch.loss.OneHotCrossEntropyLoss import OneHotCrossEntropyLoss 10 | from idiaptts.src.neural_networks.pytorch.loss.WeightedNonzeroMSELoss import WeightedNonzeroMSELoss 11 | from idiaptts.src.neural_networks.pytorch.loss.WeightedNonzeroWMSEAtomLoss import WeightedNonzeroWMSEAtomLoss 12 | from idiaptts.src.neural_networks.pytorch.loss.WMSELoss import WMSELoss 13 | from idiaptts.src.neural_networks.pytorch.loss.VAEKLDLoss import VAEKLDLoss 14 | from idiaptts.src.neural_networks.pytorch.loss.UnWeightedAccuracy import UnWeightedAccuracy 15 | -------------------------------------------------------------------------------- /test/integration/fixtures/database/utts.data: -------------------------------------------------------------------------------- 1 | LJ001-0001 Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition 2 | LJ001-0002 in being comparatively modern. 3 | LJ001-0003 For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process 4 | LJ001-0004 produced the block books, which were the immediate predecessors of the true printed book, 5 | LJ001-0005 the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing. 6 | LJ001-0006 And it is worth mention in passing that, as an example of fine typography, 7 | LJ001-0007 the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five, 8 | LJ001-0008 has never been surpassed. 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 Idiap Research Institute, http://www.idiap.ch/
# Written by Bastian Schnell
#

from idiaptts.src.data_preparation.NpzDataReader import DataReader
from idiaptts.src.data_preparation.DataReaderConfig import DataReaderConfig


class CategoryDataReader(DataReader):
    """Data reader that maps an utterance id to a category label through a
    user-supplied function instead of loading features from disk."""

    class Config(DataReader.Config):
        def __init__(self, name, id_to_category_fn):
            super().__init__(
                name, output_names=DataReaderConfig._str_to_list(name))
            self.id_to_category_fn = id_to_category_fn

        def create_reader(self):
            # Factory hook used by the framework to build the reader.
            return CategoryDataReader(self)

    def __init__(self, config: Config):
        super().__init__(config)
        self.id_to_category_fn = config.id_to_category_fn

    def __getitem__(self, id_name: str):
        # Evaluate lazily per request; result is keyed by the reader name.
        category = self.id_to_category_fn(id_name)
        return {self.name: category}
file_id_list="${1}" 4 | dir_labels="${2}" 5 | 6 | IFS=$'\r\n' GLOBIGNORE='*' command eval 'utts=($(cat ${file_id_list}))' 7 | 8 | for file_id in ${utts[@]}; do 9 | utt_id=$(basename "${file_id}") # Remove possible speaker folder in path. 10 | subfolder_name=$(basename "${dir_labels}") 11 | 12 | if [[ "${file_id}" == *\/* ]]; then # File id contains a directory. 13 | speaker_id=${file_id%%/*} 14 | # echo $file_id $speaker_id 15 | if [ -n "${speaker_id}" ]; then # If speaker id is not empty. Quoting required: unquoted empty var makes [ -n ] always true. 16 | utt_id=${file_id##*/} 17 | # echo Copy ${utt_id} to ${speaker_id}/${utt_id} 18 | mkdir -p ${dir_labels}/${speaker_id} 19 | # Alignment script requires the files in speaker specific 20 | # subdirectories so copy them here. Don't move them 21 | # because model trainers require them to be in the main 22 | # directory. TODO: Can we remove this requirement? 23 | cp ${dir_labels}/${utt_id}.lab ${dir_labels}/${speaker_id}/ 24 | fi 25 | fi 26 | done -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/loss/OneHotCrossEntropyLoss.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | import torch 7 | 8 | 9 | class OneHotCrossEntropyLoss(torch.nn.CrossEntropyLoss): 10 | 11 | def __init__(self, weight=None, size_average=None, ignore_index=-100, 12 | reduce=None, reduction='elementwise_mean', shift=1): 13 | self.shift = shift 14 | super().__init__(weight, size_average, ignore_index, reduce, reduction) 15 | 16 | def forward(self, input, target): 17 | 18 | # Convert one hot vector to index tensor.
19 | # (B x C x T) -> (B x T) 20 | _, targets = target.max(dim=1) 21 | 22 | if self.shift is not None: 23 | input = input[..., :-self.shift] 24 | targets = targets[..., self.shift:] 25 | 26 | # input (B x C x T), targets (B x T), loss_full (B x T) 27 | loss_full = super(OneHotCrossEntropyLoss, self).forward(input, targets) 28 | 29 | # (B x T) -unsqueeze-> (B x T x 1) 30 | return loss_full.unsqueeze(-1) 31 | -------------------------------------------------------------------------------- /idiaptts/misc/get_audio_length.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 4 | # Written by Bastian Schnell 5 | # 6 | 7 | convertsecs() { 8 | h=$(bc <<< "${1}/3600") 9 | m=$(bc <<< "(${1}%3600)/60") 10 | s=$(bc <<< "${1}%60") 11 | printf "%02d:%02d:%05.2f\n" $h $m $s 12 | } 13 | 14 | dir_audio=${1:-"."} 15 | 16 | if [ "$#" -eq 2 ]; then 17 | file_id_list=${2:-"file_id_list.txt"} 18 | echo "Load files from ${file_id_list} and search them in ${dir_audio}." 19 | IFS=$'\r\n' GLOBIGNORE='*' command eval 'utts=($(cat ${file_id_list}))' 20 | else 21 | echo "Find files in ${dir_audio}." 22 | cwd=$PWD 23 | cd "${dir_audio}" 24 | # find . -name "*.wav" | xargs echo 25 | utts=() 26 | while IFS= read -r -d $'\0'; do 27 | utts+=("$REPLY") 28 | done < <(find . -name "*.wav" -print0 ) 29 | cd "${cwd}" 30 | fi 31 | #echo ${utts[@]:0:10} 32 | 33 | total_length=0 34 | for filename in "${utts[@]}"; do 35 | length=$(soxi -D "${dir_audio}/${filename%.*}.wav") 36 | total_length=$(echo "${total_length} + ${length}" | bc) 37 | done 38 | 39 | echo $(convertsecs ${total_length}) -------------------------------------------------------------------------------- /idiaptts/scripts/tts_frontend/English/Text2FestivalReady.pl: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/perl 2 | 3 | #added this in order to use basename command in order to remove the path from the filenames in prompts file 4 | use File::Basename; 5 | 6 | my ($inPlainText, $outFestivalText, $outUttFolder) = @ARGV; 7 | 8 | ############################################################################### 9 | #|| ( $tmp =~ /^(.{8})\.abc\t(.+)/ ) || ( $tmp =~ /^(.{7})\.abc\t(.+)/ ) || ( $tmp =~ /^(.{9})\.abc\t(.+)/ ) 10 | 11 | 12 | open INFILE, $inPlainText or die " Unable to open the file: $inPlainText\n"; 13 | open OUTFILE, ">$outFestivalText" or die " Unable to create the file: $outFestivalText\n"; 14 | print OUTFILE "(voice_clb_hts2010)\n"; 15 | while ( $tmp = ) 16 | { 17 | chomp($tmp); 18 | if (( $tmp =~ /^(.+)\t(.+)/ ) ) 19 | { 20 | $tmpFileName = $1; 21 | # added this because in the prompts file the filenames are with their path 22 | $tmpFileName = basename($tmpFileName); 23 | $tmpText = $2; 24 | $tmpText =~ s/~/ss/g; 25 | $tmpText =~ s/\\"//g; 26 | $tmpText =~ s/"//g; 27 | print OUTFILE "(utt.save (SynthText \"$tmpText\") \"$outUttFolder/$tmpFileName.utt\")\n"; 28 | } 29 | } 30 | close(INFILE); 31 | print OUTFILE "(quit)\n"; 32 | close(OUTFILE); 33 | -------------------------------------------------------------------------------- /idiaptts/scripts/tts_frontend/English/example_English_prompts.txt: -------------------------------------------------------------------------------- 1 | sentence01.txt Biometrics authentication is used in computer science as a form of identification and access control. 2 | sentence02.txt Biometric identifiers are the distinctive, measurable characteristics used to label and describe individuals. 3 | sentence03.txt Biometric identifiers are often categorized as physiological versus behavioral characteristics. 4 | sentence04.txt More traditional means of access control include token-based identification systems, such as a driver's license or passport. 
5 | sentence05.txt In computer science, speech recognition is the translation of spoken words into text. 6 | sentence06.txt It is also known as automatic speech recognition, computer speech recognition, or just speech to text. 7 | sentence07.txt These systems analyze the person's specific voice and use it for the recognition of that person's speech. 8 | sentence08.txt A facial recognition system is a computer application for automatically identifying a person from a digital image. 9 | sentence09.txt One of the ways to do this is by comparing selected facial features from the image and a facial database. 10 | sentence10.txt It is typically used in security systems and can be compared to other biometrics recognition systems. 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Except where otherwise stated, this code is: 2 | 3 | MIT License 4 | 5 | Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 6 | Written by Bastian Schnell 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of 9 | this software and associated documentation files (the "Software"), to deal in 10 | the Software without restriction, including without limitation the rights to 11 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 12 | of the Software, and to permit persons to whom the Software is furnished to do 13 | so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all 16 | copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE. -------------------------------------------------------------------------------- /idiaptts/scripts/tts_frontend/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 4 | Written by Alexandros Lazaridis, Pierre-Edouard Honnet, Bastian Schnell 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | of the Software, and to permit persons to whom the Software is furnished to do 11 | so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
-------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/models/rnn_dyn/TransposingWrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ 5 | # Written by Bastian Schnell 6 | # 7 | 8 | 9 | from .FFWrapper import FFWrapper 10 | 11 | 12 | class TransposingWrapper(FFWrapper): 13 | def __init__(self, in_dim, layer_config, batch_first: bool = True): 14 | super(TransposingWrapper, self).__init__(in_dim, layer_config) 15 | self.batch_first = batch_first 16 | self.transpose = True 17 | self.untranspose = True 18 | 19 | def forward(self, input_, **kwargs): 20 | if self.transpose: 21 | if self.batch_first: 22 | input_ = input_.transpose(1, 2) 23 | else: 24 | input_ = input_.permute(1, 2, 0) 25 | 26 | output, kwargs = super(TransposingWrapper, self).forward( 27 | input_, **kwargs) 28 | 29 | if self.untranspose: 30 | if self.batch_first: 31 | output = output.transpose(1, 2) 32 | else: 33 | output = output.permute(2, 0, 1) 34 | 35 | return output, kwargs 36 | 37 | def __getitem__(self, item): 38 | return self.module.__getitem__(item) 39 | -------------------------------------------------------------------------------- /idiaptts/misc/alignment/state_align/binary_io.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy 4 | 5 | class BinaryIOCollection(object): 6 | 7 | def load_binary_file(self, file_name, dimension): 8 | fid_lab = open(file_name, 'rb') 9 | features = numpy.fromfile(fid_lab, dtype=numpy.float32) 10 | fid_lab.close() 11 | assert features.size % float(dimension) == 0.0,'specified dimension not compatible with data' 12 | features = features[:(dimension * (features.size / dimension))] 13 | features = features.reshape((-1, dimension)) 14 | 15 | return features 16 | 17 | def 
array_to_binary_file(self, data, output_file_name): 18 | data = numpy.array(data, 'float32') 19 | 20 | fid = open(output_file_name, 'wb') 21 | data.tofile(fid) 22 | fid.close() 23 | 24 | def load_binary_file_frame(self, file_name, dimension): 25 | fid_lab = open(file_name, 'rb') 26 | features = numpy.fromfile(fid_lab, dtype=numpy.float32) 27 | fid_lab.close() 28 | assert features.size % float(dimension) == 0.0,'specified dimension not compatible with data' 29 | frame_number = features.size / dimension 30 | features = features[:(dimension * frame_number)] 31 | features = features.reshape((-1, dimension)) 32 | 33 | return features, frame_number 34 | -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/GradientScaling.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2020 Idiap Research Institute, http://www.idiap.ch/ 5 | # Written by Bastian Schnell 6 | # 7 | 8 | '''A gradient scaling layer implemented following https://pytorch.org/docs/master/notes/extending.html''' 9 | 10 | import torch 11 | 12 | 13 | class grad_scaling(torch.autograd.Function): 14 | 15 | @staticmethod 16 | def forward(ctx, input_, lambda_): 17 | # ctx.set_materialize_grads(False) # TODO: Add in PyTorch +1.7 18 | ctx.lambda_ = lambda_ 19 | return input_.view_as(input_) 20 | 21 | @staticmethod 22 | def backward(ctx, grad_output): 23 | # Skip computation of empty gradients. 24 | if grad_output is None: 25 | return None, None 26 | # Special case to make sure no gradient is flowing. 
27 | if ctx.lambda_ == 0.0: 28 | return torch.zeros_like(grad_output), None 29 | 30 | return grad_output * ctx.lambda_, None 31 | 32 | 33 | class GradientScaling(torch.nn.Module): 34 | def __init__(self, lambda_: float) -> None: 35 | super(GradientScaling, self).__init__() 36 | self.lambda_ = float(lambda_) 37 | 38 | def forward(self, input_): 39 | return grad_scaling.apply(input_, self.lambda_) 40 | 41 | def extra_repr(self): 42 | return "lambda={}".format(self.lambda_) 43 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # INSTALL 2 | 3 | To install IdiapTTS, `cd IdiapTTS` and run the below steps: 4 | 5 | - Install basic tools, mainly for data preparation (the festival installation requires gcc-4.8, only needed for tts_frontend) 6 | ``bash tools/compile_other_speech_tools.sh`` 7 | Also install HTK (requires account at http://htk.eng.cam.ac.uk/register.shtml) with 8 | ``bash tools/compile_htk `` 9 | - To make the TTS frontend working you have to download the unilex dictionary from [http://www.cstr.ed.ac.uk/projects/unisyn/](http://www.cstr.ed.ac.uk/projects/unisyn/) and copy the files from *festival/lib/dicts/unilex/* into your festival directory at the same location *full_festival_location/lib/dicts/unilex/*. 10 | - Ensure that bc and soxi packages are installed in your shell. 11 | 12 | For all the following we recommend to use a [Conda](https://docs.conda.io/en/latest/miniconda.html) environment with **Python 3.6** (bandmat does not build with pip in Python 3.7, it can be [built from source](https://github.com/MattShannon/bandmat/issues/11), May '19). 13 | 14 | - Activate conda environment 15 | - Install [PyTorch 1.6.0](https://pytorch.org/) with the appropriate cuda version. 
Example for CUDA8.0: ``conda install pytorch=1.6.0 cuda80 -c pytorch`` 16 | - If you use conda make sure you got ``pip`` installed in your environment, then install IdiapTTS by running 17 | ``pip install .`` or use ``pip install -e .`` to install in editable mode. 18 | -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/loss/UnWeightedAccuracy.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | import numpy as np 7 | from sklearn.metrics import confusion_matrix 8 | import torch 9 | 10 | 11 | class UnWeightedAccuracy(torch.nn.modules.loss._Loss): 12 | 13 | def __init__(self, num_per_class: torch.Tensor, reduction: str = 'none'): 14 | self._num_classes = len(num_per_class) 15 | self._num_per_class = num_per_class 16 | self._num_total = num_per_class.sum().float() 17 | self._num_per_class[self._num_per_class == 0] = 1.0 # Prevent NaNs. 
18 | super().__init__() 19 | 20 | def forward(self, input_, target): 21 | class_pred = input_.argmax(dim=-1) 22 | 23 | conf_matrix = confusion_matrix(target.cpu(), class_pred.cpu(), 24 | labels=range(self._num_classes)) 25 | conf_matrix_norm = conf_matrix / self._num_per_class 26 | 27 | weighted_acc = (class_pred == target).sum() / self._num_total 28 | unweighted_acc = np.sum(np.diag(conf_matrix_norm)) / self._num_classes 29 | unweighted_acc = torch.tensor([unweighted_acc], dtype=torch.float32, device=input_.device) 30 | # print(conf_matrix) 31 | # print(num_per_class) 32 | # print(weighted_acc) 33 | # print(unweighted_acc) 34 | 35 | return -(weighted_acc + unweighted_acc) 36 | -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/models/rnn_dyn/CustomWrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ 5 | # Written by Bastian Schnell 6 | # 7 | 8 | 9 | import torch.nn as nn 10 | 11 | 12 | class CustomWrapper(nn.Module): 13 | 14 | def __init__(self): 15 | super(CustomWrapper, self).__init__() 16 | self.module = None 17 | self.out_dim = None # Needs to be set by child classes. 18 | 19 | def forward(self, input_, **kwargs): 20 | if type(self.module) is nn.Sequential \ 21 | and hasattr(self.module[0], "select_inputs") \ 22 | and callable(self.module[0].select_inputs): 23 | output = self.module(self.module[0].select_inputs(input_, **kwargs)) 24 | 25 | elif hasattr(self.module, "select_inputs") \ 26 | and callable(self.module.select_inputs): 27 | output = self.module(self.module.select_inputs(input_, **kwargs)) 28 | 29 | else: 30 | output = self.module(input_) 31 | 32 | return output 33 | 34 | def __getattr__(self, item): 35 | try: 36 | return super().__getattr__(item) 37 | except AttributeError as e: 38 | # TODO: Test speed impact. 
39 | if item != "module": 40 | return getattr(self.module, item) 41 | else: 42 | raise e 43 | 44 | def init_hidden(self, batch_size=1): 45 | pass 46 | -------------------------------------------------------------------------------- /test/cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 4 | # Written by Bastian Schnell 5 | # 6 | 7 | export IDIAPTTS_ROOT=$(python -c 'import os;import idiaptts; print(os.path.dirname(idiaptts.__file__))') 8 | 9 | # "queue.pl" uses qsub. The options to it are 10 | # options to qsub. If you have GridEngine installed, 11 | # change this to a queue you have access to. 12 | # Otherwise, use "run.pl", which will run jobs locally 13 | # (make sure your --num-jobs options are no more than 14 | # the number of cpus on your machine. 15 | 16 | #a) Sun grid options (IDIAP) 17 | # ATTENTION: Do that in your shell: SETSHELL grid 18 | export cuda_cmd="queue.pl -l gpu -v IDIAPTTS_ROOT" 19 | export cuda_short_cmd="queue.pl -l sgpu -v IDIAPTTS_ROOT" 20 | export cpu_1d_cmd="queue.pl -l q_1day -v IDIAPTTS_ROOT" 21 | export cpu_1d_32G_cmd="queue.pl -l q_1day_mth -pe pe_mth 4 -v IDIAPTTS_ROOT" # TODO: Needs testing. 22 | #export cuda_cmd="queue.pl -l q1d,hostname=dynamix03" 23 | #export cuda_cmd="..." 24 | queue_gpu="q_gpu" # q_gpu, q_short_gpu, cpu 25 | queue_gpu_short="q_short_gpu" 26 | queue_gpu_long="q_long_gpu" 27 | queue_gpu_mth="q_gpu_mth" 28 | queue_cpu_1d="q_1day" 29 | queue_cpu_1d_32G="q_1day_mth -pe pe_mth 4" 30 | queue_cpu_1w="q_1week" 31 | cpu="cpu" 32 | 33 | #b) BUT cluster options 34 | #export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1" 35 | #export cuda_cmd="queue.pl -q long.q@pcspeech-gpu" 36 | 37 | #c) run it locally... 
38 | # export cuda_cmd=run.pl 39 | # export cuda_short_cmd=$cuda_cmd 40 | -------------------------------------------------------------------------------- /idiaptts/scripts/noise_reduction/selectConfig.m: -------------------------------------------------------------------------------- 1 | function stConfig = selectConfig(nConf) 2 | 3 | stConfig.t60=NaN; 4 | if nConf > 4 && nConf <= 8 5 | stConfig.phase_sensitive= true; 6 | else 7 | stConfig.phase_sensitive= false; 8 | end 9 | 10 | stConfig.gain_function = 'betaorder'; 11 | stConfig.minimum_gain = -10; 12 | 13 | 14 | switch nConf 15 | case {1,5} 16 | stConfig.noise_estimator= 'minimumstat'; 17 | stConfig.ms_buffer_length = 3; 18 | stConfig.apriori_snr_estimation_method = 'dd'; 19 | case {2,6} 20 | stConfig.noise_estimator= 'minimumstat'; 21 | stConfig.ms_buffer_length = 3; 22 | stConfig.apriori_snr_estimation_method = 'cepstral'; 23 | case {3,7} 24 | stConfig.noise_estimator= 'spp'; 25 | stConfig.apriori_snr_estimation_method = 'dd'; 26 | case {4,8} 27 | stConfig.noise_estimator= 'spp'; 28 | stConfig.apriori_snr_estimation_method = 'cepstral'; 29 | case 9 30 | stConfig.noise_estimator = 'minimumstat'; 31 | stConfig.ms_buffer_length = 3; 32 | stConfig.apriori_snr_estimation_method = 'cepstral'; 33 | stConfig.t60_est_fac = 0.8; 34 | case 10 35 | stConfig.noise_estimator = 'minimumstat'; 36 | stConfig.ms_buffer_length = 3; 37 | stConfig.apriori_snr_estimation_method = 'cepstral'; 38 | stConfig.smoothing_constant_variant = 7; 39 | end 40 | 41 | cConfig = [fieldnames(stConfig) struct2cell(stConfig)].'; 42 | 43 | dFs = 16000; 44 | stConfig = InitDereverbSpectralSubtract(dFs, cConfig{:}); 45 | 46 | end 47 | -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67_tmp/nn/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "py/object": 
"idiaptts.src.neural_networks.pytorch.models.NamedForwardWrapper.NamedForwardWrapper.Config", 3 | "input_names": [ 4 | "questions" 5 | ], 6 | "batch_first": true, 7 | "input_merge_type": "cat", 8 | "name": null, 9 | "output_names": [ 10 | "pred_acoustic_features" 11 | ], 12 | "kwargs": {}, 13 | "wrapped_model_config": { 14 | "py/object": "idiaptts.src.neural_networks.pytorch.models.rnn_dyn.Config", 15 | "in_dim": 409, 16 | "batch_first": true, 17 | "layer_configs": [ 18 | { 19 | "py/object": "idiaptts.src.neural_networks.pytorch.models.rnn_dyn.Config.LayerConfig", 20 | "type": "Linear", 21 | "out_dim": 32, 22 | "num_layers": 1, 23 | "nonlin": "ReLU", 24 | "dropout": 0.0, 25 | "kwargs": {}, 26 | "needs_in_dim": true, 27 | "needs_packing": false, 28 | "needs_transposing": false 29 | }, 30 | { 31 | "py/object": "idiaptts.src.neural_networks.pytorch.models.rnn_dyn.Config.LayerConfig", 32 | "type": "Linear", 33 | "out_dim": 67, 34 | "num_layers": 1, 35 | "nonlin": null, 36 | "dropout": 0.0, 37 | "kwargs": {}, 38 | "needs_in_dim": true, 39 | "needs_packing": false, 40 | "needs_transposing": false 41 | } 42 | ], 43 | "emb_configs": [] 44 | } 45 | } -------------------------------------------------------------------------------- /test/integration/fixtures/test_model_in409_out67/nn/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "py/object": "idiaptts.src.neural_networks.pytorch.models.NamedForwardWrapper.NamedForwardWrapper.Config", 3 | "input_names": [ 4 | "questions" 5 | ], 6 | "batch_first": false, 7 | "input_merge_type": "cat", 8 | "name": "AcousticModel", 9 | "output_names": [ 10 | "pred_acoustic_features" 11 | ], 12 | "kwargs": {}, 13 | "wrapped_model_config": { 14 | "py/object": "idiaptts.src.neural_networks.pytorch.models.rnn_dyn.Config", 15 | "in_dim": 409, 16 | "batch_first": false, 17 | "layer_configs": [ 18 | { 19 | "py/object": "idiaptts.src.neural_networks.pytorch.models.rnn_dyn.Config.LayerConfig", 20 | 
"type": "Linear", 21 | "out_dim": 32, 22 | "num_layers": 1, 23 | "nonlin": "relu", 24 | "dropout": 0.0, 25 | "kwargs": {}, 26 | "needs_in_dim": true, 27 | "needs_packing": false, 28 | "needs_transposing": false 29 | }, 30 | { 31 | "py/object": "idiaptts.src.neural_networks.pytorch.models.rnn_dyn.Config.LayerConfig", 32 | "type": "Linear", 33 | "out_dim": 67, 34 | "num_layers": 1, 35 | "nonlin": null, 36 | "dropout": 0.0, 37 | "kwargs": {}, 38 | "needs_in_dim": true, 39 | "needs_packing": false, 40 | "needs_transposing": false 41 | } 42 | ], 43 | "emb_configs": [] 44 | } 45 | } -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/ExponentialMovingAverage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 5 | # Written by Bastian Schnell 6 | # 7 | 8 | # Code from https://github.com/ZackHodari/morgana/ 9 | 10 | import copy 11 | 12 | 13 | class ExponentialMovingAverage(object): 14 | """Exponential moving average helper to apply gradient updates to an EMA model. 15 | Parameters 16 | ---------- 17 | model : torch.nn.Module 18 | decay : float 19 | Decay rate of previous parameter values. Parameter updates are also scaled by `1 - decay`. 20 | """ 21 | def __init__(self, model, decay): 22 | self.model = copy.deepcopy(model) 23 | self.decay = decay 24 | 25 | # Use shadow to link to all parameters in the averaged model. 26 | self.shadow = {} 27 | for name, param in self.model.named_parameters(): 28 | if param.requires_grad: 29 | self.shadow[name] = param.data 30 | param.detach_() 31 | 32 | def _update_param(self, name, x): 33 | """Performs update on one parameter. 
`shadow = decay * shadow + (1 - decay) * x`.""" 34 | assert name in self.shadow 35 | 36 | update_delta = self.shadow[name] - x 37 | self.shadow[name] -= (1.0 - self.decay) * update_delta 38 | 39 | def update_params(self, other_model): 40 | """Updates all parameters of `self.model` using a separate model's updated parameters.""" 41 | assert other_model is not self.model 42 | 43 | for name, param in other_model.named_parameters(): 44 | if name in self.shadow: 45 | self._update_param(name, param.data) 46 | -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/ExtendedExponentialLR.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2020 Idiap Research Institute, http://www.idiap.ch/ 5 | # Written by Bastian Schnell 6 | # 7 | 8 | import warnings 9 | import math 10 | 11 | from torch.optim.lr_scheduler import ExponentialLR 12 | 13 | 14 | class ExtendedExponentialLR(ExponentialLR): 15 | def __init__(self, optimizer, gamma, last_epoch=-1, min_lr=None, warmup_steps=0, decay_steps=1.0): 16 | if isinstance(min_lr, list) or isinstance(min_lr, tuple): 17 | if len(min_lr) != len(optimizer.param_groups): 18 | raise ValueError("expected {} min_lrs, got {}".format( 19 | len(optimizer.param_groups), len(min_lr))) 20 | self.min_lrs = list(min_lr) 21 | else: 22 | self.min_lrs = [min_lr] * len(optimizer.param_groups) 23 | 24 | self.warmup_steps = warmup_steps 25 | self.decay_steps = float(decay_steps) 26 | 27 | super(ExtendedExponentialLR, self).__init__(optimizer, gamma, last_epoch) 28 | 29 | def get_lr(self): 30 | if not self._get_lr_called_within_step: 31 | warnings.warn("To get the last learning rate computed by the scheduler, " 32 | "please use `get_last_lr()`.", UserWarning) 33 | 34 | if self.last_epoch <= self.warmup_steps: 35 | return self.base_lrs 36 | return [max(group['lr'] * self.gamma ** (1.0 / 
self.decay_steps), min_lr) 37 | for group, min_lr in zip(self.optimizer.param_groups, self.min_lrs)] 38 | 39 | def _get_closed_form_lr(self): 40 | return [max(base_lr * self.gamma ** max(0, (self.last_epoch - self.warmup_steps + 1) / self.decay_steps), min_lr) 41 | for base_lr, min_lr in zip(self.base_lrs, self.min_lrs)] 42 | -------------------------------------------------------------------------------- /idiaptts/src/data_preparation/audio/down_sampling.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 5 | # Written by Bastian Schnell 6 | # 7 | 8 | import sys 9 | import os 10 | import librosa 11 | import soundfile 12 | from shutil import copy 13 | from pydub import AudioSegment 14 | 15 | # Local source tree imports. 16 | from idiaptts.misc.utils import makedirs_safe 17 | 18 | """ 19 | Function that down-samples a list of audio files. 20 | 21 | python down_sampling.py 22 | """ 23 | 24 | # Read which files to process. 25 | dir_audio = sys.argv[1] 26 | dir_out = sys.argv[2] 27 | file_id_list = sys.argv[3] 28 | target_sampling_rate = int(sys.argv[4]) 29 | 30 | with open(file_id_list) as f: 31 | id_list = f.readlines() 32 | # Trim entries in-place. 
class ModelConfig:
    """Base configuration for dynamically created models.

    Stores the named inputs and outputs of a model and the strategy used
    to merge multiple named inputs into one tensor.  Subclasses implement
    :meth:`create_model` to build the actual network.
    """

    # Supported strategies for merging multiple named inputs.
    MERGE_TYPE_ADD = "add"
    MERGE_TYPE_CAT = "cat"
    MERGE_TYPE_LIST = "list"
    MERGE_TYPE_MEAN = "mean"
    MERGE_TYPE_MUL = "mul"
    MERGE_TYPE_ATTENTION = "attention"

    @staticmethod
    def _get_input_dim(input_names: List[str], datareader: Dict) -> int:
        """Sum the feature dimensions of all named inputs.

        :param input_names: Names of the inputs to accumulate.
        :param datareader: Mapping from input name to a reader object
                           providing ``get_dim()``.
        """
        return sum(datareader[name].get_dim() for name in input_names)

    @staticmethod
    def _str_to_list(str_or_list) -> Optional[List]:
        """Wrap a single element into a list.

        ``None`` and existing sequences (list/tuple) are passed through
        unchanged.
        """
        if str_or_list is None:
            return None
        if isinstance(str_or_list, (tuple, list)):  # Already a sequence.
            return str_or_list
        return [str_or_list]

    def __init__(self,
                 input_names: List[str],
                 batch_first: bool,
                 input_merge_type: str = MERGE_TYPE_CAT,
                 name: Optional[str] = None,
                 output_names: Optional[List[str]] = None,
                 **kwargs):
        """
        :param input_names: Name(s) of the input feature(s); a single
                            string is wrapped into a list.
        :param batch_first: True if tensors are (batch, time, ...).
        :param input_merge_type: One of the MERGE_TYPE_* constants.
        :param name: Name of the module; used as default output name.
        :param output_names: Name(s) of the output feature(s); defaults
                             to ``[name]``.  At least one of name and
                             output_names must be given.
        :param kwargs: Additional arguments stored for subclasses.
        """
        super().__init__()

        self.input_names = self._str_to_list(input_names)
        self.batch_first = batch_first
        self.input_merge_type = input_merge_type
        self.name = name
        assert output_names is not None or name is not None, \
            "Default output_names is [name], but both are None for input {}.".format(input_names)
        self.output_names = self._str_to_list(output_names) if output_names is not None else [name]
        self.kwargs = kwargs

    def create_model(self):
        """Build and return the model described by this config; subclasses must override."""
        raise NotImplementedError()
class TestGradientScaling(unittest.TestCase):
    """Tests for GradientScaling: identity in the forward pass, multiplies
    the gradient by lambda_ in the backward pass."""

    in_dim = 4
    # Class-level reference layer; _get_network shares its weight/bias so
    # that all built networks compute the identical forward pass.
    lin_layer = nn.Linear(in_dim, 3)

    def _get_network(self, lambda_=None):
        """Build a single linear layer, optionally followed by GradientScaling.

        :param lambda_: Gradient scaling factor; no scaling layer is added
                        when None.
        """
        layers = [nn.Linear(self.in_dim, 3)]
        # Share parameters with the reference layer (see class attribute).
        layers[0].weight = self.lin_layer.weight
        layers[0].bias = self.lin_layer.bias
        if lambda_ is not None:
            layers.append(GradientScaling(lambda_))
        return nn.Sequential(*layers)

    def test_scaling(self):
        """Input gradient of the scaled net must be lambda_ times the plain one."""
        scaling = 5
        batch_size = 2

        net = self._get_network()
        net_with_scaling = self._get_network(lambda_=scaling)
        input_ = torch.randn((batch_size, self.in_dim), requires_grad=True)
        # Identical but independent copy of the input for the scaled net.
        input_scaled = input_.detach().clone()
        input_scaled.requires_grad = True

        output = net(input_)
        output.sum().backward()
        output_scaled = net_with_scaling(input_scaled)
        # NOTE(review): weights are already shared via lin_layer, so this
        # assignment after the forward pass looks redundant -- confirm.
        net_with_scaling[0].weight = net[0].weight
        output_scaled.sum().backward()

        self.assertTrue(torch.isclose(scaling * input_.grad, input_scaled.grad).all())

    def test_stop(self):
        """A scaling factor of 0 must block the gradient entirely."""
        batch_size = 2

        net_with_grad_stop = self._get_network(lambda_=0.0)
        input_ = torch.randn((batch_size, self.in_dim), requires_grad=True)

        output_scaled = net_with_grad_stop(input_)
        output_scaled.sum().backward()

        self.assertTrue((input_.grad == 0).all())
config.n_frames_per_step 26 | 27 | def allows_batched_forward(self): 28 | return True 29 | 30 | def forward_batched(self, encoder_input, attention_matrix): 31 | B = attention_matrix.shape[0] 32 | T = attention_matrix.shape[1] 33 | num_chunks = int(T / self.n_frames_per_step) 34 | # attention_matrix = attention_matrix[:, ::self.n_frames_per_step] # Skip frames. 35 | attention_matrix = attention_matrix.view( 36 | B, num_chunks, self.n_frames_per_step, -1).mean(dim=2) 37 | attention_context = torch.bmm(attention_matrix, encoder_input) 38 | return attention_context, attention_matrix 39 | 40 | def forward_incremental(self, idx, encoder_input, decoder_input, attention_matrix): 41 | # B = input_.shape[0] 42 | # T = input_.shape[1] 43 | # num_chunks = int(T / step) 44 | attention_weights = attention_matrix[:, idx:idx + self.n_frames_per_step] 45 | attention_weights = attention_weights.mean(dim=1, keepdim=True) 46 | attention_context = torch.einsum("btp,bpc->btc", 47 | (attention_weights, encoder_input)) 48 | return attention_context, attention_weights, attention_matrix 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IdiapTTS 2 | This repository contains the Idiap Text-to-Speech system developed at the [Idiap Research Institute](https://www.idiap.ch/en), Martigny, Switzerland. 3 | Contact: 4 | 5 | It is an almost purely Python-based modular toolbox for building Deep Neural Network models (using [PyTorch](https://pytorch.org/)) for statistical parametric speech synthesis. It provides scripts for feature extraction and preparation which are based on third-party tools (e.g. [Festival](http://www.cstr.ed.ac.uk/projects/festival/)). It uses the [WORLD](https://github.com/mmorise/World) vocoder (i.e. its [Python wrapper](https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder)) for waveform generation. 
The framework was highly inspired by [Merlin](https://github.com/CSTR-Edinburgh/merlin) and reuses some of its data preparation functionalities. In contrast to Merlin it is intended to be more modular and allowing prototyping purely in Python. 6 | 7 | It comes with recipes in the spirit of [Kaldi](https://github.com/kaldi-asr/kaldi) located in separate repositories. 8 | 9 | IdiapTTS is distributed under the MIT license, allowing unrestricted commercial and non-commercial use. 10 | 11 | IdiapTTS is tested with: **Python 3.6** 12 | 13 | ## Installation 14 | Follow the instructions given in *INSTALL.md*. 15 | 16 | ## Experiments 17 | Instructions to run specific experiments are in the *README* files of the respective *egs* repositories: 18 | 19 | * https://github.com/idiap/idiaptts_egs_ljspeech */s1* contains TTS with a duration and acoustic model. 20 | 21 | ## Publications 22 | #### Interspeech '18: A Neural Model to Predict Parameters for a Generalized Command Response Model of Intonation 23 | Instructions to produce results similar to those reported in the paper can be found at https://github.com/idiap/idiaptts_egs_blizzard08_roger *s1/*. 24 | 25 | #### Icassp'19: An End-to-End Network to Synthesize Intonation using a Generalized Command Response Model 26 | Instructions to reproduce the results of the paper can be found in https://github.com/idiap/idiaptts_egs_blizzard08_roger *s2/*. 27 | 28 | #### SSW'10: Neural VTLN for Speaker Adaptation in TTS 29 | Instructions to produce results similar to those in the paper can be found at https://github.com/idiap/idiaptts_egs_vctk *s1/*. -------------------------------------------------------------------------------- /idiaptts/misc/alignment/state_align/license.txt: -------------------------------------------------------------------------------- 1 | Code taken from Merlin and adapted. 2 | https://github.com/CSTR-Edinburgh/merlin 3 | Their licence follows. 
4 | 5 | ########################################################################## 6 | # The Neural Network (NN) based Speech Synthesis System 7 | # https://github.com/CSTR-Edinburgh/merlin 8 | # 9 | # Centre for Speech Technology Research 10 | # University of Edinburgh, UK 11 | # Copyright (c) 2014-2015 12 | # All Rights Reserved. 13 | # 14 | # The system as a whole and most of the files in it are distributed 15 | # under the following copyright and conditions 16 | # 17 | # Permission is hereby granted, free of charge, to use and distribute 18 | # this software and its documentation without restriction, including 19 | # without limitation the rights to use, copy, modify, merge, publish, 20 | # distribute, sublicense, and/or sell copies of this work, and to 21 | # permit persons to whom this work is furnished to do so, subject to 22 | # the following conditions: 23 | # 24 | # - Redistributions of source code must retain the above copyright 25 | # notice, this list of conditions and the following disclaimer. 26 | # - Redistributions in binary form must reproduce the above 27 | # copyright notice, this list of conditions and the following 28 | # disclaimer in the documentation and/or other materials provided 29 | # with the distribution. 30 | # - The authors' names may not be used to endorse or promote products derived 31 | # from this software without specific prior written permission. 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
# Written by Bastian Schnell
#

import torch.nn as nn


class Pooling(nn.Module):
    """Base class for pooling layers that collapse the time axis to length 1."""

    def __init__(self, batch_first):
        super().__init__()
        # True if input is (batch, time, ...), otherwise (time, batch, ...).
        self.batch_first = batch_first

    def extra_repr(self):
        return "batch_first={}".format(self.batch_first)

    def get_output_length(self, seq_lengths_input):
        # Pooling collapses every sequence to a single frame.
        # NOTE: fill_ modifies seq_lengths_input in-place.
        return seq_lengths_input.fill_(1)

    def select_inputs(self, input_, **kwargs):
        """Return the input together with its sequence lengths (or None)."""
        return input_, kwargs.pop("seq_lengths_input", None)


class SelectLastPooling(Pooling):
    """Pool by selecting the last valid frame of each sequence."""

    def __init__(self, batch_first):
        super(SelectLastPooling, self).__init__(batch_first)

    def forward(self, input_):
        input_, lengths = input_
        batch_dim = 0 if self.batch_first else 1
        batch_size = input_.shape[batch_dim]
        if lengths is None:
            # No padding information: take the very last frame for all.
            time_dim = 1 if self.batch_first else 0
            seq_len_indices = [input_.shape[time_dim] - 1] * batch_size
        else:
            seq_len_indices = [length - 1 for length in lengths]
        batch_indices = list(range(batch_size))

        # Keep a time axis of size 1 in the output.
        if self.batch_first:
            return input_[batch_indices, seq_len_indices].unsqueeze(dim=1)
        else:
            return input_[seq_len_indices, batch_indices].unsqueeze(dim=0)


class MeanPooling(Pooling):
    """Pool by averaging over the time axis.

    The sum over all frames (padding included) is divided by the true
    sequence length, so padded frames are expected to be zero.
    """

    def __init__(self, batch_first):
        super().__init__(batch_first)
        self.time_dim = 1 if batch_first else 0

    def forward(self, input_):
        input_, lengths = input_

        if lengths is None:
            # No padding information available (select_inputs may hand in
            # None, as SelectLastPooling already supports): average over
            # the full time axis instead of crashing on len(None).
            return input_.mean(self.time_dim, keepdim=True)

        input_sum = input_.sum(self.time_dim, keepdim=True)

        batch_size = len(lengths)
        # Broadcast the lengths over all remaining feature dimensions.
        missing_dims = [1] * max(0, input_sum.ndim - 2)
        if self.batch_first:
            lengths = lengths.view(batch_size, 1, *missing_dims).float()
        else:
            lengths = lengths.view(1, batch_size, *missing_dims).float()

        input_mean = input_sum / lengths
        return input_mean
    def forward(self, inputs, hidden, seq_lengths, max_lenght_inputs, *_):
        """Run the flat (phrase) model and add the trainable phrase bias.

        NOTE(review): the parameter name 'max_lenght_inputs' [sic] is kept
        as is for interface compatibility.
        """
        output_flat, _ = self.model_handler_flat.model(inputs, hidden, seq_lengths, max_lenght_inputs)

        # Add the learnable bias to the first output channel in-place.
        output_flat[..., 0].add_(self.phrase_bias)

        return output_flat, None

    def filters_forward(self, inputs, hidden, seq_lengths, max_length):
        """Get output of each filter without their superposition."""
        return self.model_handler_flat.model.filters_forward(inputs, hidden, seq_lengths, max_length)

    def set_gpu_flag(self, use_gpu):
        """Propagate the GPU flag to the wrapped model handler and model."""
        self.use_gpu = use_gpu
        self.model_handler_flat.use_gpu = use_gpu
        self.model_handler_flat.model.set_gpu_flag(use_gpu)

    def init_hidden(self, batch_size=1):
        """Initialise the hidden state of the wrapped model; this module
        itself keeps no hidden state."""
        self.model_handler_flat.model.init_hidden(batch_size)
        return None

    def thetas_approx(self):
        """Forward to the wrapped model's theta approximation."""
        return self.model_handler_flat.model.thetas_approx()
estimation for the enhancement of noisy speech", IEEE 7 | % Int. Conf. Acoustics, Speech, Signal Processing, Las Vegas, NV, USA, Apr. 8 | % 2008. 9 | % 10 | % ATTENTION: Approximative formula (2.13)+(2.14) as presented in Colins diss 11 | % are used here! Fast and relatively acurate, but only valid for a limited 12 | % set of combinations of mue and beta! 13 | % 14 | % Input: noisyDFT - Noisy DFT coefficients (complex) (might be a vector) 15 | % noisePow - Noise power (same size as noisyDFT) 16 | % prioSNR - A priori SNR (same size as noisyDFT) 17 | % mue - Shape parameter (mue<1 -> supergaussian) 18 | % beta - Exponent for compression function 19 | % 20 | % Output: SHat - Estimated clean speech STFT (amplitude and phase) (same size as noisyDFT) 21 | % 22 | % Version 0.1 23 | % July 2012 24 | 25 | gammaFactor = (gamma(mue+beta/2)./gamma(mue)); 26 | 27 | postSNR = (abs(noisyDFT).^2)./noisePow; 28 | nue = prioSNR./(mue+prioSNR).*postSNR; 29 | % AHat = sqrt(prioSNR./(mue+prioSNR)) .* ( gammaFactor .* (hypergeom(1-mue-beta/2,1,-nue)./hypergeom(1-mue,1,-nue)) ).^(1/beta) .* sqrt(noisePow); 30 | % SHat = AHat .* exp(1i*angle(noisyDFT)); 31 | 32 | % Approximation: (p.25 of Colin Breithaupt's dissertation) 33 | aHat0 = sqrt(prioSNR./(mue+prioSNR)) .* (gammaFactor).^(1/beta) .* sqrt(noisePow); %(2.13) - Output for zero-input 34 | if (beta==1 || beta==2) && mue==1 35 | p0 = 0.2; pInf = 1; 36 | elseif beta==0.5 && (mue==1 || mue==0.5) 37 | p0 = 0.5; pInf = 1; 38 | elseif beta==0.001 && mue==1 39 | p0 = 0.3; pInf = 1.2; 40 | elseif beta==0.001 && mue==0.5 41 | p0 = 0.5; pInf = 1.5; 42 | elseif beta==0.001 && mue==0.3 43 | p0 = 0.1; pInf = 2.8; 44 | else 45 | display(['No approximation found for \mu=' num2str(mue) ' and \beta=' num2str(beta) '! 
class IntercrossDataReaderConfig(DataReaderConfig):
    """Data reader config that substitutes each requested id by a random id
    of the same category.

    Categories are defined by regex patterns; when an item is requested,
    a random id matching the same regex as the requested id is loaded
    instead.
    """

    def __init__(self, category_regexes, id_list, *args, **kwargs):
        """
        :param category_regexes: Regex patterns (strings) defining the
                                 categories.
        :param id_list: List of all available id names.
        """
        super().__init__(*args, **kwargs)

        self._id_list = id_list
        self._category_regexes = category_regexes

    def create_reader(self):
        """Create the reader and attach the id-substitution machinery."""
        reader = super().create_reader()
        self._category_regexes = [re.compile(regex) for regex in self._category_regexes]
        reader._ids_per_categories = self._create_ids_per_regex(self._id_list)

        # Bind the substitution function to the reader instance.
        reader.change_id_name = MethodType(self.change_id_name, reader)
        return reader

    def _create_ids_per_regex(self, id_list):
        """Map each category regex to the set of ids matching it.

        An id may belong to multiple categories; every regex must match at
        least one id.
        """
        ids_per_categories = {regex: set() for regex in self._category_regexes}
        for id_name in id_list:
            for regex in self._category_regexes:
                if regex.match(id_name):
                    ids_per_categories[regex].add(id_name)

        for regex in self._category_regexes:
            assert len(ids_per_categories[regex]) > 0, "No id_name found for regex {}.".format(regex)

        return ids_per_categories

    def get_named_item(self_config, self, id_name, output_names, get_item_fn, chunk_size, pad_fn):
        """Load the item of a random same-category id instead of id_name.

        The originally requested id is recorded under "_id_list".
        """
        new_id_name = self.change_id_name(id_name)
        output = super(IntercrossDataReaderConfig, self_config).get_named_item(
            self, new_id_name, output_names, get_item_fn, chunk_size=chunk_size, pad_fn=pad_fn)
        output["_id_list"] = id_name
        return output

    @staticmethod
    def change_id_name(self, id_name):
        """Return a random id from the first category that id_name matches;
        id_name itself if no category matches."""
        for regex, regex_ids in self._ids_per_categories.items():
            if regex.match(id_name):
                # Sample from a sorted copy: random.sample on a set was
                # deprecated in Python 3.9 and removed in 3.11.
                return random.sample(sorted(regex_ids), k=1)[0]
        # Fixed: the placeholder was never filled before.
        logging.warning("{} does not match any regex, thus remains "
                        "unchanged.".format(id_name))
        return id_name
    def _get_test_dir(self):
        """Create (if needed) and return a per-test-class output directory
        next to this file."""
        out_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               type(self).__name__)
        makedirs_safe(out_dir)
        return out_dir

    def test_save_load(self):
        """Round-trip a waveform through preprocess/postprocess.

        Checks that __getitem__ matches an explicit load+preprocess and
        that mu-law quantisation loses only a bounded amount of signal.
        """
        dir_out = self._get_test_dir()

        raw_feat_gen = RawWaveformLabelGen(
            frame_rate_output_Hz=self.output_frequency_Hz, mu=self.mu)

        id_list = [os.path.join(self.dir_wav, id_ + ".wav")
                   for id_ in self.id_list]

        test_label = raw_feat_gen.load_sample(id_list[1],
                                              self.output_frequency_Hz)

        test_label_pre = raw_feat_gen.preprocess_sample(test_label)
        self.assertTrue(np.isclose(test_label_pre,
                                   raw_feat_gen[id_list[1]]).all())

        test_label_post = raw_feat_gen.postprocess_sample(test_label_pre)
        # Slightly different because quantisation loses information.
        self.assertLess(abs(test_label - test_label_post).max(), 0.0334)

        shutil.rmtree(dir_out)
    def compute_score(self, data: Dict[str, object], output: Dict[str, object],
                      hparams: ExtendedHParams):
        """Compute classification metrics over all ids in output.

        :param data: Unused here; part of the ModularTrainer interface.
        :param output: Mapping id -> dict holding the predicted class
                       scores (under hparams.class_pred_name) and the true
                       class (under hparams.class_true_name).
        :param hparams: Needs num_classes and optionally class_names.
        :return: Dict with weighted accuracy ("W_acc"), unweighted
                 accuracy ("U_acc") and the row-normalised confusion
                 matrix ("conf_matrix").
        """
        # TODO: Reuse UnWeightedAccuracy class here.
        class_pred = []
        class_true = []
        for id_, out_dict in output.items():
            # argmax over the class scores gives the predicted label.
            class_pred.append(out_dict[hparams.class_pred_name].argmax())
            class_true.append(out_dict[hparams.class_true_name].squeeze())
        class_pred = np.stack(class_pred)
        class_true = np.stack(class_true)

        conf_matrix = confusion_matrix(class_true, class_pred,
                                       labels=range(hparams.num_classes))
        # Normalise each row by the class count => per-class recall.
        num_per_class = conf_matrix.sum(axis=1)[:, None]
        num_per_class[num_per_class == 0] = 1.0  # Prevent NaNs.
        conf_matrix_norm = conf_matrix / num_per_class
        W_acc = accuracy_score(class_true, class_pred)
        # Unweighted accuracy = mean of the per-class recalls.
        U_acc = np.sum(np.diag(conf_matrix_norm)) / hparams.num_classes

        self.logger.info("Weighted accuracy {}".format(W_acc))
        self.logger.info("Unweighted accuracy {}".format(U_acc))
        self.logger.info("Confusion matrix\n{}\n{}".format(
            hparams.class_names if hparams.class_names is not None else "",
            conf_matrix_norm))

        return {"W_acc": W_acc, "U_acc": U_acc, "conf_matrix": conf_matrix_norm}
    def __init__(self, config):
        """Wrap the model built from config.config; a None config.config
        makes this a pure pass-through module."""
        super().__init__(input_names=config.input_names,
                         batch_first=True,
                         input_merge_type=config.input_merge_type,
                         name=config.name,
                         output_names=config.output_names)

        if config.config is not None:
            self.model = config.config.create_model()
        else:
            self.model = None

    def forward_module(self, input_, lengths, max_lengths):
        """Run the wrapped model and keep the length dicts up to date.

        :param input_: Merged input tensor.
        :param lengths: Dict of sequence lengths per feature name;
                        updated in-place for all output names.
        :param max_lengths: Dict of max lengths per feature name;
                            updated in-place as well.
        """
        if self.model is None:
            return input_  # Pass-through module.

        if type(self.model) is rnn_dyn.RNNDyn:
            # print("Shape {}, device {}".format(input_.shape, input_.device))
            # RNNDyn takes and returns the lengths via kwargs.
            output, kwargs = self.model(
                input_,
                seq_lengths_input=lengths[self.input_names[0]]
                if self.input_names is not None else None,
                max_length_inputs=max_lengths[self.input_names[0]]
                if self.input_names is not None else None)
            # Update output lengths. Assumes that all outputs have the
            # same length.
            lengths.update({name: kwargs["seq_lengths_input"]
                            for name in self.output_names})
            max_lengths.update({name: kwargs["max_length_inputs"]
                                for name in self.output_names})
        else:
            # This call expects models to change the lengths and
            # max_lengths dictionaries internally.
            output = self.model(input_, lengths, max_lengths)
            for name in self.output_names:
                assert name in lengths, "Sequence length for output {} of {} "\
                    " was not added.".format(name, self.model)
                assert name in max_lengths, "Max length for output {} of {} " \
                    "was not added".format(name, self.model)

        return output

    def init_hidden(self, batch_size=1):
        """Forward hidden-state initialisation to the wrapped model."""
        if self.model is not None:
            self.model.init_hidden(batch_size)
    # File name constants used by the extractors when saving/loading.
    file_name_min_max = "min-max.txt"
    file_name_stats = "stats"
    file_name_mean_std_dev = "mean-std_dev.txt"

    def __init__(self, mean_std_dev=False, min_max=False):
        """Create the requested normaliser extractors.

        :param mean_std_dev: Add a MeanStdDevExtractor.
        :param min_max: Add a MinMaxExtractor.
        """
        self.normalisers = list()

        if mean_std_dev:
            self.normalisers.append(MeanStdDevExtractor())
        if min_max:
            self.normalisers.append(MinMaxExtractor())

    def add_sample(self, sample):
        """Feed one sample into every registered normaliser."""
        for normaliser in self.normalisers:
            normaliser.add_sample(sample)

    def get_mean_std_dev(self):
        """Return the params of the first normaliser offering this method,
        or None (with an error logged) if none does."""
        function_name = self.get_mean_std_dev.__name__
        for normaliser in self.normalisers:
            if callable(getattr(normaliser, function_name, None)):
                return normaliser.get_params()

        # NOTE(review): implicit string concatenation yields "method.Use"
        # (missing space) in the logged message.
        self.logger.error("No normaliser provides a " + function_name + " method."
                          "Use mean_std_dev=True in the constructor to provide one.")
        return None

    def get_min_max(self):
        """Return the params of the first normaliser offering this method,
        or None (with an error logged) if none does."""
        function_name = self.get_min_max.__name__
        for normaliser in self.normalisers:
            if callable(getattr(normaliser, function_name, None)):
                return normaliser.get_params()

        # NOTE(review): same missing space as in get_mean_std_dev.
        self.logger.error("No normaliser provides a " + function_name + " method."
                          "Use min_max=True in the constructor to provide one.")
        return None

    def save(self, filename, datatype=np.float64):
        """Save the accumulated statistics of all normalisers."""
        for normaliser in self.normalisers:
            normaliser.save(filename, datatype)

    def load(self, file_path, datatype=np.float64):
        """Load the statistics of all normalisers.

        Each normaliser reads from file_path suffixed with its own
        file_name_appendix; returns the list of loaded results.
        """
        results = list()
        for normaliser in self.normalisers:
            results.append(normaliser.load(file_path + "-" + normaliser.file_name_appendix, datatype))

        return results
    def init_hidden(self, batch_size=1):
        """Initialise the hidden states of all submodules."""
        for process_group in self.process_groups:
            for module in process_group:
                module.init_hidden(batch_size)

    def forward(self, data_dict, lengths, max_lengths):
        """Run all submodules group by group.

        Each submodule reads its named inputs from data_dict and writes
        its named outputs back into it; lengths/max_lengths are updated
        in-place by the submodules.
        """
        for process_group in self.process_groups:
            for module in process_group:
                module(data_dict, lengths, max_lengths)
        return data_dict

    def inference(self, data_dict, lengths, max_lengths):
        """Run all submodules in inference mode (same group order as forward)."""
        for process_group in self.process_groups:
            for module in process_group:
                module.inference(data_dict, lengths, max_lengths)
        # data_dict["pred_acoustic_features"] = data_dict["pred_intermediate_acoustic_features"]
        return data_dict

    def get_config_as_json(self):
        """Serialise the stored config with jsonpickle (pretty-printed)."""
        return jsonpickle.encode(self.config, indent=4)
class NamedForwardSplitter(NamedForwardModule): 20 | 21 | class Config(ModelConfig): 22 | def __init__(self, 23 | input_names: List[str], 24 | batch_first: bool, 25 | output_names: List[str], 26 | split_sizes: Union[int, List[int]], 27 | input_merge_type: str = ModelConfig.MERGE_TYPE_CAT, 28 | split_dim: int = -1): 29 | super().__init__(input_names=input_names, batch_first=batch_first, input_merge_type=input_merge_type, 30 | name="Splitter[{}->{}]".format(" ".join(input_names), " ".join(output_names)), 31 | output_names=output_names) 32 | self.split_sizes = split_sizes if type(split_sizes) in [list, tuple] else (split_sizes,) 33 | self.split_dim = split_dim 34 | 35 | def create_model(self): 36 | return NamedForwardSplitter(self) 37 | 38 | def __init__(self, config: Config): 39 | super().__init__(input_names=config.input_names, 40 | input_merge_type=config.input_merge_type, 41 | name=config.name, 42 | output_names=config.output_names, 43 | batch_first=config.batch_first) 44 | self.split_sizes = config.split_sizes 45 | self.split_dim = config.split_dim 46 | 47 | def forward_module(self, input_, lengths, max_length, **kwargs): 48 | input_lengths = [lengths[name] for name in self.input_names] 49 | input_max_length = [max_length[name] for name in self.input_names] 50 | output_lengths = reduce(torch.max, input_lengths) 51 | output_max_length = reduce(torch.max, input_max_length) 52 | lengths.update({name: output_lengths for name in self.output_names}) 53 | max_length.update({name: output_max_length for name in self.output_names}) 54 | 55 | return torch.split(input_, self.split_sizes, self.split_dim) 56 | 57 | def get_config_as_json(self): 58 | return jsonpickle.encode(self.config, indent=4) 59 | -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/models/NamedForwardCombiner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: 
class NamedForwardCombiner(NamedForwardModule):
    """Merges several named input tensors into a single named output
    tensor (the merge itself happens in the NamedForwardModule input
    handling; this module just forwards the merged tensor)."""

    class Config(ModelConfig):
        def __init__(self,
                     input_names: List[str],
                     batch_first: bool,
                     output_names: str,
                     input_merge_type: str = ModelConfig.MERGE_TYPE_CAT):
            # output_names is a single string here; the combiner always
            # produces exactly one output.
            super().__init__(input_names=input_names, batch_first=batch_first, input_merge_type=input_merge_type,
                             name="Combiner[{}->{}]".format(" ".join(input_names), output_names),
                             output_names=[output_names])

        def create_model(self):
            return NamedForwardCombiner(self)

    def __init__(self, config: Config):
        super().__init__(input_names=config.input_names,
                         input_merge_type=config.input_merge_type,
                         name=config.name,
                         output_names=config.output_names,
                         batch_first=config.batch_first)
        # Store the config so get_config_as_json does not fail with an
        # AttributeError (it was never stored before).
        self.config = copy.deepcopy(config)

    def forward_module(self, input_, lengths, max_length, **kwargs):
        """Pass the already-merged input through unchanged and
        propagate the maximum of the input lengths to the output
        name."""
        input_lengths = [lengths[name] for name in self.input_names]
        input_max_length = [max_length[name] for name in self.input_names]
        output_lengths = reduce(torch.max, input_lengths)
        output_max_length = reduce(torch.max, input_max_length)
        lengths.update({name: output_lengths for name in self.output_names})
        max_length.update({name: output_max_length for name in self.output_names})
        return input_

    def get_config_as_json(self):
        """Serialise the stored config with jsonpickle."""
        return jsonpickle.encode(self.config, indent=4)
40 | self.annealing_points = annealing_points 41 | 42 | def create_loss(self): 43 | return VAEKLDLoss(self) 44 | 45 | def __init__(self, config): 46 | super().__init__(config) 47 | 48 | self._annealing_steps = config.annealing_steps 49 | self._annealing_points = config.annealing_points # TODO: Different in distributed training, divide by number of machines. 50 | 51 | def forward(self, data, length_dict, step): 52 | loss_dict = super().forward(data, length_dict, step) 53 | 54 | return {k: self._anneal(v, step) for k, v in loss_dict.items()} 55 | 56 | def loss_fn(self, mu, log_var): 57 | kl_loss = 0.5 * (torch.exp(log_var) + mu**2 - 1. - log_var).sum(dim=-1, keepdim=True) 58 | return kl_loss 59 | 60 | def _anneal(self, loss, step): 61 | if step % self._annealing_steps == 0 and step > self._annealing_points[0]: 62 | if step > self._annealing_points[1]: 63 | annealing_factor = 1.0 64 | else: 65 | annealing_factor = (step - self._annealing_points[0]) / (self._annealing_points[1] - self._annealing_points[0]) 66 | else: 67 | annealing_factor = 0.0 68 | return loss * annealing_factor # * self._weight TODO: Add weights to all losses -> requires a loss base class 69 | -------------------------------------------------------------------------------- /test/integration/data_preparation/questions/test_QuestionLabelGen.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | 7 | import unittest 8 | 9 | 10 | import os 11 | import shutil 12 | 13 | import numpy as np 14 | 15 | from idiaptts.misc.utils import makedirs_safe 16 | from idiaptts.src.data_preparation.questions.QuestionLabelGen import QuestionLabelGen 17 | 18 | 19 | class TestQuestionLabelGen(unittest.TestCase): 20 | 21 | num_questions = 425 22 | 23 | @classmethod 24 | def setUpClass(cls): 25 | cls.dir_questions = os.path.join("integration", "fixtures", 26 | "questions") 27 
| cls.file_questions = os.path.join("integration", "fixtures", 28 | "questions-en-radio_dnn_400.hed") 29 | cls.dir_labels = os.path.join("integration", "fixtures", "labels", 30 | "label_state_align") 31 | cls.id_list = cls._get_id_list()[:3] 32 | 33 | @staticmethod 34 | def _get_id_list(): 35 | with open(os.path.join("integration", "fixtures", "database", 36 | "file_id_list.txt")) as f: 37 | id_list = f.readlines() 38 | # Trim entries in-place. 39 | id_list[:] = [s.strip(' \t\n\r') for s in id_list] 40 | return id_list 41 | 42 | def _get_test_dir(self): 43 | out_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 44 | type(self).__name__) 45 | makedirs_safe(out_dir) 46 | return out_dir 47 | 48 | def test_save_load(self): 49 | dir_out = self._get_test_dir() 50 | 51 | label_dict, *extracted_norm_params = QuestionLabelGen.gen_data( 52 | dir_in=self.dir_labels, 53 | file_questions=self.file_questions, 54 | dir_out=dir_out, 55 | id_list=self.id_list, 56 | return_dict=True) 57 | 58 | question_gen = QuestionLabelGen(dir_out, num_questions=409) 59 | norm_params = question_gen.get_normalisation_params(dir_out) 60 | self.assertTrue((extracted_norm_params[0] == norm_params[0]).all()) 61 | self.assertTrue((extracted_norm_params[1] == norm_params[1]).all()) 62 | 63 | test_label = label_dict[self.id_list[1]] 64 | 65 | test_label_pre = question_gen.preprocess_sample(test_label) 66 | self.assertTrue(np.isclose( 67 | test_label_pre, question_gen[self.id_list[1]]).all()) 68 | 69 | test_label_post = question_gen.postprocess_sample(test_label_pre) 70 | self.assertTrue(np.isclose(test_label, test_label_post).all()) 71 | 72 | shutil.rmtree(dir_out) 73 | 74 | def test_load(self): 75 | sample = QuestionLabelGen.load_sample(self.id_list[0], 76 | self.dir_questions, 77 | num_questions=409) 78 | self.assertEqual(409, sample.shape[1]) 79 | -------------------------------------------------------------------------------- /idiaptts/src/data_preparation/audio/normalize_loudness.py: 
class LoudnessNormalizer(object):
    """Normalises each audio file to a reference RMS loudness."""

    logger = logging.getLogger(__name__)

    def __init__(self, ref_rms=0.1):
        """
        :param ref_rms: Target root-mean-square value of the output.
        """
        self.ref_rms = ref_rms

    def process_list(self, id_list, dir_audio, dir_out, format="wav"):
        """Normalise every id in id_list; reads from dir_audio and
        writes same-named files to dir_out."""
        for file_id in id_list:
            self.process_file(file_id + "." + format, dir_audio, dir_out)

    def normalize_signal(self, raw):
        """Return raw shifted to zero mean and scaled to self.ref_rms
        RMS. A silent signal (zero energy after mean removal) is
        returned unchanged instead of raising ZeroDivisionError."""
        raw = raw - raw.mean()
        energy = (raw ** 2).sum()
        if energy == 0:
            self.logger.warning("Cannot loudness-normalise a silent "
                                "signal, returning it unchanged.")
            return raw
        # RMS(raw * s) == ref_rms  <=>  s == sqrt(N * ref_rms^2 / energy)
        return raw * math.sqrt(len(raw) * self.ref_rms ** 2 / energy)

    def process_file(self, file, dir_audio, dir_out):
        """Normalise a single file and save it under the same name in
        dir_out.

        :return: The normalised samples.
        """
        raw, fs = soundfile.read(os.path.join(dir_audio, file))

        raw = self.normalize_signal(raw)

        out_file = os.path.join(dir_out, file)
        makedirs_safe(os.path.dirname(out_file))
        soundfile.write(out_file, raw, samplerate=fs)

        return raw


def main():
    """Command-line entry point: loudness-normalise all files listed
    in a file id list."""
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("-w", "--dir_wav", help="Directory containing the wav files.", type=str,
                        dest="dir_wav", required=True)
    parser.add_argument("-o", "--dir_out", help="Directory to save the trimmed files.", type=str,
                        dest="dir_out", required=True)
    parser.add_argument("-f", "--file_id_list", help="Full path to file containing the ids.", type=str,
                        dest="file_id_list", required=True)
    parser.add_argument("--format", help="Format of the audio file, e.g. WAV.", type=str,
                        dest="format", required=False, default='wav')
    parser.add_argument("--ref_rms", help="Reference RMS to normalize to.", type=float,
                        dest="ref_rms", required=False, default=0.1)

    # Parse arguments
    args = parser.parse_args()

    # Read which files to process.
    with open(args.file_id_list) as f:
        id_list = f.readlines()
    # Trim entries in-place.
    id_list[:] = [s.strip(' \t\n\r') for s in id_list]

    # Create output directory if missing.
    makedirs_safe(args.dir_out)

    # Start loudness normalisation.
    loudness_normalizer = LoudnessNormalizer(args.ref_rms)
    loudness_normalizer.process_list(id_list, args.dir_wav, args.dir_out, args.format)


if __name__ == "__main__":
    main()
line.split("=")) 36 | meta[key[2:-2]] = value[1:-1] 37 | 38 | return meta 39 | 40 | 41 | def get_extensions(): 42 | return [] 43 | 44 | 45 | def get_install_requirements(): 46 | with open("requirements.txt") as f: 47 | requirements = f.read().splitlines() 48 | 49 | return requirements 50 | 51 | 52 | def get_package_files(directory): 53 | paths = [] 54 | for (path, directories, filenames) in os.walk(directory): 55 | for filename in filenames: 56 | paths.append(os.path.join('..', path, filename)) 57 | return paths 58 | 59 | 60 | def setup_package(): 61 | with open("README.md") as f: 62 | long_description = f.read() 63 | meta = collect_metadata() 64 | setup( 65 | name="idiaptts", 66 | version=meta["version"], 67 | description=meta["description"], 68 | long_description=long_description, 69 | maintainer=meta["maintainer"], 70 | maintainer_email=meta["email"], 71 | url=meta["url"], 72 | license=meta["license"], 73 | classifiers=[ 74 | # "Development Status :: 5 - Production/Stable", 75 | "Intended Audience :: Science/Research", 76 | "Intended Audience :: Developers", 77 | "License :: OSI Approved :: MIT License", 78 | "Topic :: Scientific/Engineering", 79 | "Programming Language :: Python :: 3.6", 80 | # "Programming Language :: Python :: 3.7", 81 | ], 82 | packages=find_packages(exclude=["tools", "tests"]), 83 | install_requires=get_install_requirements(), 84 | ext_modules=get_extensions(), 85 | # scripts=["idiaptts/misc/get_audio_length.sh"], # Installs scripts for command line usage. 86 | 87 | package_data={ 88 | '': ['*.sh'] + get_package_files('idiaptts/scripts') # All shell scripts and everything in scripts/. 
class Attention(torch.nn.Module):
    """
    Base class for all attention mechanisms.
    """

    def allows_batched_forward(self) -> bool:
        """
        Whether the attention block can be run once and not incremental.

        :raises NotImplementedError: Implement in subclass.
        :return: If true, forward_batched will be used.
        :rtype: bool
        """
        raise NotImplementedError()

    def get_go_frame(self) -> Any:
        """
        Returns the attention state for the very first frame. The state
        can be of any type, and can be used in the forward functions in
        stateful attention.

        :raises NotImplementedError: Implement in subclass.
        :return: The initial attention state.
        :rtype: Any
        """
        # The original docstring carried a copy-pasted return doc from
        # allows_batched_forward and an unreachable
        # "return attention_state" referencing an undefined name.
        raise NotImplementedError()

    def forward_batched(self,
                        encoder_input: torch.Tensor,
                        attention_state: Any) \
            -> Tuple[torch.Tensor, torch.Tensor]:
        """
        A forward call for the entire sequence at once. Returns the
        attention context for the entire sequence and the attention
        matrix used to compute it.

        :param encoder_input: Computed sequence of encoder.
        :type encoder_input: torch.Tensor
        :param attention_state: Last returned attention state.
        :type attention_state: Any
        :raises NotImplementedError: Implement in subclass.
        :return: attention context, attention matrix
        :rtype: Tuple[torch.Tensor, torch.Tensor]
        """
        raise NotImplementedError()

    def forward_incremental(self,
                            current_frame_idx: int,
                            encoder_input: torch.Tensor,
                            decoder_input: torch.Tensor,
                            attention_state) \
            -> Tuple[torch.Tensor, torch.Tensor, Any]:
        """
        Incremental call of attention mechanism.

        :param current_frame_idx: Index of current frame in sequence.
        :type current_frame_idx: int
        :param encoder_input: Computed sequence of encoder.
        :type encoder_input: torch.Tensor
        :param decoder_input: Last decoder output.
        :type decoder_input: torch.Tensor
        :param attention_state: Last returned attention state.
        :type attention_state: Any
        :raises NotImplementedError: Implement in subclass.
        :return: attention context (result of weighted sum), weights
            used to compute the attention context, new attention state
        :rtype: Tuple
        """
        raise NotImplementedError()
class L1WeightedVUVMSELoss(WMSELoss):
    """Weighted MSE on the (LF0, V/UV) dimensions plus an L1 sparsity
    constraint on the remaining (spiking amplitude) input dimensions."""

    def __init__(self, weight_unvoiced=0.5, vuv_loss_weight=1, size_average=True, reduce=True, L1_loss_weight=1, vector_loss=False):
        """
        :param weight_unvoiced: Weight of loss at unvoiced frames, voiced frames weight is 1.
        :param vuv_loss_weight: Weighting of loss on V/UV flag (decision_index_weight).
        :param size_average: Return mean over time, only used when reduce=True.
        :param reduce: Sum over time.
        :param L1_loss_weight: Weighting of L1 loss on spiking inputs.
        :param vector_loss: Return the three losses stacked in the first dimension. Reduce and size_average apply as before.
        """
        super().__init__(dim_out=2,
                         weighting_decision_index=1,
                         weight=weight_unvoiced,
                         decision_index_weight=vuv_loss_weight,
                         size_average=size_average,
                         reduce=reduce)

        # Registered as a buffer so the weight follows the module
        # across devices (.to/.cuda) without being a learnable parameter.
        self.register_buffer('L1_loss_weight', torch.Tensor([L1_loss_weight]).squeeze_())
        self.vector_loss = vector_loss

    def forward(self, input, target):
        # First two feature dimensions are (LF0, V/UV); the remaining
        # dimensions are spike amplitudes subject to the L1 constraint.
        lf0_vuv_input = input[..., 0:2]
        amps_input = input[..., 2:]

        lf0_vuv_loss = super().forward(lf0_vuv_input, target)
        lf0_loss = lf0_vuv_loss[..., 0]
        vuv_loss = lf0_vuv_loss[..., 1]
        # Mean absolute amplitude encourages sparse spikes.
        L1_constraint = amps_input.abs().mean(dim=-1)

        # TODO THIS IS A DIRTY HACK it works but... what with non zero weight? (see the hacky part with the mask)
        # NOTE(review): in-place division normalises the LF0 loss by the
        # per-sequence sum of target[..., 1] -- presumably the V/UV flag,
        # i.e. the number of voiced frames. TODO confirm against caller.
        lf0_loss.div_(target[..., 1].sum(dim=0))
        ##########################################################################

        if self.vector_loss:
            if not self.reduce:  # Ignore size_average.
                # Stack the three unreduced losses along a new first dim.
                return torch.cat((lf0_loss[None, ...], vuv_loss[None, ...], L1_constraint[None, ...]))
            if not self.size_average:
                return torch.cat((lf0_loss.sum(dim=0), vuv_loss.sum(dim=0), L1_constraint.sum(dim=0)))
            else:
                return torch.cat((lf0_loss.mean(dim=0), vuv_loss.mean(dim=0), L1_constraint.mean(dim=0)))
        else:
            # NOTE(review): the L1 weight is only applied on this branch,
            # not in the vector_loss case above -- confirm intended.
            L1_constraint.mul_(self.L1_loss_weight)

            if not self.reduce:
                return torch.cat((lf0_vuv_loss, L1_constraint.unsqueeze(-1)), dim=-1)
            if not self.size_average:
                return lf0_loss.sum() + vuv_loss.sum() + L1_constraint.sum()
            else:
                return lf0_loss.mean() + vuv_loss.mean() + L1_constraint.mean()
class FlatLF0LabelGen(LF0LabelGen):
    """
    Load LF0 and (by default) remove phrase curve from it.
    The atom amplitude normalisation is used to normalise the LF0 curve.
    """
    # File extension of the stored phrase curve files.
    ext_phrase = ".phrase"

    def __init__(self, dir_lf0_labels, dir_phrase_labels, remove_phrase=True):
        # add_deltas=False: load plain LF0 values without delta features.
        super().__init__(dir_labels=dir_lf0_labels, add_deltas=False)
        self.dir_phrase = dir_phrase_labels
        self.remove_phrase = remove_phrase

    def get_phrase_curve(self, id_name):
        # Phrase curves are stored as raw float32 binary files.
        return np.fromfile(os.path.join(self.dir_phrase, id_name + self.ext_phrase), dtype=np.float32)

    def __getitem__(self, id_name):
        """Return the preprocessed sample with the given id_name."""
        sample = self.load_sample(id_name, self.dir_labels)
        if self.remove_phrase:
            # Subtract the phrase curve from the LF0 values (column 0);
            # assumes the phrase curve has the same frame count -- TODO confirm.
            phrase_curve = self.get_phrase_curve(id_name)
            sample[:, 0] -= phrase_curve
        sample = self.preprocess_sample(sample)

        return sample

    def get_normalisation_params(self, dir_out, file_name=None):
        """
        Read the mean std_dev values from a file.
        Save them in self.norm_params.

        :param dir_out: Directory containing the normalisation file, usually the atom directory.
        :param file_name: Prefix of normalisation file.
                          Expects file to be named <file_name>-<MeanStdDevExtractor.file_name_appendix>.bin
        :return: Tuple of normalisation parameters (mean, std_dev).
        """

        full_file_name = (file_name + "-" if file_name is not None else "") + MeanStdDevExtractor.file_name_appendix + ".bin"

        # Use the same normalisation parameters for the LF0 curve without phrase curve
        # as for atoms. The phrase directory is the same as the atom directory.
        mean, std_dev = MeanStdDevExtractor.load(os.path.join(self.dir_phrase, full_file_name))
        mean, std_dev = mean[:, 0:1], std_dev[:, 0:1]  # Dimension of both is 1 x 2(atom amplitude, theta).

        # Manually set V/UV normalisation parameters (mean 0, std_dev 1, i.e.
        # identity) and save the concatenated normalisation parameters locally.
        self.norm_params = np.concatenate((mean, np.zeros((1, 1))), axis=1),\
                           np.concatenate((std_dev, np.ones((1, 1))), axis=1)

        return self.norm_params
class SingleChannelNoiseReduction(object):
    """Runs a MATLAB-based single-channel noise reduction on wav files."""

    logger = logging.getLogger(__name__)

    def __init__(self):
        # Connects to a found shared MATLAB session or creates a new
        # shared session; requires IDIAPTTS_ROOT to be set.
        self.eng = matlab.engine.connect_matlab()
        self.eng.addpath(os.path.join(os.environ["IDIAPTTS_ROOT"], "scripts", "noise_reduction"))

    @staticmethod
    def nparray_to_matlab(x):
        """Convert a numpy array to a MATLAB double array."""
        return matlab.double(x.tolist())

    def process_list(self, id_list, dir_audio, dir_out, extension="wav"):
        """Denoise every id in id_list; reads from dir_audio and writes
        same-named files to dir_out."""
        for file_id in id_list:
            self.process_file(file_id + "." + extension, dir_audio, dir_out)

    def process_file(self, file, dir_audio, dir_out):
        """Denoise a single file via the MATLAB ``runme`` script and
        save it under the same name in dir_out.

        :return: The enhanced samples as returned by MATLAB.
        """
        raw, fs = soundfile.read(os.path.join(dir_audio, file))

        data_noisy_matlab = self.nparray_to_matlab(raw)
        data_noisy_matlab = self.eng.transpose(data_noisy_matlab)

        enhanced = self.eng.runme(data_noisy_matlab, fs)

        out_file = os.path.join(dir_out, file)
        makedirs_safe(os.path.dirname(out_file))
        soundfile.write(out_file, enhanced, samplerate=fs)

        return enhanced


def main():
    """Command-line entry point: denoise all files listed in a file id
    list."""
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("-w", "--dir_wav", help="Directory containing the wav files.", type=str,
                        dest="dir_wav", required=True)
    parser.add_argument("-o", "--dir_out", help="Directory to save the denoised files.", type=str,
                        dest="dir_out", required=True)
    parser.add_argument("-f", "--file_id_list", help="Full path to file containing the ids.", type=str,
                        dest="file_id_list", required=True)
    parser.add_argument("--format", help="Format of the audio file, e.g. WAV.", type=str,
                        dest="format", required=False, default='wav')

    # Parse arguments
    args = parser.parse_args()

    # Read which files to process.
    with open(args.file_id_list) as f:
        id_list = f.readlines()
    # Trim entries in-place.
    id_list[:] = [s.strip(' \t\n\r') for s in id_list]

    # Create output directory if missing.
    makedirs_safe(args.dir_out)

    # Start noise reduction. (The original named this variable
    # loudness_normalizer and the comment said "silence removal" --
    # copy-paste leftovers from normalize_loudness.py.)
    noise_reducer = SingleChannelNoiseReduction()
    noise_reducer.process_list(id_list, args.dir_wav, args.dir_out, args.format)


if __name__ == "__main__":
    main()
class TestAtomLabelGen(unittest.TestCase):
    """Integration tests for AtomLabelGen extraction and round trips."""

    # NOTE(review): unused in this class -- confirm before removing.
    num_questions = 425

    @classmethod
    def setUpClass(cls):
        # Fixture locations, relative to the test working directory.
        cls.dir_database = os.path.realpath(os.path.join(
            "integration", "fixtures", "database"))
        cls.dir_wav = os.path.join(cls.dir_database, "wav")
        cls.dir_atoms = os.path.join("integration", "fixtures",
                                     "wcad-0.030_0.060_0.090_0.120_0.150")
        cls.dir_wcad_root = os.path.join("IdiapTTS", "tools", "wcad")
        # Only the first three utterances, to keep the test fast.
        cls.id_list = cls._get_id_list()[:3]

    @staticmethod
    def _get_id_list():
        # Read the utterance ids, one id per line.
        with open(os.path.join("integration", "fixtures", "database",
                               "file_id_list.txt")) as f:
            id_list = f.readlines()
        id_list[:] = [s.strip(' \t\n\r') for s in id_list]
        return id_list

    def _get_test_dir(self):
        # Per-class scratch directory next to this test file.
        out_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               type(self).__name__)
        makedirs_safe(out_dir)
        return out_dir

    def test_save_load(self):
        """Extraction, normalisation and post-processing must be stable."""
        dir_out = self._get_test_dir()

        # Atom extraction parameters (theta values in seconds).
        theta_start = 0.01
        theta_stop = 0.055
        theta_step = 0.005
        thetas = np.arange(theta_start, theta_stop, theta_step)
        k = 6
        frame_size_ms = 5

        atom_gen = AtomLabelGen(self.dir_wcad_root, dir_out, thetas, k,
                                frame_size_ms)
        label_dict, *extracted_norm_params = atom_gen.gen_data(
            self.dir_wav, dir_out, id_list=self.id_list, return_dict=True)

        # Call this once before starting the pre-processing.
        norm_params = atom_gen.get_normalisation_params(dir_out)
        # Parameters read back from disk must equal the extracted ones.
        self.assertTrue((extracted_norm_params[0] == norm_params[0]).all())
        self.assertTrue((extracted_norm_params[1] == norm_params[1]).all())

        test_label = label_dict[self.id_list[1]]
        test_label_pre = atom_gen.preprocess_sample(test_label)
        self.assertTrue(np.isclose(test_label_pre,
                                   atom_gen[self.id_list[1]]).all())

        test_label_post = atom_gen.postprocess_sample(test_label_pre)
        # Post-precessing does peak selection, so pre and post labels
        # are not the same and we cannot check for equality here.

        # Regression value from a known-good run.
        self.assertTrue(np.isclose(-0.2547, test_label_post.sum(),
                                   atol=0.0001))

        # Remove the side-effect file written by the wcad tool.
        os.remove(os.path.join(self.dir_database, "wcad_.txt"))
        shutil.rmtree(dir_out)

    def test_load(self):
        """Loading a stored sample must yield the expected frame count."""
        sample = AtomLabelGen.load_sample(self.id_list[0], self.dir_atoms,
                                          num_thetas=5)
        self.assertEqual(1931, sample.shape[0])
@r~r-b+iy=n:1_3/A/0_0_3/B/1-0-3:1-1&4-3#2-2$2-2>2-2<2-2|iy/C/0+0+3/D/content_2/E/content+1:3+2&2+1#1+1/F/content_2/G/0_0/H/6=4:1=1&L-L%/I/0_0/J/6+4-1 11 | 10000000 11000000 r~b-iy+n=s:2_2/A/0_0_3/B/1-0-3:1-1&4-3#2-2$2-2>2-2<2-2|iy/C/0+0+3/D/content_2/E/content+1:3+2&2+1#1+1/F/content_2/G/0_0/H/6=4:1=1&L-L%/I/0_0/J/6+4-1 12 | 11000000 12000000 b~iy-n+s=@r:3_1/A/0_0_3/B/1-0-3:1-1&4-3#2-2$2-2>2-2<2-2|iy/C/0+0+3/D/content_2/E/content+1:3+2&2+1#1+1/F/content_2/G/0_0/H/6=4:1=1&L-L%/I/0_0/J/6+4-1 13 | 12000000 13000000 iy~n-s+@r=r:1_3/A/1_0_3/B/0-0-3:1-2&5-2#3-2$2-2>1-1<3-1|@r/C/1+1+4/D/content_1/E/content+2:4+1&3+0#1+0/F/0_0/G/0_0/H/6=4:1=1&L-L%/I/0_0/J/6+4-1 14 | 13000000 14000000 n~s-@r+r=p:2_2/A/1_0_3/B/0-0-3:1-2&5-2#3-2$2-2>1-1<3-1|@r/C/1+1+4/D/content_1/E/content+2:4+1&3+0#1+0/F/0_0/G/0_0/H/6=4:1=1&L-L%/I/0_0/J/6+4-1 15 | 14000000 15000000 s~@r-r+p=a:3_1/A/1_0_3/B/0-0-3:1-2&5-2#3-2$2-2>1-1<3-1|@r/C/1+1+4/D/content_1/E/content+2:4+1&3+0#1+0/F/0_0/G/0_0/H/6=4:1=1&L-L%/I/0_0/J/6+4-1 16 | 15000000 16000000 @r~r-p+a=s:1_4/A/0_0_3/B/1-1-4:2-1&6-1#3-1$2-1>2-0<4-0|a/C/0+0+0/D/content_1/E/content+2:4+1&3+0#1+0/F/0_0/G/0_0/H/6=4:1=1&L-L%/I/0_0/J/6+4-1 17 | 16000000 17000000 r~p-a+s=t:2_3/A/0_0_3/B/1-1-4:2-1&6-1#3-1$2-1>2-0<4-0|a/C/0+0+0/D/content_1/E/content+2:4+1&3+0#1+0/F/0_0/G/0_0/H/6=4:1=1&L-L%/I/0_0/J/6+4-1 18 | 17000000 18000000 p~a-s+t=#:3_2/A/0_0_3/B/1-1-4:2-1&6-1#3-1$2-1>2-0<4-0|a/C/0+0+0/D/content_1/E/content+2:4+1&3+0#1+0/F/0_0/G/0_0/H/6=4:1=1&L-L%/I/0_0/J/6+4-1 19 | 18000000 19000000 a~s-t+#=xx:4_1/A/0_0_3/B/1-1-4:2-1&6-1#3-1$2-1>2-0<4-0|a/C/0+0+0/D/content_1/E/content+2:4+1&3+0#1+0/F/0_0/G/0_0/H/6=4:1=1&L-L%/I/0_0/J/6+4-1 20 | 19000000 20000000 s~t-#+xx=xx:xx_xx/A/1_1_4/B/xx-xx-xx:xx-xx&xx-xx#xx-xx$xx-xx>xx-xx 4 | # 5 | 6 | 7 | import unittest 8 | 9 | import os 10 | import shutil 11 | 12 | from idiaptts.misc.utils import makedirs_safe 13 | from idiaptts.src.data_preparation.NpzDataReader import NpzDataReader 14 | from 
idiaptts.src.data_preparation.phonemes.PhonemeDurationLabelGen import PhonemeDurationLabelGen 15 | 16 | 17 | class TestPhonemeDurationLabelGen(unittest.TestCase): 18 | 19 | @classmethod 20 | def setUpClass(cls): 21 | cls.dir_labels_state = os.path.join("integration", "fixtures", 22 | "labels", "label_state_align") 23 | cls.id_list = cls._get_id_list()[:3] 24 | 25 | @staticmethod 26 | def _get_id_list(): 27 | with open(os.path.join("integration", "fixtures", "database", 28 | "file_id_list.txt")) as f: 29 | id_list = f.readlines() 30 | # Trim entries in-place. 31 | id_list[:] = [s.strip(' \t\n\r') for s in id_list] 32 | return id_list 33 | 34 | def _get_test_dir(self): 35 | out_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 36 | type(self).__name__) 37 | makedirs_safe(out_dir) 38 | return out_dir 39 | 40 | def test_save_load(self): 41 | dir_out = self._get_test_dir() 42 | 43 | label_dict, *extracted_norm_params = PhonemeDurationLabelGen.gen_data( 44 | self.dir_labels_state, dir_out, id_list=self.id_list, 45 | label_type="full_state_align", return_dict=True) 46 | 47 | dur_gen = PhonemeDurationLabelGen(dir_labels=dir_out) 48 | norm_params = dur_gen.get_normalisation_params(dir_out) 49 | self.assertTrue((extracted_norm_params[0] == norm_params[0]).all()) 50 | self.assertTrue((extracted_norm_params[1] == norm_params[1]).all()) 51 | 52 | test_label = label_dict[self.id_list[1]] 53 | 54 | test_label_pre = dur_gen.preprocess_sample(test_label) 55 | self.assertTrue((test_label_pre == dur_gen[self.id_list[1]]).all()) 56 | 57 | test_label_post = dur_gen.postprocess_sample(test_label_pre) 58 | self.assertTrue((test_label == test_label_post).all()) 59 | 60 | self.assertRaises(AssertionError, PhonemeDurationLabelGen.Config, 61 | name="durations", directory=dir_out, 62 | load_as_matrix=True) 63 | self.assertRaises(AssertionError, PhonemeDurationLabelGen.Config, 64 | name="durations", directory=dir_out, 65 | pad_mode="edge", load_as_matrix=True) 66 | 67 | dur_gen = 
PhonemeDurationLabelGen( 68 | dir_labels=dir_out, pad_mode='edge', 69 | norm_type=NpzDataReader.Config.NormType.NONE, load_as_matrix=True) 70 | norm_params = dur_gen.get_normalisation_params(dir_out) 71 | 72 | self.assertIsNone( 73 | dur_gen.norm_params, 74 | "No normalisation should be used on attention matrix.") 75 | 76 | test_label = dur_gen[self.id_list[1]] 77 | 78 | expected_dims = (int(test_label_post.sum()), len(test_label_post)) 79 | self.assertEqual(expected_dims, test_label.shape) 80 | 81 | shutil.rmtree(dir_out) 82 | -------------------------------------------------------------------------------- /test/unit/neural_networks/pytorch/test_ModularModelHandlerPyTorch.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ 3 | # Written by Bastian Schnell 4 | # 5 | 6 | 7 | import unittest 8 | 9 | import os 10 | import shutil 11 | import torch 12 | 13 | from idiaptts.misc.utils import makedirs_safe 14 | from idiaptts.src.model_trainers.ModularTrainer import ModularTrainer 15 | import idiaptts.src.neural_networks.pytorch.models.rnn_dyn as rnn_dyn 16 | from idiaptts.src.neural_networks.pytorch.ModularModelHandlerPyTorch import ModularModelHandlerPyTorch 17 | from idiaptts.src.neural_networks.pytorch.utils import equal_iterable 18 | 19 | 20 | class TestModularHandlerPyTorch(unittest.TestCase): 21 | 22 | out_dir = None 23 | 24 | @classmethod 25 | def setUpClass(cls): 26 | cls.out_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 27 | type(cls()).__name__) 28 | makedirs_safe(cls.out_dir) # Create class name directory. 29 | 30 | @classmethod 31 | def tearDownClass(cls): 32 | os.rmdir(cls.out_dir) # Remove class name directory, should be empty. 33 | 34 | def test_save_load_equality(self): 35 | hparams = ModularTrainer.create_hparams() 36 | hparams.optimiser_type = "Adam" 37 | hparams.optimiser_args["lr"] = 0.1 38 | # Add function name to path. 
39 | out_dir = os.path.join(self.out_dir, "test_save_load_equality") 40 | model_path = os.path.join(out_dir, "test_model") 41 | 42 | # Create a new model, run the optimiser once to obtain a state, and save everything. 43 | in_dim, out_dim = 10, 4 44 | total_epochs = 10 45 | model_handler = ModularModelHandlerPyTorch() 46 | model_handler.model = rnn_dyn.Config(in_dim=in_dim, layer_configs=[ 47 | rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=out_dim) 48 | ]).create_model() 49 | model_handler.set_optimiser(hparams) 50 | 51 | seq_length = torch.tensor((10, 7), dtype=torch.long) 52 | batch_size = 2 53 | test_input = torch.ones([seq_length[0], batch_size, in_dim]) 54 | model_handler.model.init_hidden(batch_size) 55 | output = model_handler.model(test_input, seq_lengths_input=seq_length, 56 | max_length_inputs=seq_length.max())[0] 57 | output.mean().backward() 58 | 59 | model_handler.optimiser.step() 60 | model_handler.save_checkpoint(epoch=total_epochs, model_path=model_path) 61 | 62 | # Create a new model handler and test load save. 
63 | model_handler_copy = ModularModelHandlerPyTorch() 64 | model_handler_copy.load_checkpoint( 65 | hparams, 66 | model_path=model_path, 67 | load_optimiser=True, 68 | epoch=total_epochs, 69 | verbose=False) 70 | 71 | zip_params = zip(model_handler.model.parameters(), 72 | model_handler_copy.model.parameters()) 73 | self.assertTrue(all([(x == x_copy).all() for x, x_copy in zip_params]), 74 | "Loaded and saved models are not the same.") 75 | current_opt_state = model_handler.optimiser.state_dict()["state"] 76 | copy_opt_state = model_handler_copy.optimiser.state_dict()["state"] 77 | self.assertTrue(equal_iterable(current_opt_state, copy_opt_state), 78 | "Loaded and saved optimisers are not the same.") 79 | 80 | shutil.rmtree(out_dir) 81 | -------------------------------------------------------------------------------- /idiaptts/scripts/tts_frontend/install: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2017 by Idiap Research Institute, http://www.idiap.ch 4 | # 5 | # Author(s): 6 | # Bastian Schnell, DATE 7 | # 8 | # In-depth Description: 9 | # 10 | # 11 | 12 | usage() { 13 | cat <<- EOF 14 | usage: $PROGNAME [OPTIONS] 15 | 16 | Program installs the language specific files into the correct festival directory. 17 | 18 | OPTIONS: 19 | -h show this help 20 | 21 | 22 | EOF 23 | } 24 | 25 | ############################### 26 | # Default options and functions 27 | # 28 | # set -o xtrace # Prints every command before running it, same as "set -x". 29 | # set -o errexit # Exit when a command fails, same as "set -e". 30 | # # Use "|| true" for those who are allowed to fail. 31 | # # Disable (set +e) this mode if you want to know a nonzero return value. 32 | # set -o pipefail # Catch mysqldump fails. 33 | # set -o nounset # Exit when using undeclared variables, same as "set -u". 34 | # set -o noclobber # Prevents the bash shell from overwriting files, but you can force it with ">|". 
35 | export SHELLOPTS # Used to pass above shell options to any called subscripts. 36 | 37 | readonly PROGNAME=$(basename $0) 38 | readonly PROGDIR=$(readlink -m $(dirname $0)) 39 | readonly ARGS="$@" 40 | 41 | # Provide log function. Use source bash_logging.sh if file exists in current PATH variable. 42 | # source does not work with -u option, so disable it temporarily. 43 | # set +u 44 | # source bash_logging.sh 45 | # set -u 46 | log() 47 | { 48 | echo -e >&2 "$@" 49 | } 50 | 51 | # Die should be called when an error occurs with a HELPFUL error message. 52 | die () { 53 | log "ERROR" "$@" 54 | exit 1 55 | } 56 | 57 | # The main function of this file. 58 | main() 59 | { 60 | log "INFO" "Run ${PROGNAME} $@" 61 | 62 | # Read parameters. 63 | while getopts ":h" flag; do # If a character is followed by a colon (e.g. f:), that option is expected to have an argument. 64 | case "${flag}" in 65 | -) case "${OPTARG}" in 66 | *) die "Invalid option: --${OPTARG}" ;; 67 | esac;; 68 | h) usage; exit ;; 69 | \?) die "Invalid option: -$OPTARG" ;; 70 | :) die "Option -$OPTARG requires an argument." ;; 71 | esac 72 | done 73 | shift $(($OPTIND - 1)) # Skip the already processed arguments. 74 | 75 | # Read arguments. 76 | local expectedArgs=1 # Always use "local" for variables, global variables are evil anyway. 77 | if [[ $# != "${expectedArgs}" ]]; then 78 | usage # Function call. 79 | die "Wrong number of parameters, expected ${expectedArgs} but got $#." 80 | fi 81 | # Read and check parameter. 82 | local dir_festival=${1:-} 83 | if [ ! -f ${dir_festival}/bin/festival ]; then 84 | die "Wrong path to festival directory. Cannot find /bin/festival in ${dir_festival}" 85 | fi 86 | 87 | ################################## 88 | # Main functionality of this file. 89 | # 90 | mkdir -p festival_files 91 | echo "Extract files..." 92 | tar xzf English/festival_files.tar.gz festival_files/ 93 | echo "Done" 94 | 95 | echo "Copy to festival directory..." 
96 | cp -r festival_files/* ${dir_festival} 97 | echo "Done" 98 | 99 | echo "Clean up..." 100 | rm -r festival_files 101 | echo "Done" 102 | } 103 | 104 | # Call the main function, provide all parameters. 105 | main "$@" 106 | -------------------------------------------------------------------------------- /idiaptts/src/data_preparation/audio/high_pass_filter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2020 Idiap Research Institute, http://www.idiap.ch/ 5 | # Written by Bastian Schnell 6 | # 7 | 8 | """Module description. 9 | """ 10 | 11 | # System imports. 12 | import os 13 | import logging 14 | import argparse 15 | 16 | # Third-party imports. 17 | import soundfile 18 | 19 | # Local source tree imports. 20 | from scipy import signal 21 | 22 | from idiaptts.misc.utils import makedirs_safe 23 | 24 | 25 | class HighPassFilter(object): 26 | logger = logging.getLogger(__name__) 27 | 28 | def __init__(self, stop_freq_Hz=70, pass_freq_Hz=100, filter_order=1001): 29 | self.stop_freq_Hz = stop_freq_Hz 30 | self.pass_freq_Hz = pass_freq_Hz 31 | self.filter_order = filter_order 32 | 33 | def process_list(self, id_list, dir_audio, dir_out, format="wav"): 34 | 35 | for file_id in id_list: 36 | self.process_file(file_id + "." + format, dir_audio, dir_out) 37 | 38 | def process_file(self, file, dir_audio, dir_out): 39 | 40 | raw, fs = soundfile.read(os.path.join(dir_audio, file)) 41 | 42 | raw = self.highpass_filter(raw, fs) 43 | 44 | out_file = os.path.join(dir_out, file) 45 | makedirs_safe(os.path.dirname(out_file)) 46 | soundfile.write(out_file, raw, samplerate=fs) 47 | 48 | return raw 49 | 50 | def highpass_filter(self, raw, fs): 51 | 52 | nyquist_rate = fs / 2. 
53 | desired = (0, 0, 1, 1) 54 | bands = (0, self.stop_freq_Hz, self.pass_freq_Hz, nyquist_rate) 55 | filter_coefs = signal.firls(self.filter_order, bands, desired, nyq=nyquist_rate) 56 | 57 | filtered_raw = signal.filtfilt(filter_coefs, [1], raw) 58 | return filtered_raw 59 | 60 | 61 | def main(): 62 | logging.basicConfig(level=logging.DEBUG) 63 | 64 | parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) 65 | parser.add_argument("-w", "--dir_wav", help="Directory containing the wav files.", type=str, 66 | dest="dir_wav", required=True) 67 | parser.add_argument("-o", "--dir_out", help="Directory to save the trimmed files.", type=str, 68 | dest="dir_out", required=True) 69 | parser.add_argument("-f", "--file_id_list", help="Full path to file containing the ids.", type=str, 70 | dest="file_id_list", required=True) 71 | parser.add_argument("--format", help="Format of the audio file, e.g. WAV.", type=str, 72 | dest="format", required=False, default='wav') 73 | parser.add_argument("--stop_freq_Hz", help="Frequencies below are blocked.", type=float, 74 | dest="stop_freq_Hz", required=False, default=70) 75 | parser.add_argument("--pass_freq_Hz", help="Filter gain raises between stop_freq_Hz and pass_freq_Hz from 0 to 1.", 76 | type=float, dest="pass_freq_Hz", required=False, default=100) 77 | parser.add_argument("--filter_order", help="Size of the filter window in raw signal.", type=int, 78 | dest="filter_order", required=False, default=1001) 79 | 80 | # Parse arguments 81 | args = parser.parse_args() 82 | 83 | # Read which files to process. 84 | with open(args.file_id_list) as f: 85 | id_list = f.readlines() 86 | # Trim entries in-place. 87 | id_list[:] = [s.strip(' \t\n\r') for s in id_list] 88 | 89 | # Create output directory if missing. 90 | makedirs_safe(args.dir_out) 91 | 92 | # Start silence removal. 
93 | high_pass_filter = HighPassFilter(args.stop_freq_Hz, args.pass_freq_Hz, args.filter_order) 94 | high_pass_filter.process_list(id_list, args.dir_wav, args.dir_out, args.format) 95 | 96 | 97 | if __name__ == "__main__": 98 | main() 99 | -------------------------------------------------------------------------------- /idiaptts/src/neural_networks/pytorch/models/rnn_dyn/CNNWrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/ 5 | # Written by Bastian Schnell 6 | # 7 | 8 | from typing import List, Tuple 9 | from idiaptts.src.neural_networks.pytorch.models.rnn_dyn.FFWrapper import FFWrapper 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | from .TransposingWrapper import TransposingWrapper 15 | 16 | 17 | class CNNWrapper(TransposingWrapper): 18 | def __init__(self, in_dim, layer_config, batch_first: bool = True): 19 | super(CNNWrapper, self).__init__(in_dim, layer_config, batch_first) 20 | 21 | def _create_module(self, in_dim, layer_config): 22 | layer_list = [] 23 | nonlin = self._get_nonlin(layer_config.nonlin) 24 | for _ in range(layer_config.num_layers): 25 | if 'padding' not in layer_config.kwargs \ 26 | and ('stride' not in layer_config.kwargs 27 | or layer_config.kwargs['stride'] == 1) \ 28 | and ('dilation' not in layer_config.kwargs 29 | or layer_config.kwargs['dilation'] == 1): 30 | 31 | kernel = layer_config.kwargs.get('kernel_size') 32 | if type(kernel) in [Tuple, List]: 33 | kernel = kernel[0] 34 | # Designed so that sequence length remains the same. 35 | # Usefull for speech. 36 | # TODO: Take into account dilation and padding. 
37 | layer_config.kwargs['padding'] = int((kernel - 1) / 2) 38 | 39 | layer = getattr(torch.nn, layer_config.type)( 40 | in_dim, layer_config.out_dim, **layer_config.kwargs) 41 | layer_list.append(layer) 42 | in_dim = layer_config.out_dim 43 | 44 | # From NVIDIA's Tacotron2 implementation. 45 | # TODO: Make this an else when switching to new initialisation. 46 | torch.nn.init.xavier_uniform_( 47 | layer.weight, 48 | gain=torch.nn.init.calculate_gain(layer_config.type.lower())) 49 | 50 | if nonlin is not None: 51 | if len(layer_list) > 0: 52 | FFWrapper.reset_parameters(layer_list[-1], nonlin) 53 | layer_list.append(nonlin()) 54 | 55 | self.module = nn.Sequential(*layer_list) 56 | self.out_dim = in_dim 57 | 58 | def forward(self, input, **kwargs): 59 | output, kwargs = super(CNNWrapper, self).forward(input, **kwargs) 60 | 61 | kwargs["seq_lengths_input"] = self.get_output_length( 62 | kwargs['seq_lengths_input']) 63 | kwargs["max_length_inputs"] = self.get_output_length( 64 | kwargs['max_length_inputs']) 65 | return output, kwargs 66 | 67 | def get_output_length(self, seq_lengths_input): 68 | for layer in self.module: 69 | if hasattr(layer, "padding"): 70 | padding = layer.padding 71 | if type(padding) in [tuple, list]: 72 | padding = padding[0] 73 | dilation = layer.dilation 74 | if type(dilation) in [tuple, list]: 75 | dilation = dilation[0] 76 | kernel_size = layer.kernel_size 77 | if type(kernel_size) in [tuple, list]: 78 | kernel_size = kernel_size[0] 79 | stride = layer.stride 80 | if type(stride) in [tuple, list]: 81 | stride = stride[0] 82 | 83 | # Formula: https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html#torch.nn.Conv1d 84 | seq_lengths_input = (seq_lengths_input + 2 * padding 85 | - dilation * (kernel_size - 1) - 1) \ 86 | // stride + 1 87 | 88 | return seq_lengths_input 89 | -------------------------------------------------------------------------------- /test/integration/fixtures/labels/mfa/LJ001-0002.TextGrid: 
-------------------------------------------------------------------------------- 1 | File type = "ooTextFile" 2 | Object class = "TextGrid" 3 | 4 | xmin = 0 5 | xmax = 1.8059 6 | tiers? 7 | size = 2 8 | item []: 9 | item [1]: 10 | class = "IntervalTier" 11 | name = "words" 12 | xmin = 0 13 | xmax = 1.8059 14 | intervals: size = 5 15 | intervals [1]: 16 | xmin = 0 17 | xmax = 0.14 18 | text = "in" 19 | intervals [2]: 20 | xmin = 0.14 21 | xmax = 0.4 22 | text = "being" 23 | intervals [3]: 24 | xmin = 0.4 25 | xmax = 1.27 26 | text = "comparatively" 27 | intervals [4]: 28 | xmin = 1.27 29 | xmax = 1.79 30 | text = "modern" 31 | intervals [5]: 32 | xmin = 1.79 33 | xmax = 1.8059 34 | text = "" 35 | item [2]: 36 | class = "IntervalTier" 37 | name = "phones" 38 | xmin = 0 39 | xmax = 1.8059 40 | intervals: size = 24 41 | intervals [1]: 42 | xmin = 0 43 | xmax = 0.07 44 | text = "IH1" 45 | intervals [2]: 46 | xmin = 0.07 47 | xmax = 0.14 48 | text = "N" 49 | intervals [3]: 50 | xmin = 0.14 51 | xmax = 0.19 52 | text = "B" 53 | intervals [4]: 54 | xmin = 0.19 55 | xmax = 0.32 56 | text = "IY1" 57 | intervals [5]: 58 | xmin = 0.32 59 | xmax = 0.35 60 | text = "IH0" 61 | intervals [6]: 62 | xmin = 0.35 63 | xmax = 0.4 64 | text = "NG" 65 | intervals [7]: 66 | xmin = 0.4 67 | xmax = 0.47 68 | text = "K" 69 | intervals [8]: 70 | xmin = 0.47 71 | xmax = 0.51 72 | text = "AH0" 73 | intervals [9]: 74 | xmin = 0.51 75 | xmax = 0.57 76 | text = "M" 77 | intervals [10]: 78 | xmin = 0.57 79 | xmax = 0.7 80 | text = "P" 81 | intervals [11]: 82 | xmin = 0.7 83 | xmax = 0.76 84 | text = "EH1" 85 | intervals [12]: 86 | xmin = 0.76 87 | xmax = 0.85 88 | text = "R" 89 | intervals [13]: 90 | xmin = 0.85 91 | xmax = 0.9 92 | text = "AH0" 93 | intervals [14]: 94 | xmin = 0.9 95 | xmax = 0.99 96 | text = "T" 97 | intervals [15]: 98 | xmin = 0.99 99 | xmax = 1.05 100 | text = "IH0" 101 | intervals [16]: 102 | xmin = 1.05 103 | xmax = 1.11 104 | text = "V" 105 | intervals [17]: 106 | xmin = 
1.11 107 | xmax = 1.22 108 | text = "L" 109 | intervals [18]: 110 | xmin = 1.22 111 | xmax = 1.27 112 | text = "IY0" 113 | intervals [19]: 114 | xmin = 1.27 115 | xmax = 1.4 116 | text = "M" 117 | intervals [20]: 118 | xmin = 1.4 119 | xmax = 1.56 120 | text = "AA1" 121 | intervals [21]: 122 | xmin = 1.56 123 | xmax = 1.61 124 | text = "D" 125 | intervals [22]: 126 | xmin = 1.61 127 | xmax = 1.68 128 | text = "ER0" 129 | intervals [23]: 130 | xmin = 1.68 131 | xmax = 1.79 132 | text = "N" 133 | intervals [24]: 134 | xmin = 1.79 135 | xmax = 1.8059 136 | text = "" 137 | -------------------------------------------------------------------------------- /test/unit/neural_networks/pytorch/test_AllPassLayer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2020 Idiap Research Institute, http://www.idiap.ch/ 5 | # Written by Bastian Schnell 6 | # 7 | 8 | 9 | import unittest 10 | 11 | import torch 12 | 13 | from idiaptts.src.neural_networks.pytorch.layers.AllPassWarpLayer import AllPassWarpLayer 14 | 15 | 16 | class TestAllPassLayer(unittest.TestCase): 17 | 18 | def _get_config(self, alpha_layer_in_dims=[4, 2], alpha_ranges=[0.1, 0.2], mean=torch.full((5,), -1, dtype=torch.float32), 19 | std_dev=torch.full((5,), 3.0, dtype=torch.float32)): 20 | return AllPassWarpLayer.Config(alpha_layer_in_dims=alpha_layer_in_dims, alpha_ranges=alpha_ranges, 21 | batch_first=True, warp_matrix_size=5, mean=mean, std_dev=std_dev) 22 | 23 | def test_alpha_layer_generation(self): 24 | layer = self._get_config().create_model() 25 | params = list(layer.named_parameters()) 26 | self.assertEqual(4, len(params)) 27 | 28 | state = layer.state_dict() 29 | self.assertEqual(4 + 2, len(state)) 30 | 31 | def test_forward(self): 32 | layer = self._get_config().create_model() 33 | batch_size = 2 34 | T = 8 35 | input_ = torch.rand((batch_size, T, 5)) 36 | alpha_input_1 = 
torch.rand((batch_size, T, 4)) 37 | alpha_input_2 = torch.rand((batch_size, T, 2)) 38 | 39 | output, kwargs = layer((input_, alpha_input_1, alpha_input_2), None, None) 40 | output, combined_alpha, alpha_1, alpha_2 = output 41 | 42 | self.assertEqual(torch.Size([batch_size, T, 5]), output.shape) 43 | self.assertEqual(torch.Size([batch_size, T, 1]), combined_alpha.shape) 44 | self.assertEqual(torch.Size([batch_size, T, 1]), alpha_1.shape) 45 | self.assertEqual(torch.Size([batch_size, T, 1]), alpha_2.shape) 46 | self.assertTrue((alpha_1.abs() <= layer.alpha_ranges[0]).all(), msg="Alpha 1 not within allowed range.") 47 | self.assertTrue((alpha_2.abs() <= layer.alpha_ranges[1]).all(), msg="Alpha 2 not within allowed range.") 48 | 49 | def test_training(self): 50 | layer = self._get_config().create_model() 51 | batch_size = 2 52 | T = 8 53 | input_ = torch.rand((batch_size, T, 5)) 54 | org_input = input_.detach().clone() 55 | alpha_input_1 = torch.rand((batch_size, T, 4)) 56 | alpha_input_2 = torch.rand((batch_size, T, 2)) 57 | 58 | (output, *_), _ = layer((input_, alpha_input_1, alpha_input_2), None, None) 59 | 60 | layer.alpha_layers[1].weight.retain_grad() 61 | self.assertFalse(layer.all_pass_warp.w_matrix_3d.requires_grad) 62 | 63 | torch.autograd.set_detect_anomaly(True) 64 | output.sum().backward() 65 | self.assertIsNotNone(layer.alpha_layers[1].weight.grad) 66 | self.assertTrue((input_ == org_input).all()) 67 | self.assertIsNone(layer.mean.grad) 68 | self.assertIsNone(layer.std_dev.grad) 69 | self.assertIsNone(layer.all_pass_warp.w_matrix_3d.grad) 70 | 71 | def test_normalisation(self): 72 | layer = self._get_config().create_model() 73 | batch_size = 2 74 | T = 8 75 | input_ = torch.rand((batch_size, T, 5)) 76 | org_input = input_.clone() 77 | alpha_input_1 = torch.rand((batch_size, T, 4)) 78 | alpha_input_2 = torch.rand((batch_size, T, 2)) 79 | 80 | (output, *_), _ = layer((input_, alpha_input_1, alpha_input_2), None, None) 81 | self.assertTrue((org_input == 
input_).all(), msg="Input tensor was changed in-place.") 82 | 83 | layer.mean = None 84 | layer.std_dev = None 85 | (no_norm_output, *_), _ = layer((input_, alpha_input_1, alpha_input_2), None, None) 86 | 87 | self.assertFalse(torch.isclose(output, no_norm_output).all(), 88 | msg="Normalised warp shouldn't match un-normalised warp.") 89 | --------------------------------------------------------------------------------