├── LICENSE ├── README.md ├── conftest.py ├── docs ├── Makefile ├── README.md ├── _apidoc_templates │ ├── module.rst │ └── package.rst ├── conf.py ├── contributing.md ├── docs-requirements.txt ├── experiment.md ├── images │ └── logo_noname_rounded_big.png ├── index.rst ├── installation.md ├── multigpu.md └── tutorials.md ├── lint-requirements.txt ├── pip-wheel-metadata └── speechbrain.dist-info │ ├── LICENSE │ ├── METADATA │ └── top_level.txt ├── pyproject.toml ├── pytest.ini ├── recipes └── LibriMix │ ├── .DS_Store │ ├── __pycache__ │ └── prepare_data.cpython-38.pyc │ ├── extra-dependencies.txt │ ├── meta │ ├── __pycache__ │ │ └── preprocess_dynamic_mixing.cpython-38.pyc │ └── preprocess_dynamic_mixing.py │ ├── prepare_data.py │ └── separation │ ├── .DS_Store │ ├── __pycache__ │ └── dynamic_mixing.cpython-38.pyc │ ├── csv_wham │ ├── libri2mix_dev.csv │ ├── libri2mix_test.csv │ ├── libri2mix_train-360.csv │ ├── libri3mix_dev.csv │ ├── libri3mix_test.csv │ └── libri3mix_train-360.csv │ ├── dynamic_mixing.py │ ├── hparams │ ├── .DS_Store │ ├── dprnn-libri2mix-unified-gm.yaml │ └── sepformer-libri2mix-unified-gm.yaml │ ├── test_dprnn_libri2mix_unified_gm.sh │ ├── test_sepformer_libri2mix_unified_gm.sh │ ├── train.py │ ├── train_dprnn_libri2mix_unified_gm.sh │ ├── train_sepformer_libri2mix_unified_gm.sh │ ├── train_unified.py │ └── train_unified_gm.py ├── requirements.txt ├── samples ├── audio_samples │ ├── csv_example.csv │ ├── csv_example2.csv │ ├── csv_example3.csv │ ├── csv_example_multichannel.csv │ ├── example1.wav │ ├── example2.flac │ ├── example3.sph │ ├── example4.raw │ ├── example5.wav │ ├── example6.wav │ ├── example_fr.wav │ ├── example_multichannel.wav │ ├── example_noisy.wav │ ├── multi_mic │ │ ├── noise_0.70225_-0.70225_0.11704.flac │ │ ├── noise_diffuse.flac │ │ ├── speech_-0.82918_0.55279_-0.082918.flac │ │ └── speech_-0.98894_0_0.14834.flac │ ├── nn_training_samples │ │ ├── debug.csv │ │ ├── dev.csv │ │ ├── dev.json │ │ ├── spk1_snt1.pkl │ │ ├── spk1_snt1.wav │ │ ├── spk1_snt2.pkl │ │ ├── spk1_snt2.wav │ │ ├── spk1_snt3.pkl │ │ ├── spk1_snt3.wav │ │ ├── spk1_snt4.pkl │ │ ├── spk1_snt4.wav │ │ ├── spk1_snt5.pkl │ │ ├── spk1_snt5.wav │ │ ├── spk1_snt6.pkl │ │ ├── spk1_snt6.wav │ │ ├── spk2_snt1.pkl │ │ ├── spk2_snt1.wav │ │ ├── spk2_snt2.pkl │ │ ├── spk2_snt2.wav │ │ ├── spk2_snt3.pkl │ │ ├── spk2_snt3.wav │ │ ├── spk2_snt4.pkl │ │ ├── spk2_snt4.wav │ │ ├── spk2_snt5.pkl │ │ ├── spk2_snt5.wav │ │ ├── spk2_snt6.pkl │ │ ├── spk2_snt6.wav │ │ ├── test.csv │ │ ├── train.csv │ │ └── train.json │ ├── sourcesep_samples │ │ ├── csv_example_sourcesep_mixture.csv │ │ ├── csv_example_sourcesep_source1.csv │ │ ├── csv_example_sourcesep_source2.csv │ │ ├── minimal_example_convtasnet_cv.csv │ │ ├── minimal_example_convtasnet_tr.csv │ │ ├── minimal_example_convtasnet_tt.csv │ │ ├── mixture_0.wav │ │ ├── mixture_1.wav │ │ ├── mixture_2.wav │ │ ├── mixture_3.wav │ │ ├── source1_0.wav │ │ ├── source1_1.wav │ │ ├── source1_2.wav │ │ ├── source1_3.wav │ │ ├── source2_0.wav │ │ ├── source2_1.wav │ │ ├── source2_2.wav │ │ └── source2_3.wav │ ├── test_csv_merge.csv │ ├── test_mixture.wav │ └── vad │ │ ├── train.json │ │ ├── train.wav │ │ ├── valid.json │ │ └── valid.wav ├── label_samples │ ├── hyp.csv │ └── ref.csv ├── noise_samples │ ├── noise.csv │ ├── noise1.wav │ ├── noise2.wav │ ├── noise3.wav │ ├── noise4.wav │ ├── noise5.wav │ ├── noise_multichannel.csv │ ├── noise_multichannel.wav │ └── noise_rel.csv ├── plda_xvect_samples │ ├── enrol_stat_xvect.pkl │ ├── expected_plda_scores.pkl │ ├── 
test_stat_xvect.pkl │ └── train_stat_xvect.pkl ├── rir_samples │ ├── rir1.wav │ ├── rir2.wav │ ├── rir3.wav │ ├── rir4.wav │ ├── rir_multichannel.csv │ ├── rir_multichannel.wav │ ├── rirs.csv │ └── rirs_rel.csv ├── rttm_samples │ ├── ReadMe.md │ ├── ref_rttm │ │ └── ES2014c.rttm │ └── sys_rttm │ │ └── ES2014c.rttm ├── text_samples │ ├── hdf5_example.h5 │ ├── label_dict.pkl │ └── readme.txt └── voxceleb_samples │ ├── meta │ └── iden_split.txt │ ├── readme.txt │ └── wav │ ├── dev.csv │ ├── id10001 │ └── 1zcIwhmdeo4 │ │ ├── 00001.wav │ │ ├── 00002.wav │ │ └── 00003.wav │ ├── id10002 │ └── xTV-jFAUKcw │ │ ├── 00001.wav │ │ ├── 00002.wav │ │ └── 00003.wav │ └── train.csv ├── setup.py ├── speechbrain.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt ├── requires.txt └── top_level.txt ├── speechbrain ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── __init__.cpython-38.pyc │ ├── core.cpython-37.pyc │ └── core.cpython-38.pyc ├── alignment │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── __init__.cpython-38.pyc │ ├── aligner.py │ └── ctc_segmentation.py ├── core.py ├── dataio │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── batch.cpython-37.pyc │ │ ├── batch.cpython-38.pyc │ │ ├── dataio.cpython-37.pyc │ │ ├── dataio.cpython-38.pyc │ │ ├── dataloader.cpython-37.pyc │ │ ├── dataloader.cpython-38.pyc │ │ ├── dataset.cpython-37.pyc │ │ ├── dataset.cpython-38.pyc │ │ ├── encoder.cpython-37.pyc │ │ ├── encoder.cpython-38.pyc │ │ ├── iterators.cpython-37.pyc │ │ ├── iterators.cpython-38.pyc │ │ ├── legacy.cpython-37.pyc │ │ ├── legacy.cpython-38.pyc │ │ ├── preprocess.cpython-37.pyc │ │ ├── preprocess.cpython-38.pyc │ │ ├── sampler.cpython-37.pyc │ │ ├── sampler.cpython-38.pyc │ │ ├── wer.cpython-37.pyc │ │ └── wer.cpython-38.pyc │ ├── batch.py │ ├── dataio.py │ ├── dataloader.py │ ├── dataset.py │ ├── encoder.py │ ├── iterators.py │ ├── legacy.py │ ├── preprocess.py │ ├── sampler.py │ └── wer.py ├── decoders │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── ctc.cpython-37.pyc │ │ ├── ctc.cpython-38.pyc │ │ ├── seq2seq.cpython-37.pyc │ │ └── seq2seq.cpython-38.pyc │ ├── ctc.py │ ├── seq2seq.py │ └── transducer.py ├── lm │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── __init__.cpython-38.pyc │ ├── arpa.py │ ├── counting.py │ └── ngram.py ├── lobes │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ └── augment.cpython-38.pyc │ ├── augment.py │ ├── beamform_multimic.py │ ├── features.py │ └── models │ │ ├── CRDNN.py │ │ ├── ContextNet.py │ │ ├── ECAPA_TDNN.py │ │ ├── ESPnetVGG.py │ │ ├── MetricGAN.py │ │ ├── RNNLM.py │ │ ├── VanillaNN.py │ │ ├── Xvector.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── conv_tasnet.cpython-38.pyc │ │ ├── dual_path.cpython-37.pyc │ │ ├── dual_path.cpython-38.pyc │ │ ├── dual_path2.cpython-38.pyc │ │ ├── dual_path_context.cpython-38.pyc │ │ ├── dual_path_conv.cpython-38.pyc │ │ ├── dual_path_conv2.cpython-38.pyc │ │ ├── dual_path_multi_scale.cpython-38.pyc │ │ ├── dual_path_splitnet.cpython-38.pyc │ │ ├── dual_path_splitnet_exchange.cpython-38.pyc │ │ ├── galr.cpython-38.pyc │ │ ├── norms.cpython-38.pyc │ │ ├── torch_utils.cpython-38.pyc │ │ ├── u_net.cpython-38.pyc │ │ └── unet.cpython-38.pyc │ │ ├── conv_tasnet.py │ │ ├── convolution.py │ │ ├── dual_path.py │ 
│ ├── fairseq_wav2vec.py │ │ ├── galr.py │ │ ├── huggingface_wav2vec.py │ │ ├── norms.py │ │ ├── segan_model.py │ │ ├── torch_utils.py │ │ ├── transformer │ │ ├── Conformer.py │ │ ├── Transformer.py │ │ ├── TransformerASR.py │ │ ├── TransformerLM.py │ │ ├── TransformerSE.py │ │ ├── TransformerST.py │ │ ├── Transformer_GALR.py │ │ ├── Transformer_old.py │ │ ├── __init__.py │ │ └── __pycache__ │ │ │ ├── Conformer.cpython-37.pyc │ │ │ ├── Conformer.cpython-38.pyc │ │ │ ├── Transformer.cpython-37.pyc │ │ │ ├── Transformer.cpython-38.pyc │ │ │ ├── Transformer_GALR.cpython-38.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── __init__.cpython-38.pyc │ │ └── unet.py ├── log-config.yaml ├── nnet │ ├── CNN.py │ ├── RNN.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── CNN.cpython-37.pyc │ │ ├── CNN.cpython-38.pyc │ │ ├── RNN.cpython-37.pyc │ │ ├── RNN.cpython-38.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── activations.cpython-37.pyc │ │ ├── activations.cpython-38.pyc │ │ ├── attention.cpython-37.pyc │ │ ├── attention.cpython-38.pyc │ │ ├── containers.cpython-37.pyc │ │ ├── containers.cpython-38.pyc │ │ ├── dropout.cpython-37.pyc │ │ ├── dropout.cpython-38.pyc │ │ ├── embedding.cpython-37.pyc │ │ ├── embedding.cpython-38.pyc │ │ ├── linear.cpython-37.pyc │ │ ├── linear.cpython-38.pyc │ │ ├── losses.cpython-37.pyc │ │ ├── losses.cpython-38.pyc │ │ ├── normalization.cpython-37.pyc │ │ ├── normalization.cpython-38.pyc │ │ ├── pooling.cpython-37.pyc │ │ ├── pooling.cpython-38.pyc │ │ ├── schedulers.cpython-37.pyc │ │ └── schedulers.cpython-38.pyc │ ├── activations.py │ ├── attention.py │ ├── complex_networks │ │ ├── __init__.py │ │ ├── c_CNN.py │ │ ├── c_RNN.py │ │ ├── c_linear.py │ │ ├── c_normalization.py │ │ └── c_ops.py │ ├── containers.py │ ├── dropout.py │ ├── embedding.py │ ├── linear.py │ ├── loss │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── stoi_loss.cpython-37.pyc │ │ │ └── stoi_loss.cpython-38.pyc │ │ ├── guidedattn_loss.py │ │ ├── stoi_loss.py │ │ └── transducer_loss.py │ ├── losses.py │ ├── normalization.py │ ├── pooling.py │ ├── quaternion_networks │ │ ├── __init__.py │ │ ├── q_CNN.py │ │ ├── q_RNN.py │ │ ├── q_linear.py │ │ ├── q_normalization.py │ │ └── q_ops.py │ ├── schedulers.py │ └── transducer │ │ ├── __init__.py │ │ └── transducer_joint.py ├── pretrained │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── fetching.cpython-37.pyc │ │ ├── fetching.cpython-38.pyc │ │ ├── interfaces.cpython-37.pyc │ │ └── interfaces.cpython-38.pyc │ ├── fetching.py │ └── interfaces.py ├── processing │ ├── NMF.py │ ├── PLDA_LDA.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── signal_processing.cpython-37.pyc │ │ ├── signal_processing.cpython-38.pyc │ │ ├── speech_augmentation.cpython-37.pyc │ │ └── speech_augmentation.cpython-38.pyc │ ├── decomposition.py │ ├── diarization.py │ ├── features.py │ ├── multi_mic.py │ ├── signal_processing.py │ └── speech_augmentation.py ├── tokenizers │ ├── SentencePiece.py │ ├── __init__.py │ └── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── __init__.cpython-38.pyc ├── utils │ ├── Accuracy.py │ ├── DER.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── Accuracy.cpython-37.pyc │ │ ├── Accuracy.cpython-38.pyc │ │ ├── DER.cpython-37.pyc │ │ ├── DER.cpython-38.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── bleu.cpython-37.pyc │ │ ├── 
bleu.cpython-38.pyc │ │ ├── callchains.cpython-37.pyc │ │ ├── callchains.cpython-38.pyc │ │ ├── checkpoints.cpython-37.pyc │ │ ├── checkpoints.cpython-38.pyc │ │ ├── data_pipeline.cpython-37.pyc │ │ ├── data_pipeline.cpython-38.pyc │ │ ├── data_utils.cpython-37.pyc │ │ ├── data_utils.cpython-38.pyc │ │ ├── depgraph.cpython-37.pyc │ │ ├── depgraph.cpython-38.pyc │ │ ├── distributed.cpython-37.pyc │ │ ├── distributed.cpython-38.pyc │ │ ├── edit_distance.cpython-37.pyc │ │ ├── edit_distance.cpython-38.pyc │ │ ├── epoch_loop.cpython-37.pyc │ │ ├── epoch_loop.cpython-38.pyc │ │ ├── logger.cpython-37.pyc │ │ ├── logger.cpython-38.pyc │ │ ├── metric_stats.cpython-37.pyc │ │ ├── metric_stats.cpython-38.pyc │ │ ├── parameter_transfer.cpython-37.pyc │ │ ├── parameter_transfer.cpython-38.pyc │ │ ├── superpowers.cpython-37.pyc │ │ ├── superpowers.cpython-38.pyc │ │ ├── torch_audio_backend.cpython-37.pyc │ │ ├── torch_audio_backend.cpython-38.pyc │ │ ├── train_logger.cpython-37.pyc │ │ └── train_logger.cpython-38.pyc │ ├── bleu.py │ ├── callchains.py │ ├── checkpoints.py │ ├── data_pipeline.py │ ├── data_utils.py │ ├── depgraph.py │ ├── distributed.py │ ├── edit_distance.py │ ├── epoch_loop.py │ ├── logger.py │ ├── metric_stats.py │ ├── parameter_transfer.py │ ├── superpowers.py │ ├── torch_audio_backend.py │ └── train_logger.py └── version.txt ├── templates ├── README.md ├── enhancement │ ├── README.md │ ├── custom_model.py │ ├── mini_librispeech_prepare.py │ ├── train.py │ └── train.yaml ├── speaker_id │ ├── README.md │ ├── custom_model.py │ ├── mini_librispeech_prepare.py │ ├── train.py │ └── train.yaml └── speech_recognition │ ├── ASR │ ├── README.md │ ├── mini_librispeech_prepare.py │ ├── train.py │ └── train.yaml │ ├── LM │ ├── README.md │ ├── RNNLM.yaml │ ├── custom_model.py │ ├── data │ │ ├── test.txt │ │ ├── train.txt │ │ └── valid.txt │ ├── extra_requirements.txt │ └── train.py │ ├── README.md │ ├── Tokenizer │ ├── README.md │ ├── mini_librispeech_prepare.py │ ├── tokenizer.yaml │ └── train.py │ └── mini_librispeech_prepare.py ├── tests ├── .run-doctests.sh ├── .run-linters.sh ├── .run-recipe-tests.sh ├── .run-unittests.sh ├── integration │ ├── neural_networks │ │ ├── ASR_CTC │ │ │ ├── example_asr_ctc_experiment.py │ │ │ ├── example_asr_ctc_experiment_complex_net.py │ │ │ ├── example_asr_ctc_experiment_quaternion_net.py │ │ │ ├── hyperparams.yaml │ │ │ ├── hyperparams_complex_net.yaml │ │ │ └── hyperparams_quaternion_net.yaml │ │ ├── ASR_DNN_HMM │ │ │ ├── example_asr_dnn_hmm_experiment.py │ │ │ └── hyperparams.yaml │ │ ├── ASR_Transducer │ │ │ ├── example_asr_transducer_experiment.py │ │ │ └── hyperparams.yaml │ │ ├── ASR_alignment_forward │ │ │ ├── example_asr_alignment_forward_experiment.py │ │ │ └── hyperparams.yaml │ │ ├── ASR_alignment_viterbi │ │ │ ├── example_asr_alignment_viterbi_experiment.py │ │ │ └── hyperparams.yaml │ │ ├── ASR_seq2seq │ │ │ ├── example_asr_seq2seq_experiment.py │ │ │ └── hyperparams.yaml │ │ ├── G2P │ │ │ ├── example_g2p.py │ │ │ └── hyperparams.yaml │ │ ├── LM_RNN │ │ │ ├── example_lm_rnn_experiment.py │ │ │ └── hyperparams.yaml │ │ ├── VAD │ │ │ ├── example_vad.py │ │ │ └── hyperparams.yaml │ │ ├── autoencoder │ │ │ ├── example_auto_experiment.py │ │ │ └── hyperparams.yaml │ │ ├── enhance_GAN │ │ │ ├── example_enhance_gan_experiment.py │ │ │ ├── hyperparams.yaml │ │ │ └── models.yaml │ │ ├── separation │ │ │ ├── example_conv_tasnet.py │ │ │ └── hyperparams.yaml │ │ └── speaker_id │ │ │ ├── example_xvector_experiment.py │ │ │ └── hyperparams.yaml │ └── 
signal_processing │ │ ├── PLDA_xvector │ │ └── example_plda_experiment.py │ │ ├── example_add_babble.py │ │ ├── example_add_noise.py │ │ ├── example_add_reverb.py │ │ ├── example_do_clip.py │ │ ├── example_drop_chunk.py │ │ ├── example_drop_freq.py │ │ ├── example_speed_perturb.py │ │ ├── expected │ │ ├── add_babble │ │ │ └── save │ │ │ │ └── example1.flac │ │ ├── add_noise │ │ │ └── save │ │ │ │ └── example1.flac │ │ ├── add_reverb │ │ │ └── save │ │ │ │ └── example1.flac │ │ ├── do_clip │ │ │ └── save │ │ │ │ └── example1.flac │ │ ├── drop_chunk │ │ │ └── save │ │ │ │ └── example1.flac │ │ ├── drop_freq │ │ │ └── save │ │ │ │ └── example1.flac │ │ └── speed_perturb │ │ │ └── save │ │ │ └── example1.flac │ │ ├── hyperparams.yaml │ │ └── nmf_sourcesep │ │ ├── example_experiment.py │ │ └── hyperparams.yaml └── unittests │ ├── test_CNN.py │ ├── test_RNN.py │ ├── test_activations.py │ ├── test_arpa.py │ ├── test_attention.py │ ├── test_augment.py │ ├── test_batching.py │ ├── test_callchains.py │ ├── test_categorical_encoder.py │ ├── test_checkpoints.py │ ├── test_core.py │ ├── test_counting.py │ ├── test_ctc_segmentation.py │ ├── test_data_io.py │ ├── test_data_pipeline.py │ ├── test_dataloader.py │ ├── test_dataset.py │ ├── test_dependency_graph.py │ ├── test_dropout.py │ ├── test_edit_distance.py │ ├── test_embedding.py │ ├── test_epoch_loop.py │ ├── test_features.py │ ├── test_linear.py │ ├── test_losses.py │ ├── test_metrics.py │ ├── test_multi_mic.py │ ├── test_ngram_lm.py │ ├── test_normalization.py │ ├── test_pooling.py │ ├── test_pretrainer.py │ ├── test_samplers.py │ ├── test_schedulers.py │ ├── test_signal_processing.py │ ├── test_superpowers.py │ ├── test_tokenizer.py │ └── tokenizer_data │ └── dev-clean.csv └── tools ├── compute_wer.py └── der_eval └── md-eval.pl /README.md: -------------------------------------------------------------------------------- 1 | # Unifying Speech Enhancement and Separation 2 | 3 | This is the code implementation for paper [Unifying Speech Enhancement and Separation with Gradient Modulation for End-to-End Noise-Robust Speech Separation](https://arxiv.org/abs/2302.11131) that is built based on [SpeechBrain](https://github.com/speechbrain/speechbrain) toolkit. 4 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | collect_ignore = ["setup.py"] 2 | try: 3 | import numba # noqa: F401 4 | except ModuleNotFoundError: 5 | collect_ignore.append("speechbrain/nnet/loss/transducer_loss.py") 6 | try: 7 | import fairseq # noqa: F401 8 | except ModuleNotFoundError: 9 | collect_ignore.append("speechbrain/lobes/models/fairseq_wav2vec.py") 10 | try: 11 | from transformers import Wav2Vec2Model # noqa: F401 12 | except ModuleNotFoundError: 13 | collect_ignore.append("speechbrain/lobes/models/huggingface_wav2vec.py") 14 | try: 15 | import sacrebleu # noqa: F401 16 | except ModuleNotFoundError: 17 | collect_ignore.append("speechbrain/utils/bleu.py") 18 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 
9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | clean: 18 | rm -rf build 19 | rm -rf API 20 | @echo "You may also want to remove files not under version control." 21 | @echo "First run" 22 | @echo " git clean -n -d source" 23 | @echo "to see what would be deleted" 24 | @echo "Then if you're happy run" 25 | @echo " git clean -f -d source" 26 | @echo "This can help to clean out api-doc generated .rst files etc." 27 | 28 | # Catch-all target: route all unknown targets to Sphinx using the new 29 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 30 | %: Makefile 31 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 32 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # SpeechBrain documentation 2 | 3 | Please install additional dependencies: 4 | 5 | ``` 6 | pip install -r docs-requirements.txt 7 | ``` 8 | 9 | Then run: 10 | ``` 11 | make html 12 | ``` 13 | to build HTML documentation. Then open `build/html/index.html` 14 | 15 | ## Automatic API documentation from docstrings 16 | 17 | The documentation uses `sphinx.ext.napoleon` to support Google-style 18 | docstrings. Sphinx natively supports reStructuredText directives. 19 | 20 | Automatically generating documentation based on docstrings is not the 21 | core of Sphinx. For this, after much searching, we use better-apidoc. 22 | 23 | ## Future work 24 | 25 | Besides automatic API documentation, Sphinx will facilitate manual prose 26 | documentation. 27 | -------------------------------------------------------------------------------- /docs/_apidoc_templates/module.rst: -------------------------------------------------------------------------------- 1 | {# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #} 2 | :autogenerated: 3 | 4 | {{ fullname }} module 5 | {% for item in range(7 + fullname|length) -%}={%- endfor %} 6 | 7 | .. currentmodule:: {{ fullname }} 8 | 9 | .. automodule:: {{ fullname }} 10 | {% if members -%} 11 | :members: {{ members|join(", ") }} 12 | :undoc-members: 13 | :show-inheritance: 14 | :member-order: bysource 15 | 16 | Summary 17 | ------- 18 | 19 | {%- if exceptions %} 20 | 21 | Exceptions: 22 | 23 | .. autosummary:: 24 | :nosignatures: 25 | {% for item in exceptions %} 26 | {{ item }} 27 | {%- endfor %} 28 | {%- endif %} 29 | 30 | {%- if classes %} 31 | 32 | Classes: 33 | 34 | .. autosummary:: 35 | :nosignatures: 36 | {% for item in classes %} 37 | {{ item }} 38 | {%- endfor %} 39 | {%- endif %} 40 | 41 | {%- if functions %} 42 | 43 | Functions: 44 | 45 | .. autosummary:: 46 | :nosignatures: 47 | {% for item in functions %} 48 | {{ item }} 49 | {%- endfor %} 50 | {%- endif %} 51 | {%- endif %} 52 | 53 | {% set data = get_members(typ='data', in_list='__all__') %} 54 | {%- if data %} 55 | 56 | Data: 57 | 58 | .. 
autosummary:: 59 | :nosignatures: 60 | {% for item in data %} 61 | {{ item }} 62 | {%- endfor %} 63 | {%- endif %} 64 | 65 | {% set all_refs = get_members(in_list='__all__', include_imported=True, out_format='refs') %} 66 | {% if all_refs %} 67 | ``__all__``: {{ all_refs|join(", ") }} 68 | {%- endif %} 69 | 70 | 71 | {% if members %} 72 | Reference 73 | --------- 74 | 75 | {%- endif %} 76 | -------------------------------------------------------------------------------- /docs/docs-requirements.txt: -------------------------------------------------------------------------------- 1 | better-apidoc>=0.3.1 2 | numba 3 | recommonmark>=0.7.1 4 | six 5 | sphinx-rtd-theme>=0.4.3 6 | Sphinx>=3.4.3 7 | -------------------------------------------------------------------------------- /docs/images/logo_noname_rounded_big.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/docs/images/logo_noname_rounded_big.png -------------------------------------------------------------------------------- /docs/tutorials.md: -------------------------------------------------------------------------------- 1 | # Tutorials 2 | 3 | A good way to familiarize yourself with SpeechBrain is to take a look at the Colab tutorials that we made available. More tutorials will be made available as the project will progress. 4 | 5 | The full list of tutorials can be found on the official [website](https://speechbrain.github.io). All the tutorials are developed on the [Google Colab platform](https://colab.research.google.com). This allows users to directly try SpeechBrain on GPUs without the need to set up an environment. 6 | -------------------------------------------------------------------------------- /lint-requirements.txt: -------------------------------------------------------------------------------- 1 | black==19.10b0 2 | flake8==3.7.9 3 | pycodestyle==2.5.0 4 | pytest==5.4.1 5 | yamllint==1.23.0 6 | -------------------------------------------------------------------------------- /pip-wheel-metadata/speechbrain.dist-info/top_level.txt: -------------------------------------------------------------------------------- 1 | speechbrain 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 80 3 | target-version = ['py38'] 4 | exclude = ''' 5 | 6 | ( 7 | /( 8 | \.eggs # exclude a few common directories in the 9 | | \.git # root of the project 10 | | \.mypy_cache 11 | | \.tox 12 | | \.venv 13 | )/ 14 | ) 15 | ''' 16 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | doctest_optionflags= ELLIPSIS 3 | 4 | python_files = 5 | test_*.py 6 | check_*.py 7 | example_*.py 8 | 9 | norecursedirs = results 10 | -------------------------------------------------------------------------------- /recipes/LibriMix/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/recipes/LibriMix/.DS_Store -------------------------------------------------------------------------------- /recipes/LibriMix/__pycache__/prepare_data.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/recipes/LibriMix/__pycache__/prepare_data.cpython-38.pyc -------------------------------------------------------------------------------- /recipes/LibriMix/extra-dependencies.txt: -------------------------------------------------------------------------------- 1 | mir-eval==0.6 2 | pyloudnorm 3 | 4 | -------------------------------------------------------------------------------- /recipes/LibriMix/meta/__pycache__/preprocess_dynamic_mixing.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/recipes/LibriMix/meta/__pycache__/preprocess_dynamic_mixing.cpython-38.pyc -------------------------------------------------------------------------------- /recipes/LibriMix/separation/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/recipes/LibriMix/separation/.DS_Store -------------------------------------------------------------------------------- /recipes/LibriMix/separation/__pycache__/dynamic_mixing.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/recipes/LibriMix/separation/__pycache__/dynamic_mixing.cpython-38.pyc -------------------------------------------------------------------------------- /recipes/LibriMix/separation/hparams/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/recipes/LibriMix/separation/hparams/.DS_Store -------------------------------------------------------------------------------- /recipes/LibriMix/separation/test_dprnn_libri2mix_unified_gm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cmd="/path/to/slurm.pl --quiet" 4 | 5 | source activate CONDA_ENV 6 | 7 | $cmd log/test-dprnn-libri2mix-unified-gm.log \ 8 | python train.py hparams/dprnn-libri2mix-unified-gm.yaml --data_folder /path/to/data/LibriMix/Libri2Mix/ --test_only True 9 | 10 | -------------------------------------------------------------------------------- /recipes/LibriMix/separation/test_sepformer_libri2mix_unified_gm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cmd="/path/to/slurm.pl --quiet" 4 | 5 | source activate CONDA_ENV 6 | 7 | $cmd log/test-sepformer-libri2mix-unified-gm.log \ 8 | python train.py hparams/sepformer-libri2mix-unified-gm.yaml --data_folder /path/to/data/LibriMix/Libri2Mix/ --test_only True 9 | 10 | -------------------------------------------------------------------------------- /recipes/LibriMix/separation/train_dprnn_libri2mix_unified_gm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cmd="/path/to/slurm.pl --quiet" 4 | 5 | source activate CONDA_ENV 6 | 7 | $cmd log/dprnn-libri2mix-unified-gm.log \ 8 | python train_unified_gm.py hparams/dprnn-libri2mix-unified-gm.yaml 
--data_folder /path/to/data/LibriMix/Libri2Mix/ --dynamic_mixing False 9 | 10 | -------------------------------------------------------------------------------- /recipes/LibriMix/separation/train_sepformer_libri2mix_unified_gm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cmd="/path/to/slurm.pl --quiet" 4 | 5 | source activate CONDA_ENV 6 | 7 | $cmd log/sepformer-libri2mix-unified-gm.log \ 8 | python train_unified_gm.py hparams/sepformer-libri2mix-unified-gm.yaml --data_folder /path/to/data/LibriMix/Libri2Mix/ --dynamic_mixing False 9 | 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r lint-requirements.txt 2 | huggingface_hub>=0.0.6 3 | hyperpyyaml>=0.0.1 4 | joblib>=0.14.1 5 | numpy>=1.17.0 6 | packaging 7 | pre-commit>=2.3.0 8 | scipy>=1.4.1 9 | sentencepiece>=0.1.91 10 | SoundFile; sys_platform == 'win32' 11 | torch>=1.8.0,<=1.8.1 12 | torchaudio>=0.7.2,<=0.8.1 13 | tqdm>=4.42.0 14 | -------------------------------------------------------------------------------- /samples/audio_samples/csv_example.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts, spk_id, spk_id_format, spk_id_opts 2 | 3 | example1, 3.260, $data_folder/example1.wav, wav, , spk01, string, 4 | example2, 2.068, $data_folder/example2.flac, flac, , spk02, string, 5 | example3, 2.890, $data_folder/example3.sph, wav, , spk03, string, 6 | example5, 1.000, $data_folder/example5.wav, wav, start:10000 stop:26000, spk05, string, 7 | -------------------------------------------------------------------------------- /samples/audio_samples/csv_example2.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts, spk_id, spk_id_format, spk_id_opts 2 | 3 | example1, 3.260, $data_folder/example1.wav, wav, , spk01, string, 4 | 5 | -------------------------------------------------------------------------------- /samples/audio_samples/csv_example3.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts 2 | 3 | example1, 3.260, samples/audio_samples/example1.wav, wav, 4 | example2, 2.068, samples/audio_samples/example2.flac, flac, 5 | example5, 1.00, samples/audio_samples/example5.wav, wav, start:10000 stop:26000 6 | 7 | -------------------------------------------------------------------------------- /samples/audio_samples/csv_example_multichannel.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts, spk_id, spk_id_format, spk_id_opts 2 | 3 | example1, 3.260, $data_folder/example_multichannel.wav, wav, , spk01, string, 4 | 5 | -------------------------------------------------------------------------------- /samples/audio_samples/example1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/example1.wav -------------------------------------------------------------------------------- /samples/audio_samples/example2.flac: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/example2.flac -------------------------------------------------------------------------------- /samples/audio_samples/example3.sph: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/example3.sph -------------------------------------------------------------------------------- /samples/audio_samples/example4.raw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/example4.raw -------------------------------------------------------------------------------- /samples/audio_samples/example5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/example5.wav -------------------------------------------------------------------------------- /samples/audio_samples/example6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/example6.wav -------------------------------------------------------------------------------- /samples/audio_samples/example_fr.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/example_fr.wav -------------------------------------------------------------------------------- /samples/audio_samples/example_multichannel.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/example_multichannel.wav -------------------------------------------------------------------------------- /samples/audio_samples/example_noisy.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/example_noisy.wav -------------------------------------------------------------------------------- /samples/audio_samples/multi_mic/noise_0.70225_-0.70225_0.11704.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/multi_mic/noise_0.70225_-0.70225_0.11704.flac -------------------------------------------------------------------------------- /samples/audio_samples/multi_mic/noise_diffuse.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/multi_mic/noise_diffuse.flac -------------------------------------------------------------------------------- /samples/audio_samples/multi_mic/speech_-0.82918_0.55279_-0.082918.flac: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/multi_mic/speech_-0.82918_0.55279_-0.082918.flac -------------------------------------------------------------------------------- /samples/audio_samples/multi_mic/speech_-0.98894_0_0.14834.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/multi_mic/speech_-0.98894_0_0.14834.flac -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/dev.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts, spk_id, spk_id_format, spk_id_opts, ali, ali_format, ali_opts, phn, phn_format, phn_opts,char,char_format,char_opts 2 | spk1_snt5,2.6,$data_folder/spk1_snt5.wav, wav, ,spk1,string, ,$data_folder/spk1_snt5.pkl,pkl, ,s ah n vcl d ey ih z dh ax vcl b eh s cl t cl p aa r dx ax v dh ax w iy cl,string, ,s u n d a y i s t h e b e s t p a r t o f t h e w e e k,string, 3 | spk2_snt5,1.98,$data_folder/spk2_snt5.wav, wav, ,spk2,string, ,$data_folder/spk2_snt5.pkl,pkl, ,vcl jh ah m cl p dh ax f eh n s ae n hh er iy ah cl p dh ax vcl b ae ng cl,string, ,k e n p a I r s l a c k f u l l f l a v o r,string, 4 | -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "spk1_snt5": { 3 | "wav": "{data_root}/spk1_snt5.wav", 4 | "length": 2.6, 5 | "spk_id": "spk1", 6 | "ali": "{data_root}/spk1_snt5.pkl", 7 | "phn": "s ah n vcl d ey ih z dh ax vcl b eh s cl t cl p aa r dx ax v dh ax w iy cl", 8 | "char": "s u n d a y i s t h e b e s t p a r t o f t h e w e e k" 9 | }, 10 | "spk2_snt5": { 11 | "wav": "{data_root}/spk2_snt5.wav", 12 | "length": 1.98, 13 | "spk_id": "spk2", 14 | "ali": "{data_root}/spk2_snt5.pkl", 15 | "phn": "vcl jh ah m cl p dh ax f eh n s ae n hh er iy ah cl p dh ax vcl b ae ng cl", 16 | "char": "k e n p a i r s l a c k f u l l f l a v o r" 17 | } 18 | } -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk1_snt1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk1_snt1.pkl -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk1_snt1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk1_snt1.wav -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk1_snt2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk1_snt2.pkl 
-------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk1_snt2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk1_snt2.wav -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk1_snt3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk1_snt3.pkl -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk1_snt3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk1_snt3.wav -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk1_snt4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk1_snt4.pkl -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk1_snt4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk1_snt4.wav -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk1_snt5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk1_snt5.pkl -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk1_snt5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk1_snt5.wav -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk1_snt6.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk1_snt6.pkl -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk1_snt6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk1_snt6.wav 
-------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk2_snt1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk2_snt1.pkl -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk2_snt1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk2_snt1.wav -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk2_snt2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk2_snt2.pkl -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk2_snt2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk2_snt2.wav -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk2_snt3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk2_snt3.pkl -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk2_snt3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk2_snt3.wav -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk2_snt4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk2_snt4.pkl -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk2_snt4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk2_snt4.wav -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk2_snt5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk2_snt5.pkl 
-------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk2_snt5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk2_snt5.wav -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk2_snt6.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk2_snt6.pkl -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/spk2_snt6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/nn_training_samples/spk2_snt6.wav -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/test.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts, spk_id, spk_id_format, spk_id_opts, ali, ali_format, ali_opts, phn, phn_format, phn_opts,char,char_format,char_opts 2 | spk1_snt6,2.29,$data_folder/spk1_snt6.wav, wav, ,spk1,string, ,$data_folder/spk1_snt6.pkl,pkl, ,t h e p e n c I l s h a v e a l l b e e n u s e d,string, ,t h e c h i l d a l m o s t h u r t t h e s m a l l d o g ,string, 3 | spk2_snt6 ,1.8,$data_folder/spk2_snt6.wav, wav, ,spk2,string, ,$data_folder/spk2_snt6.pkl,pkl, ,j u m p t h e f e n c e a n d h u r r y u p t h e b a n k,string,,k e n p a I r s l a c k f u l l f l a v o r,string, 4 | -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/train.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts, spk_id, spk_id_format, spk_id_opts, ali, ali_format, ali_opts, phn, phn_format, phn_opts,char,char_format,char_opts 2 | spk1_snt1,2.87,$data_folder/spk1_snt1.wav, wav, ,spk1,string, ,$data_folder/spk1_snt1.pkl,pkl, ,dh ax cl ch ay l vcl d ao l m ow s cl t hh er cl t sil dh ax s m ao l vcl d ao vcl,string, ,t h e c h i l d a l m o s t h u r t t h e s m a l l d o g ,string, 3 | spk1_snt2,3.15,$data_folder/spk1_snt2.wav, wav, ,spk1,string, ,$data_folder/spk1_snt2.pkl,pkl, ,vcl d r aa cl p dh ax cl t uw sil w eh n y uw ae vcl d dh ax f ih vcl g y er,string, ,d r o p t h e t u e w h e n y o u a d d t h e f i g u r e s,string, 4 | spk1_snt3,2.72,$data_folder/spk1_snt3.wav, wav, ,spk1,string, ,$data_folder/spk1_snt3.pkl,pkl, ,ae cl t dh ae cl t hh ay l eh v ax l dh iy eh r ih z cl p y uh,string, ,A t t h a t h i g h l e v e l t h e a i r i s p u r e,string, 5 | spk1_snt4,2.53,$data_folder/spk1_snt4.wav, wav, ,spk1,string, ,$data_folder/spk1_snt4.pkl,pkl, ,ey th ih n s cl t r ay cl p r ah n z vcl d aw n dh ax m ih vcl d ax,string, ,a t h i n s t r i p e r u n s d o w n t h e m i d d l e,string, 6 | spk2_snt1,2.01,$data_folder/spk2_snt1.wav, wav, ,spk2,string, ,$data_folder/spk2_snt1.pkl,pkl, ,w iy er sh ao r dh ax dx w ah n w ao r ih z ih n ah,string, ,w e a r e s u r e t h a t o n e w o r e 
i s e n o u g h,string, 7 | spk2_snt2,1.76,$data_folder/spk2_snt2.wav, wav, ,spk2,string, ,$data_folder/spk2_snt2.pkl,pkl, ,w ah cl t vcl jh oy dh eh r ih z ih n l ih v ih,string, ,w h a t j o y t h e r e i s i n l i v i n g,string, 8 | spk2_snt3,1.88,$data_folder/spk2_snt3.wav, wav, ,spk2,string, ,$data_folder/spk2_snt3.pkl,pkl, ,t eh r ah th ih n sh iy cl t f er m dh iy y eh l ow cl p ae vcl,string, ,t h e r a t I n s h I p f r o m t h e y a l l o w p a v,string, 9 | spk2_snt4,2.04,$data_folder/spk2_snt4.wav, wav, ,spk2,string, ,$data_folder/spk2_snt4.pkl,pkl, ,m eh n vcl d dh ax cl k ow cl t vcl b ih f ao r y uw vcl g ow aw cl,string, ,m e n t h e c o w t b e f o r e y o u g o o u t,string, 10 | -------------------------------------------------------------------------------- /samples/audio_samples/nn_training_samples/train.json: -------------------------------------------------------------------------------- 1 | { 2 | "spk1_snt1": { 3 | "wav": "{data_root}/spk1_snt1.wav", 4 | "length": 2.87, 5 | "spk_id": "spk1", 6 | "ali": "{data_root}/spk1_snt1.pkl", 7 | "phn": "dh ax cl ch ay l vcl d ao l m ow s cl t hh er cl t sil dh ax s m ao l vcl d ao vcl", 8 | "char": "t h e c h i l d a l m o s t h u r t t h e s m a l l d o g " 9 | }, 10 | "spk1_snt2": { 11 | "wav": "{data_root}/spk1_snt2.wav", 12 | "length": 3.15, 13 | "spk_id": "spk1", 14 | "ali": "{data_root}/spk1_snt2.pkl", 15 | "phn": "vcl d r aa cl p dh ax cl t uw sil w eh n y uw ae vcl d dh ax f ih vcl g y er", 16 | "char": "d r o p t h e t u e w h e n y o u a d d t h e f i g u r e s" 17 | }, 18 | "spk1_snt3": { 19 | "wav": "{data_root}/spk1_snt3.wav", 20 | "length": 2.72, 21 | "spk_id": "spk1", 22 | "ali": "{data_root}/spk1_snt3.pkl", 23 | "phn": "ae cl t dh ae cl t hh ay l eh v ax l dh iy eh r ih z cl p y uh", 24 | "char": "a t t h a t h i g h l e v e l t h e a i r i s p u r e" 25 | }, 26 | "spk1_snt4": { 27 | "wav": "{data_root}/spk1_snt4.wav", 28 | "length": 2.53, 29 | "spk_id": "spk1", 30 | "ali": "{data_root}/spk1_snt4.pkl", 31 | "phn": "ey th ih n s cl t r ay cl p r ah n z vcl d aw n dh ax m ih vcl d ax", 32 | "char": "a t h i n s t r i p e r u n s d o w n t h e m i d d l e" 33 | }, 34 | "spk2_snt1": { 35 | "wav": "{data_root}/spk2_snt1.wav", 36 | "length": 2.01, 37 | "spk_id": "spk2", 38 | "ali": "{data_root}/spk2_snt1.pkl", 39 | "phn": "w iy er sh ao r dh ax dx w ah n w ao r ih z ih n ah", 40 | "char": "w e a r e s u r e t h a t o n e w o r e i s e n o u g h" 41 | }, 42 | "spk2_snt2": { 43 | "wav": "{data_root}/spk2_snt2.wav", 44 | "length": 1.76, 45 | "spk_id": "spk2", 46 | "ali": "{data_root}/spk2_snt2.pkl", 47 | "phn": "w ah cl t vcl jh oy dh eh r ih z ih n l ih v ih", 48 | "char": "w h a t j o y t h e r e i s i n l i v i n g" 49 | }, 50 | "spk2_snt3": { 51 | "wav": "{data_root}/spk2_snt3.wav", 52 | "length": 1.88, 53 | "spk_id": "spk2", 54 | "ali": "{data_root}/spk2_snt3.pkl", 55 | "phn": "t eh r ah th ih n sh iy cl t f er m dh iy y eh l ow cl p ae vcl", 56 | "char": "t h e r a t i n s h i p f r o m t h e y a l l o w p a v" 57 | }, 58 | "spk2_snt4": { 59 | "wav": "{data_root}/spk2_snt4.wav", 60 | "length": 2.04, 61 | "spk_id": "spk2", 62 | "ali": "{data_root}/spk2_snt4.pkl", 63 | "phn": "m eh n vcl d dh ax cl k ow cl t vcl b ih f ao r y uw vcl g ow aw cl", 64 | "char": "m e n t h e c o w t b e f o r e y o u g o o u t" 65 | } 66 | } -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/csv_example_sourcesep_mixture.csv: 
-------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts 2 | 3 | example0, 3.260, $data_folder/mixture_0.wav, wav, 4 | example1, 3.260, $data_folder/mixture_1.wav, wav, 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/csv_example_sourcesep_source1.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts 2 | 3 | example1, 3.260, $data_folder/source1_0.wav, wav, 4 | example3, 3.260, $data_folder/source1_1.wav, wav, 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/csv_example_sourcesep_source2.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts 2 | 3 | example2, 3.260, $data_folder/source2_0.wav, wav, 4 | example4, 3.260, $data_folder/source2_1.wav, wav, 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/minimal_example_convtasnet_cv.csv: -------------------------------------------------------------------------------- 1 | ID, duration, mix_wav, mix_wav_format, mix_wav_opts, s1_wav, s1_wav_format, s1_wav_opts, s2_wav, s2_wav_format, s2_wav_opts 2 | 0,1.0,$data_root/mixture_2.wav,wav,,$data_root/source1_2.wav,wav,,$data_root/source2_2.wav,wav, 3 | -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/minimal_example_convtasnet_tr.csv: -------------------------------------------------------------------------------- 1 | ID, duration, mix_wav, mix_wav_format, mix_wav_opts, s1_wav, s1_wav_format, s1_wav_opts, s2_wav, s2_wav_format, s2_wav_opts 2 | 0,1.0,$data_root/mixture_0.wav,wav,,$data_root/source1_0.wav,wav,,$data_root/source2_0.wav,wav, 3 | 1,1.0,$data_root/mixture_1.wav,wav,,$data_root/source1_1.wav,wav,,$data_root/source2_1.wav,wav, 4 | -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/minimal_example_convtasnet_tt.csv: -------------------------------------------------------------------------------- 1 | ID, duration, mix_wav, mix_wav_format, mix_wav_opts, s1_wav, s1_wav_format, s1_wav_opts, s2_wav, s2_wav_format, s2_wav_opts 2 | 0,1.0,$data_root/mixture_3.wav,wav,,$data_root/source1_3.wav,wav,,$data_root/source2_3.wav,wav, 3 | -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/mixture_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/sourcesep_samples/mixture_0.wav -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/mixture_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/sourcesep_samples/mixture_1.wav -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/mixture_2.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/sourcesep_samples/mixture_2.wav -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/mixture_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/sourcesep_samples/mixture_3.wav -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/source1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/sourcesep_samples/source1_0.wav -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/source1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/sourcesep_samples/source1_1.wav -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/source1_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/sourcesep_samples/source1_2.wav -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/source1_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/sourcesep_samples/source1_3.wav -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/source2_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/sourcesep_samples/source2_0.wav -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/source2_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/sourcesep_samples/source2_1.wav -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/source2_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/sourcesep_samples/source2_2.wav -------------------------------------------------------------------------------- /samples/audio_samples/sourcesep_samples/source2_3.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/sourcesep_samples/source2_3.wav -------------------------------------------------------------------------------- /samples/audio_samples/test_csv_merge.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts, spk_id, spk_id_format, spk_id_opts 2 | 3 | example1, 3.260, $data_folder/example1.wav, wav, , spk01, string, 4 | example2, 2.068, $data_folder/example2.flac, flac, , spk02, string, 5 | example3, 2.890, $data_folder/example3.sph, wav, , spk03, string, 6 | example5, 1.000, $data_folder/example5.wav, wav, start:10000 stop:26000, spk05, string, 7 | 8 | example1, 3.260, $data_folder/example1.wav, wav, , spk01, string, 9 | 10 | -------------------------------------------------------------------------------- /samples/audio_samples/test_mixture.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/test_mixture.wav -------------------------------------------------------------------------------- /samples/audio_samples/vad/train.json: -------------------------------------------------------------------------------- 1 | { 2 | "example_1": { 3 | "length": 32000, 4 | "wav": { 5 | "file": "{data_folder}/train.wav", 6 | "start": 0, 7 | "stop": 32000 8 | }, 9 | "speech": "0.52 0.85 1.32 1.83" 10 | }, 11 | "example_2": { 12 | "length": 32000, 13 | "wav": { 14 | "file": "{data_folder}/train.wav", 15 | "start": 32000, 16 | "stop": 64000 17 | }, 18 | "speech": "0.35 1.70" 19 | } 20 | } -------------------------------------------------------------------------------- /samples/audio_samples/vad/train.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/vad/train.wav -------------------------------------------------------------------------------- /samples/audio_samples/vad/valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "example_1": { 3 | "length": 32000, 4 | "wav": { 5 | "file": "{data_folder}/valid.wav", 6 | "start": 0, 7 | "stop": 32000 8 | }, 9 | "speech": "0.38 2" 10 | }, 11 | "example_2": { 12 | "length": 32000, 13 | "wav": { 14 | "file": "{data_folder}/valid.wav", 15 | "start": 32000, 16 | "stop": 64000 17 | }, 18 | "speech": "0 0.7" 19 | }, 20 | "example_3": { 21 | "length": 32000, 22 | "wav": { 23 | "file": "{data_folder}/valid.wav", 24 | "start": 64000, 25 | "stop": 96000 26 | }, 27 | "speech": "0.1 1.88" 28 | }, 29 | "example_4": { 30 | "length": 32000, 31 | "wav": { 32 | "file": "{data_folder}/valid.wav", 33 | "start": 96000, 34 | "stop": 128000 35 | }, 36 | "speech": "" 37 | } 38 | } -------------------------------------------------------------------------------- /samples/audio_samples/vad/valid.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/audio_samples/vad/valid.wav -------------------------------------------------------------------------------- /samples/label_samples/hyp.csv: 
-------------------------------------------------------------------------------- 1 | ID,duration,phn,phn_format,phn_opts 2 | example1,3.1,a b d,string, 3 | example2,4.5,e f,string, 4 | -------------------------------------------------------------------------------- /samples/label_samples/ref.csv: -------------------------------------------------------------------------------- 1 | ID,duration,phn,phn_format,phn_opts 2 | example1,3.1,a b c,string, 3 | example2,4.5,d e f,string, 4 | -------------------------------------------------------------------------------- /samples/noise_samples/noise.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts 2 | 3 | noise1, 33.12325, samples/noise_samples/noise1.wav, wav, 4 | noise2, 5.0, samples/noise_samples/noise2.wav, wav, 5 | noise3, 1.0, samples/noise_samples/noise3.wav, wav, start:0 stop:16000 6 | noise4, 17.65875, samples/noise_samples/noise4.wav, wav, 7 | noise5, 13.685625, samples/noise_samples/noise5.wav, wav, 8 | -------------------------------------------------------------------------------- /samples/noise_samples/noise1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/noise_samples/noise1.wav -------------------------------------------------------------------------------- /samples/noise_samples/noise2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/noise_samples/noise2.wav -------------------------------------------------------------------------------- /samples/noise_samples/noise3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/noise_samples/noise3.wav -------------------------------------------------------------------------------- /samples/noise_samples/noise4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/noise_samples/noise4.wav -------------------------------------------------------------------------------- /samples/noise_samples/noise5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/noise_samples/noise5.wav -------------------------------------------------------------------------------- /samples/noise_samples/noise_multichannel.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts 2 | 3 | noise_multichannel1, 5.0, samples/noise_samples/noise_multichannel.wav, wav, start:0 stop:80000 4 | noise_multichannel2, 5.0, samples/noise_samples/noise_multichannel.wav, wav, start:80000 stop:160000 5 | -------------------------------------------------------------------------------- /samples/noise_samples/noise_multichannel.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/noise_samples/noise_multichannel.wav -------------------------------------------------------------------------------- /samples/noise_samples/noise_rel.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts 2 | 3 | noise1, 33.12325, $noise_folder/noise1.wav, wav, 4 | noise2, 5.0, $noise_folder/noise2.wav, wav, 5 | noise3, 1.0, $noise_folder/noise3.wav, wav, start:0 stop:16000 6 | noise4, 17.65875, $noise_folder/noise4.wav, wav, 7 | noise5, 13.685625, $noise_folder/noise5.wav, wav, 8 | -------------------------------------------------------------------------------- /samples/plda_xvect_samples/enrol_stat_xvect.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/plda_xvect_samples/enrol_stat_xvect.pkl -------------------------------------------------------------------------------- /samples/plda_xvect_samples/expected_plda_scores.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/plda_xvect_samples/expected_plda_scores.pkl -------------------------------------------------------------------------------- /samples/plda_xvect_samples/test_stat_xvect.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/plda_xvect_samples/test_stat_xvect.pkl -------------------------------------------------------------------------------- /samples/plda_xvect_samples/train_stat_xvect.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/plda_xvect_samples/train_stat_xvect.pkl -------------------------------------------------------------------------------- /samples/rir_samples/rir1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/rir_samples/rir1.wav -------------------------------------------------------------------------------- /samples/rir_samples/rir2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/rir_samples/rir2.wav -------------------------------------------------------------------------------- /samples/rir_samples/rir3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/rir_samples/rir3.wav -------------------------------------------------------------------------------- /samples/rir_samples/rir4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/rir_samples/rir4.wav 
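--------------------------------------------------------------------------------
Note on the noise CSV lists above (noise.csv, noise_rel.csv) and the RIR lists that follow: each row gives an ID, a duration in seconds, a wav path that may contain a placeholder such as $noise_folder or $rir_folder, a format field, and an optional wav_opts string like "start:0 stop:16000" that selects a sample range. The snippet below is only a minimal sketch of how one such row could be interpreted; the canonical reader is SpeechBrain's legacy CSV data io, and the variable names here are hypothetical.

import torchaudio

# Hypothetical values standing in for one row of noise_rel.csv:
#   noise3, 1.0, $noise_folder/noise3.wav, wav, start:0 stop:16000
noise_folder = "samples/noise_samples"
wav_path = "$noise_folder/noise3.wav".replace("$noise_folder", noise_folder)
start, stop = 0, 16000  # wav_opts are expressed in samples, not seconds

signal, sample_rate = torchaudio.load(wav_path)  # tensor shaped [channels, time]
chunk = signal[:, start:stop]                    # keep only the requested sample range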
-------------------------------------------------------------------------------- /samples/rir_samples/rir_multichannel.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts 2 | 3 | rir_multichannel, 0.5, samples/rir_samples/rir_multichannel.wav, wav, 4 | -------------------------------------------------------------------------------- /samples/rir_samples/rir_multichannel.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/rir_samples/rir_multichannel.wav -------------------------------------------------------------------------------- /samples/rir_samples/rirs.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts 2 | rir1, 1.0, samples/rir_samples/rir1.wav, wav, 3 | rir2, 1.3653125, samples/rir_samples/rir2.wav, wav, 4 | rir3, 2.0, samples/rir_samples/rir3.wav, wav, 5 | rir4, 0.5, samples/rir_samples/rir4.wav, wav, 6 | 7 | -------------------------------------------------------------------------------- /samples/rir_samples/rirs_rel.csv: -------------------------------------------------------------------------------- 1 | ID, duration, wav, wav_format, wav_opts 2 | 3 | rir1, 1.0, $rir_folder/rir1.wav, wav, 4 | rir2, 1.3653125, $rir_folder/rir2.wav, wav, 5 | rir3, 2.0, $rir_folder/rir3.wav, wav, 6 | rir4, 0.5, $rir_folder/rir4.wav, wav, 7 | 8 | -------------------------------------------------------------------------------- /samples/rttm_samples/ReadMe.md: -------------------------------------------------------------------------------- 1 | ## RTTM Files 2 | ###### The sample RTTM files given in this directory are generated from manual annotations from the AMI corpus (http://groups.inf.ed.ac.uk/ami/corpus/). 3 | ###### The AMI corpus and its annotations are released under the Creative Commons Attribution 4.0 International Public License agreement (CC BY 4.0). Use of this data implies agreement with the license terms. See also: https://creativecommons.org/licenses/by/4.0/ 4 | -------------------------------------------------------------------------------- /samples/text_samples/hdf5_example.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/text_samples/hdf5_example.h5 -------------------------------------------------------------------------------- /samples/text_samples/label_dict.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/text_samples/label_dict.pkl -------------------------------------------------------------------------------- /samples/text_samples/readme.txt: -------------------------------------------------------------------------------- 1 | hdf5_example.h5 contains an example of an HDF5-format dataset for text-only data. 2 | The structure of the file is as follows. 3 | 4 | hdf5_example.h5 - wrd - "good morning" 5 | - "good evening" 6 | - char - "g o o d _ m o r n i n g" 7 | - "g o o d _ e v e n i n g" 8 | 9 | The label_dict.pkl file is used by the HDF5 dataloader and dataset.
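A minimal sketch of inspecting this structure with h5py (this is not the SpeechBrain HDF5 dataloader itself; the "wrd" and "char" group names come from the diagram above, and the exact on-disk layout is assumed):

import h5py

with h5py.File("samples/text_samples/hdf5_example.h5", "r") as f:
    print(list(f.keys()))   # expected to include the "wrd" and "char" entries
    for key in f.keys():
        print(key, f[key])  # inspect each group/dataset without assuming its exact type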
10 | -------------------------------------------------------------------------------- /samples/voxceleb_samples/meta/iden_split.txt: -------------------------------------------------------------------------------- 1 | 1 id10001/1zcIwhmdeo4/00001.wav 2 | 2 id10001/1zcIwhmdeo4/00002.wav 3 | 3 id10001/1zcIwhmdeo4/00003.wav 4 | 1 id10002/xTV-jFAUKcw/00001.wav 5 | 2 id10002/xTV-jFAUKcw/00002.wav 6 | 3 id10002/xTV-jFAUKcw/00003.wav 7 | -------------------------------------------------------------------------------- /samples/voxceleb_samples/readme.txt: -------------------------------------------------------------------------------- 1 | 2 | This is a small sample dataset containing 6 audio clips taken from a subset of the voxceleb1 dataset (http://www.robots.ox.ac.uk/~vgg/data/voxceleb/), which is distributed under the Creative Commons Attribution 4.0 International License (https://creativecommons.org/licenses/by/4.0/). 3 | In this sample data, we have edited the train-dev-test split in the iden_split_sample.txt file. Please refer to http://www.robots.ox.ac.uk/~vgg/data/voxceleb/ for more information on the complete original dataset. 4 | 5 | -------------------------------------------------------------------------------- /samples/voxceleb_samples/wav/dev.csv: -------------------------------------------------------------------------------- 1 | ID,duration,wav,wav_format,wav_opts,spk_id,spk_id_format,spk_id_opts 2 | id10001---1zcIwhmdeo4---00001_0_300,3.0,$data_folder/id10001/1zcIwhmdeo4/00001.wav,wav,start:0 stop:48000,id10001,string, 3 | id10001---1zcIwhmdeo4---00001_300_600,3.0,$data_folder/id10001/1zcIwhmdeo4/00001.wav,wav,start:48000 stop:96000,id10001,string, 4 | id10002---xTV-jFAUKcw---00001_0_300,3.0,$data_folder/id10002/xTV-jFAUKcw/00001.wav,wav,start:0 stop:48000,id10002,string, 5 | -------------------------------------------------------------------------------- /samples/voxceleb_samples/wav/id10001/1zcIwhmdeo4/00001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/voxceleb_samples/wav/id10001/1zcIwhmdeo4/00001.wav -------------------------------------------------------------------------------- /samples/voxceleb_samples/wav/id10001/1zcIwhmdeo4/00002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/voxceleb_samples/wav/id10001/1zcIwhmdeo4/00002.wav -------------------------------------------------------------------------------- /samples/voxceleb_samples/wav/id10001/1zcIwhmdeo4/00003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/voxceleb_samples/wav/id10001/1zcIwhmdeo4/00003.wav -------------------------------------------------------------------------------- /samples/voxceleb_samples/wav/id10002/xTV-jFAUKcw/00001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/voxceleb_samples/wav/id10002/xTV-jFAUKcw/00001.wav -------------------------------------------------------------------------------- /samples/voxceleb_samples/wav/id10002/xTV-jFAUKcw/00002.wav:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/voxceleb_samples/wav/id10002/xTV-jFAUKcw/00002.wav -------------------------------------------------------------------------------- /samples/voxceleb_samples/wav/id10002/xTV-jFAUKcw/00003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/samples/voxceleb_samples/wav/id10002/xTV-jFAUKcw/00003.wav -------------------------------------------------------------------------------- /samples/voxceleb_samples/wav/train.csv: -------------------------------------------------------------------------------- 1 | ID,duration,wav,wav_format,wav_opts,spk_id,spk_id_format,spk_id_opts 2 | id10001---1zcIwhmdeo4---00003_0_300,3.0,$data_folder/id10001/1zcIwhmdeo4/00003.wav,wav,start:0 stop:48000,id10001,string, 3 | id10002---xTV-jFAUKcw---00002_0_300,3.0,$data_folder/id10002/xTV-jFAUKcw/00002.wav,wav,start:0 stop:48000,id10002,string, 4 | id10001---1zcIwhmdeo4---00002_0_300,3.0,$data_folder/id10001/1zcIwhmdeo4/00002.wav,wav,start:0 stop:48000,id10001,string, 5 | id10002---xTV-jFAUKcw---00003_0_300,3.0,$data_folder/id10002/xTV-jFAUKcw/00003.wav,wav,start:0 stop:48000,id10002,string, 6 | id10001---1zcIwhmdeo4---00002_300_600,3.0,$data_folder/id10001/1zcIwhmdeo4/00002.wav,wav,start:48000 stop:96000,id10001,string, 7 | id10002---xTV-jFAUKcw---00003_300_600,3.0,$data_folder/id10002/xTV-jFAUKcw/00003.wav,wav,start:48000 stop:96000,id10002,string, 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import site 5 | import setuptools 6 | from distutils.core import setup 7 | 8 | 9 | # Editable install in user site directory can be allowed with this hack: 10 | # https://github.com/pypa/pip/issues/7953. 
11 | site.ENABLE_USER_SITE = "--user" in sys.argv[1:] 12 | 13 | with open("README.md") as f: 14 | long_description = f.read() 15 | 16 | with open(os.path.join("speechbrain", "version.txt")) as f: 17 | version = f.read().strip() 18 | 19 | setup( 20 | name="speechbrain", 21 | version=version, 22 | description="All-in-one speech toolkit in pure Python and Pytorch", 23 | long_description=long_description, 24 | long_description_content_type="text/markdown", 25 | author="Mirco Ravanelli & Others", 26 | author_email="speechbrain@gmail.com", 27 | packages=setuptools.find_packages(), 28 | package_data={"speechbrain": ["version.txt", "log-config.yaml"]}, 29 | install_requires=[ 30 | "hyperpyyaml", 31 | "joblib", 32 | "numpy", 33 | "packaging", 34 | "scipy", 35 | "sentencepiece", 36 | "torch>=1.7,<=1.11", 37 | "torchaudio", 38 | "tqdm", 39 | "huggingface_hub", 40 | ], 41 | python_requires=">=3.7", 42 | url="https://speechbrain.github.io/", 43 | ) 44 | -------------------------------------------------------------------------------- /speechbrain.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /speechbrain.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | hyperpyyaml 2 | joblib 3 | numpy 4 | packaging 5 | scipy 6 | sentencepiece 7 | torch<=1.11,>=1.7 8 | torchaudio 9 | tqdm 10 | huggingface_hub 11 | -------------------------------------------------------------------------------- /speechbrain.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | speechbrain 2 | -------------------------------------------------------------------------------- /speechbrain/__init__.py: -------------------------------------------------------------------------------- 1 | """ Comprehensive speech processing toolkit 2 | """ 3 | import os 4 | from .core import Stage, Brain, create_experiment_directory, parse_arguments 5 | from . import alignment # noqa 6 | from . import dataio # noqa 7 | from . import decoders # noqa 8 | from . import lobes # noqa 9 | from . import lm # noqa 10 | from . import nnet # noqa 11 | from . import processing # noqa 12 | from . import tokenizers # noqa 13 | from . 
import utils # noqa 14 | 15 | with open(os.path.join(os.path.dirname(__file__), "version.txt")) as f: 16 | version = f.read().strip() 17 | 18 | __all__ = [ 19 | "Stage", 20 | "Brain", 21 | "create_experiment_directory", 22 | "parse_arguments", 23 | ] 24 | 25 | __version__ = version 26 | -------------------------------------------------------------------------------- /speechbrain/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/__pycache__/core.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/__pycache__/core.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/__pycache__/core.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/__pycache__/core.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/alignment/__init__.py: -------------------------------------------------------------------------------- 1 | """Tools for aligning transcripts and speech signals 2 | """ 3 | -------------------------------------------------------------------------------- /speechbrain/alignment/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/alignment/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/alignment/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/alignment/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__init__.py: -------------------------------------------------------------------------------- 1 | """Data loading and dataset preprocessing 2 | """ 3 | import os 4 | 5 | __all__ = [] 6 | for filename in os.listdir(os.path.dirname(__file__)): 7 | filename = os.path.basename(filename) 8 | if filename.endswith(".py") and not filename.startswith("__"): 9 | __all__.append(filename[:-3]) 10 | 11 | from . 
import * # noqa 12 | -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/batch.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/batch.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/batch.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/batch.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/dataio.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/dataio.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/dataio.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/dataio.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/dataloader.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/dataloader.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/dataloader.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/dataloader.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/dataset.cpython-37.pyc 
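--------------------------------------------------------------------------------
The speechbrain/dataio/__init__.py listed above builds __all__ from the .py files it finds next to itself and then runs "from . import *", so every dataio submodule is imported into the package namespace. A small usage sketch of what this makes reachable (PaddedBatch is assumed to be defined in dataio/batch.py, as in upstream SpeechBrain):

import speechbrain

# The submodules were imported dynamically by dataio/__init__.py,
# so they are reachable as attributes without further imports:
print(speechbrain.dataio.batch)    # the batch submodule
print(speechbrain.dataio.dataset)  # the dataset submodule
padded_batch_cls = speechbrain.dataio.batch.PaddedBatch  # assumed class name from upstream SpeechBrain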
-------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/encoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/encoder.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/encoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/encoder.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/iterators.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/iterators.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/iterators.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/iterators.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/legacy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/legacy.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/legacy.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/legacy.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/preprocess.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/preprocess.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/preprocess.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/preprocess.cpython-38.pyc -------------------------------------------------------------------------------- 
/speechbrain/dataio/__pycache__/sampler.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/sampler.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/sampler.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/sampler.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/wer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/wer.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/__pycache__/wer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/dataio/__pycache__/wer.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/dataio/preprocess.py: -------------------------------------------------------------------------------- 1 | """Preprocessors for audio""" 2 | import torch 3 | import functools 4 | from speechbrain.processing.speech_augmentation import Resample 5 | 6 | 7 | class AudioNormalizer: 8 | """Normalizes audio into a standard format 9 | 10 | Arguments 11 | --------- 12 | sample_rate : int 13 | The sampling rate to which the incoming signals should be converted. 14 | mix : {"avg-to-mono", "keep"} 15 | "avg-to-mono" - add all channels together and normalize by number of 16 | channels. This also removes the channel dimension, resulting in [time] 17 | format tensor. 18 | "keep" - don't normalize channel information 19 | 20 | Example 21 | ------- 22 | >>> import torchaudio 23 | >>> example_file = 'samples/audio_samples/example_multichannel.wav' 24 | >>> signal, sr = torchaudio.load(example_file, channels_first = False) 25 | >>> normalizer = AudioNormalizer(sample_rate=8000) 26 | >>> normalized = normalizer(signal, sr) 27 | >>> signal.shape 28 | torch.Size([33882, 2]) 29 | >>> normalized.shape 30 | torch.Size([16941]) 31 | 32 | NOTE 33 | ---- 34 | This will also upsample audio. However, upsampling cannot produce meaningful 35 | information in the bandwidth which it adds. Generally models will not work 36 | well for upsampled data if they have not specifically been trained to do so. 37 | """ 38 | 39 | def __init__(self, sample_rate=16000, mix="avg-to-mono"): 40 | self.sample_rate = sample_rate 41 | if mix not in ["avg-to-mono", "keep"]: 42 | raise ValueError(f"Unexpected mixing configuration {mix}") 43 | self.mix = mix 44 | self._cached_resample = functools.lru_cache(maxsize=12)(Resample) 45 | 46 | def __call__(self, audio, sample_rate): 47 | """Perform normalization 48 | 49 | Arguments 50 | --------- 51 | audio : tensor 52 | The input waveform torch tensor. Assuming [time, channels], 53 | or [time]. 
54 | """ 55 | resampler = self._cached_resample(sample_rate, self.sample_rate) 56 | resampled = resampler(audio.unsqueeze(0)).squeeze(0) 57 | return self._mix(resampled) 58 | 59 | def _mix(self, audio): 60 | """Handle channel mixing""" 61 | flat_input = audio.dim() == 1 62 | if self.mix == "avg-to-mono": 63 | if flat_input: 64 | return audio 65 | return torch.mean(audio, 1) 66 | if self.mix == "keep": 67 | return audio 68 | -------------------------------------------------------------------------------- /speechbrain/decoders/__init__.py: -------------------------------------------------------------------------------- 1 | """ Package containing the different decoders (ctc, beamsearch ...) 2 | """ 3 | from .seq2seq import * # noqa 4 | from .ctc import * # noqa 5 | -------------------------------------------------------------------------------- /speechbrain/decoders/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/decoders/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/decoders/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/decoders/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/decoders/__pycache__/ctc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/decoders/__pycache__/ctc.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/decoders/__pycache__/ctc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/decoders/__pycache__/ctc.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/decoders/__pycache__/seq2seq.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/decoders/__pycache__/seq2seq.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/decoders/__pycache__/seq2seq.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/decoders/__pycache__/seq2seq.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lm/__init__.py: -------------------------------------------------------------------------------- 1 | """ Package defining language models 2 | """ 3 | -------------------------------------------------------------------------------- /speechbrain/lm/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lm/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/lm/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lm/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/__init__.py: -------------------------------------------------------------------------------- 1 | """ Package defining common blocks (DNN models, processing ...) 2 | 3 | This subpackage gathers higher level blocks, or "lobes". 4 | The classes here may leverage the extended YAML syntax. 5 | """ 6 | from . import models # noqa 7 | -------------------------------------------------------------------------------- /speechbrain/lobes/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/__pycache__/augment.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/__pycache__/augment.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/beamform_multimic.py: -------------------------------------------------------------------------------- 1 | """Beamformer for multi-mic processing. 2 | 3 | Authors 4 | * Nauman Dawalatabad 5 | """ 6 | import torch 7 | from speechbrain.processing.features import ( 8 | STFT, 9 | ISTFT, 10 | ) 11 | 12 | from speechbrain.processing.multi_mic import ( 13 | Covariance, 14 | GccPhat, 15 | DelaySum, 16 | ) 17 | 18 | 19 | class DelaySum_Beamformer(torch.nn.Module): 20 | """Generate beamformed signal from multi-mic data using DelaySum beamforming. 21 | 22 | Arguments 23 | --------- 24 | sampling_rate : int (default: 16000) 25 | Sampling rate of audio signals. 26 | """ 27 | 28 | def __init__(self, sampling_rate=16000): 29 | super().__init__() 30 | self.fs = sampling_rate 31 | self.stft = STFT(sample_rate=self.fs) 32 | self.cov = Covariance() 33 | self.gccphat = GccPhat() 34 | self.delaysum = DelaySum() 35 | self.istft = ISTFT(sample_rate=self.fs) 36 | 37 | def forward(self, mics_signals): 38 | """Returns beamformed signal using multi-mic data. 39 | 40 | Arguments 41 | --------- 42 | mics_signals : tensor 43 | Set of audio signals to be transformed.
44 | """ 45 | with torch.no_grad(): 46 | 47 | Xs = self.stft(mics_signals) 48 | XXs = self.cov(Xs) 49 | tdoas = self.gccphat(XXs) 50 | Ys_ds = self.delaysum(Xs, tdoas) 51 | sig = self.istft(Ys_ds) 52 | 53 | return sig 54 | -------------------------------------------------------------------------------- /speechbrain/lobes/models/VanillaNN.py: -------------------------------------------------------------------------------- 1 | """Vanilla Neural Network for simple tests. 2 | 3 | Authors 4 | * Elena Rastorgueva 2020 5 | """ 6 | import torch 7 | import speechbrain as sb 8 | 9 | 10 | class VanillaNN(sb.nnet.containers.Sequential): 11 | """A simple vanilla Deep Neural Network. 12 | 13 | Arguments 14 | --------- 15 | activation : torch class 16 | A class used for constructing the activation layers. 17 | dnn_blocks : int 18 | The number of linear neural blocks to include. 19 | dnn_neurons : int 20 | The number of neurons in the linear layers. 21 | 22 | Example 23 | ------- 24 | >>> inputs = torch.rand([10, 120, 60]) 25 | >>> model = VanillaNN(input_shape=inputs.shape) 26 | >>> outputs = model(inputs) 27 | >>> outputs.shape 28 | torch.Size([10, 120, 512]) 29 | """ 30 | 31 | def __init__( 32 | self, 33 | input_shape, 34 | activation=torch.nn.LeakyReLU, 35 | dnn_blocks=2, 36 | dnn_neurons=512, 37 | ): 38 | super().__init__(input_shape=input_shape) 39 | 40 | for block_index in range(dnn_blocks): 41 | self.append( 42 | sb.nnet.linear.Linear, 43 | n_neurons=dnn_neurons, 44 | bias=True, 45 | layer_name="linear", 46 | ) 47 | self.append(activation(), layer_name="act") 48 | -------------------------------------------------------------------------------- /speechbrain/lobes/models/__init__.py: -------------------------------------------------------------------------------- 1 | """ Package defining neural network models (CRDNN, Xvectors ...)
2 | """ 3 | -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/conv_tasnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/conv_tasnet.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/dual_path.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/dual_path.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/dual_path.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/dual_path.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/dual_path2.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/dual_path2.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/dual_path_context.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/dual_path_context.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/dual_path_conv.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/dual_path_conv.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/dual_path_conv2.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/dual_path_conv2.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/dual_path_multi_scale.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/dual_path_multi_scale.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/dual_path_splitnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/dual_path_splitnet.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/dual_path_splitnet_exchange.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/dual_path_splitnet_exchange.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/galr.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/galr.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/norms.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/norms.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/torch_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/torch_utils.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/u_net.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/u_net.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/__pycache__/unet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/__pycache__/unet.cpython-38.pyc -------------------------------------------------------------------------------- 
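A short usage sketch for the DelaySum_Beamformer defined in speechbrain/lobes/beamform_multimic.py above. The [batch, time, channels] input layout is an assumption based on the STFT/ISTFT blocks it wraps; this is an illustration, not part of the library:

import torch
from speechbrain.lobes.beamform_multimic import DelaySum_Beamformer

# One second of hypothetical 4-microphone audio at 16 kHz, shaped [batch, time, channels].
mics = torch.rand(1, 16000, 4)

beamformer = DelaySum_Beamformer(sampling_rate=16000)
enhanced = beamformer(mics)  # beamformed signal, computed under torch.no_grad() inside forward()
--------------------------------------------------------------------------------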
/speechbrain/lobes/models/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | """High level processing blocks. 2 | 3 | This subpackage gathers higher level blocks, or "lobes". 4 | The classes here may leverage the extended YAML syntax. 5 | """ 6 | -------------------------------------------------------------------------------- /speechbrain/lobes/models/transformer/__pycache__/Conformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/transformer/__pycache__/Conformer.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/transformer/__pycache__/Conformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/transformer/__pycache__/Conformer.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/transformer/__pycache__/Transformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/transformer/__pycache__/Transformer.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/transformer/__pycache__/Transformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/transformer/__pycache__/Transformer.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/transformer/__pycache__/Transformer_GALR.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/transformer/__pycache__/Transformer_GALR.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/transformer/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/transformer/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/lobes/models/transformer/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/lobes/models/transformer/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/log-config.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | disable_existing_loggers: False 3 | formatters: 4 | 
simple: 5 | format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 6 | console: 7 | format: "%(name)s - %(message)s" 8 | 9 | handlers: 10 | console: 11 | class: speechbrain.utils.logger.TqdmCompatibleStreamHandler 12 | level: INFO 13 | formatter: console 14 | stream: ext://sys.stdout 15 | 16 | file_handler: 17 | class: logging.FileHandler 18 | level: DEBUG 19 | formatter: simple 20 | filename: log.txt 21 | encoding: utf8 22 | 23 | root: 24 | level: DEBUG 25 | handlers: [console, file_handler] 26 | -------------------------------------------------------------------------------- /speechbrain/nnet/__init__.py: -------------------------------------------------------------------------------- 1 | """ Package containing the different neural networks layers 2 | """ 3 | import os 4 | 5 | __all__ = [] 6 | for filename in os.listdir(os.path.dirname(__file__)): 7 | filename = os.path.basename(filename) 8 | if filename.endswith(".py") and not filename.startswith("__"): 9 | __all__.append(filename[:-3]) 10 | 11 | from . import * # noqa 12 | from .loss import stoi_loss # noqa 13 | -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/CNN.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/CNN.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/CNN.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/CNN.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/RNN.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/RNN.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/RNN.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/RNN.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/activations.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/activations.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/activations.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/activations.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/attention.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/attention.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/attention.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/attention.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/containers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/containers.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/containers.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/containers.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/dropout.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/dropout.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/dropout.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/dropout.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/embedding.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/embedding.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/embedding.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/embedding.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/linear.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/linear.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/linear.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/linear.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/losses.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/losses.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/losses.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/losses.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/normalization.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/normalization.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/normalization.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/normalization.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/pooling.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/pooling.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/pooling.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/pooling.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/schedulers.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/schedulers.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/__pycache__/schedulers.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/__pycache__/schedulers.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/complex_networks/__init__.py: -------------------------------------------------------------------------------- 1 | """Package containing complex neural networks 2 | """ 3 | -------------------------------------------------------------------------------- /speechbrain/nnet/dropout.py: -------------------------------------------------------------------------------- 1 | """Library implementing dropout. 2 | 3 | Authors 4 | * Mirco Ravanelli 2020 5 | """ 6 | import torch # noqa: F401 7 | import logging 8 | import torch.nn as nn 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class Dropout2d(nn.Module): 14 | """This function implements dropout 2d. It randomly put zeros on 15 | entire channels. 16 | 17 | 18 | Arguments 19 | --------- 20 | dropout_rate : float 21 | It is the dropout factor (between 0 and 1). 22 | inplace : bool 23 | If True, it uses inplace operations. 24 | 25 | Example 26 | ------- 27 | >>> drop = Dropout2d(drop_rate=0.5) 28 | >>> inputs = torch.rand(10, 50, 40) 29 | >>> output=drop(inputs) 30 | >>> output.shape 31 | torch.Size([10, 50, 40]) 32 | """ 33 | 34 | def __init__( 35 | self, drop_rate, inplace=False, 36 | ): 37 | super().__init__() 38 | self.drop_rate = drop_rate 39 | self.inplace = inplace 40 | self.drop = nn.Dropout2d(p=self.drop_rate, inplace=self.inplace) 41 | 42 | def forward(self, x): 43 | """Applies dropout 2d to the input tensor. 44 | 45 | Arguments 46 | --------- 47 | x : torch.Tensor (batch, time, channel1, channel2) 48 | input to normalize. 4d tensors are expected. 49 | """ 50 | 51 | # time must be the last 52 | x = x.transpose(1, 2).transpose(2, -1) 53 | x_drop = self.drop(x) 54 | x_drop = x_drop.transpose(-1, 1).transpose(2, -1) 55 | 56 | return x_drop 57 | -------------------------------------------------------------------------------- /speechbrain/nnet/loss/__init__.py: -------------------------------------------------------------------------------- 1 | """Package containing specific losses (transducer, stoi ...) 
2 | """ 3 | -------------------------------------------------------------------------------- /speechbrain/nnet/loss/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/loss/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/loss/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/loss/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/loss/__pycache__/stoi_loss.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/loss/__pycache__/stoi_loss.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/loss/__pycache__/stoi_loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/nnet/loss/__pycache__/stoi_loss.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/nnet/quaternion_networks/__init__.py: -------------------------------------------------------------------------------- 1 | """Package containing quaternion neural networks 2 | """ 3 | -------------------------------------------------------------------------------- /speechbrain/nnet/transducer/__init__.py: -------------------------------------------------------------------------------- 1 | """Package containing transducer neural networks 2 | """ 3 | -------------------------------------------------------------------------------- /speechbrain/pretrained/__init__.py: -------------------------------------------------------------------------------- 1 | """Pretrained models""" 2 | 3 | from .interfaces import * # noqa 4 | -------------------------------------------------------------------------------- /speechbrain/pretrained/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/pretrained/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/pretrained/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/pretrained/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/pretrained/__pycache__/fetching.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/pretrained/__pycache__/fetching.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/pretrained/__pycache__/fetching.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/pretrained/__pycache__/fetching.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/pretrained/__pycache__/interfaces.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/pretrained/__pycache__/interfaces.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/pretrained/__pycache__/interfaces.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/pretrained/__pycache__/interfaces.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/processing/__init__.py: -------------------------------------------------------------------------------- 1 | """ Package containing various techniques of speech processing 2 | """ 3 | -------------------------------------------------------------------------------- /speechbrain/processing/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/processing/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/processing/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/processing/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/processing/__pycache__/signal_processing.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/processing/__pycache__/signal_processing.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/processing/__pycache__/signal_processing.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/processing/__pycache__/signal_processing.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/processing/__pycache__/speech_augmentation.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/processing/__pycache__/speech_augmentation.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/processing/__pycache__/speech_augmentation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/processing/__pycache__/speech_augmentation.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/tokenizers/__init__.py: -------------------------------------------------------------------------------- 1 | """ Package defining the SentencePiece tokenizer 2 | """ 3 | -------------------------------------------------------------------------------- /speechbrain/tokenizers/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/tokenizers/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/tokenizers/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/tokenizers/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/Accuracy.py: -------------------------------------------------------------------------------- 1 | """Calculate accuracy. 2 | 3 | Authors 4 | * Jianyuan Zhong 2020 5 | """ 6 | import torch 7 | from speechbrain.dataio.dataio import length_to_mask 8 | 9 | 10 | def Accuracy(log_probabilities, targets, length=None): 11 | """Calculates the accuracy for predicted log probabilities and targets in a batch. 12 | 13 | Arguments 14 | ---------- 15 | log_probabilities : tensor 16 | Predicted log probabilities (batch_size, time, feature). 17 | targets : tensor 18 | Target (batch_size, time). 19 | length : tensor 20 | Length of target (batch_size,). 21 | 22 | Example 23 | ------- 24 | >>> probs = torch.tensor([[0.9, 0.1], [0.1, 0.9], [0.8, 0.2]]).unsqueeze(0) 25 | >>> acc = Accuracy(torch.log(probs), torch.tensor([1, 1, 0]).unsqueeze(0), torch.tensor([2/3])) 26 | >>> print(acc) 27 | (1.0, 2.0) 28 | """ 29 | if length is not None: 30 | mask = length_to_mask( 31 | length * targets.shape[1], max_len=targets.shape[1], 32 | ).bool() 33 | if len(targets.shape) == 3: 34 | mask = mask.unsqueeze(2).repeat(1, 1, targets.shape[2]) 35 | 36 | padded_pred = log_probabilities.argmax(-1) 37 | 38 | if length is not None: 39 | numerator = torch.sum( 40 | padded_pred.masked_select(mask) == targets.masked_select(mask) 41 | ) 42 | denominator = torch.sum(mask) 43 | else: 44 | numerator = torch.sum(padded_pred == targets) 45 | denominator = targets.shape[1] 46 | return float(numerator), float(denominator) 47 | 48 | 49 | class AccuracyStats: 50 | """Module for calculate the overall one-step-forward prediction accuracy. 
51 | 52 | Example 53 | ------- 54 | >>> probs = torch.tensor([[0.9, 0.1], [0.1, 0.9], [0.8, 0.2]]).unsqueeze(0) 55 | >>> stats = AccuracyStats() 56 | >>> stats.append(torch.log(probs), torch.tensor([1, 1, 0]).unsqueeze(0), torch.tensor([2/3])) 57 | >>> acc = stats.summarize() 58 | >>> print(acc) 59 | 0.5 60 | """ 61 | 62 | def __init__(self): 63 | self.correct = 0 64 | self.total = 0 65 | 66 | def append(self, log_probabilities, targets, length=None): 67 | """This function is for updating the stats according to the prediction 68 | and target in the current batch. 69 | 70 | Arguments 71 | ---------- 72 | log_probabilities : tensor 73 | Predicted log probabilities (batch_size, time, feature). 74 | targets : tensor 75 | Target (batch_size, time). 76 | length: tensor 77 | Length of target (batch_size,). 78 | """ 79 | numerator, denominator = Accuracy(log_probabilities, targets, length) 80 | self.correct += numerator 81 | self.total += denominator 82 | 83 | def summarize(self): 84 | return self.correct / self.total 85 | -------------------------------------------------------------------------------- /speechbrain/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ Package containing various tools (accuracy, checkpoints ...) 2 | """ 3 | import os 4 | 5 | __all__ = [] 6 | for filename in os.listdir(os.path.dirname(__file__)): 7 | filename = os.path.basename(filename) 8 | if filename.endswith(".py") and not filename.startswith("__"): 9 | __all__.append(filename[:-3]) 10 | 11 | from . import * # noqa 12 | -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/Accuracy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/Accuracy.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/Accuracy.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/Accuracy.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/DER.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/DER.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/DER.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/DER.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/__init__.cpython-37.pyc 
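The `Accuracy` function and `AccuracyStats` class above follow the SpeechBrain convention of relative lengths: the `length` tensor holds fractions of the padded time axis, and only the unpadded positions count towards the score. A minimal sketch of how the stats accumulate over a batch (not part of the repository; it assumes SpeechBrain is importable, and shapes/values are illustrative only):

```python
import torch
from speechbrain.utils.Accuracy import AccuracyStats

# Two utterances, 4 decoding steps, 5 output classes.
log_probs = torch.log_softmax(torch.randn(2, 4, 5), dim=-1)
targets = torch.randint(0, 5, (2, 4))
lengths = torch.tensor([1.0, 0.5])  # second utterance: only the first 2 steps are valid

stats = AccuracyStats()
stats.append(log_probs, targets, lengths)  # padded steps are masked out of both counts
print(stats.summarize())  # correct / total over the 4 + 2 = 6 unmasked steps
```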
-------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/bleu.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/bleu.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/bleu.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/bleu.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/callchains.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/callchains.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/callchains.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/callchains.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/checkpoints.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/checkpoints.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/checkpoints.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/checkpoints.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/data_pipeline.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/data_pipeline.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/data_pipeline.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/data_pipeline.cpython-38.pyc 
-------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/data_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/data_utils.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/data_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/data_utils.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/depgraph.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/depgraph.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/depgraph.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/depgraph.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/distributed.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/distributed.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/distributed.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/distributed.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/edit_distance.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/edit_distance.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/edit_distance.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/edit_distance.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/epoch_loop.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/epoch_loop.cpython-37.pyc 
-------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/epoch_loop.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/epoch_loop.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/logger.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/logger.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/metric_stats.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/metric_stats.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/metric_stats.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/metric_stats.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/parameter_transfer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/parameter_transfer.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/parameter_transfer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/parameter_transfer.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/superpowers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/superpowers.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/superpowers.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/superpowers.cpython-38.pyc 
-------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/torch_audio_backend.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/torch_audio_backend.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/torch_audio_backend.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/torch_audio_backend.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/train_logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/train_logger.cpython-37.pyc -------------------------------------------------------------------------------- /speechbrain/utils/__pycache__/train_logger.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/speechbrain/utils/__pycache__/train_logger.cpython-38.pyc -------------------------------------------------------------------------------- /speechbrain/utils/callchains.py: -------------------------------------------------------------------------------- 1 | """Chaining together callables, if some require relative lengths""" 2 | import inspect 3 | 4 | 5 | def lengths_arg_exists(func): 6 | """Returns True if func takes ``lengths`` keyword argument. 7 | 8 | Arguments 9 | --------- 10 | func : callable 11 | The function, method, or other callable to search for the lengths arg. 12 | """ 13 | spec = inspect.getfullargspec(func) 14 | return "lengths" in spec.args + spec.kwonlyargs 15 | 16 | 17 | class LengthsCapableChain: 18 | """Chain together callables. Can handle relative lengths. 19 | 20 | This is a more light-weight version of 21 | speechbrain.nnet.containers.LengthsCapableSequential 22 | 23 | Arguments 24 | --------- 25 | *funcs : list, optional 26 | Any number of functions or other callables, given in order of 27 | execution. 28 | 29 | Returns 30 | ------- 31 | Any 32 | The input as processed by each function. If no functions were given, 33 | simply returns the input. 34 | """ 35 | 36 | def __init__(self, *funcs): 37 | self.funcs = [] 38 | self.takes_lengths = [] 39 | for func in funcs: 40 | self.append(func) 41 | 42 | def __call__(self, x, lengths=None): 43 | """Run the chain of callables on the given input 44 | 45 | Arguments 46 | --------- 47 | x : Any 48 | The main input 49 | lengths : Any 50 | The lengths argument which will be conditionally passed to 51 | any functions in the chain that take a 'lengths' argument. 52 | In SpeechBrain the convention is to use relative lengths. 53 | 54 | Note 55 | ---- 56 | By convention, if a callable in the chain returns multiple outputs 57 | (returns a tuple), only the first output is passed to the next 58 | callable in the chain. 
59 | """ 60 | if not self.funcs: 61 | return x 62 | for func, give_lengths in zip(self.funcs, self.takes_lengths): 63 | if give_lengths: 64 | x = func(x, lengths) 65 | else: 66 | x = func(x) 67 | if isinstance(x, tuple): 68 | x = x[0] 69 | return x 70 | 71 | def append(self, func): 72 | """Add a function to the chain""" 73 | self.funcs.append(func) 74 | self.takes_lengths.append(lengths_arg_exists(func)) 75 | 76 | def __str__(self): 77 | clsname = self.__class__.__name__ 78 | if self.funcs: 79 | return f"{clsname}:\n" + "\n".join(str(f) for f in self.funcs) 80 | else: 81 | return f"Empty {clsname}" 82 | -------------------------------------------------------------------------------- /speechbrain/utils/superpowers.py: -------------------------------------------------------------------------------- 1 | """Superpowers which should be rarely used. 2 | 3 | This library contains functions for running shell commands. 4 | Remember, with great power comes great 5 | responsibility. 6 | 7 | Authors 8 | * Mirco Ravanelli 2020 9 | """ 10 | 11 | import logging 12 | import subprocess 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | def run_shell(cmd): 18 | r"""This function can be used to run a command in the bash shell. 19 | 20 | Arguments 21 | --------- 22 | cmd : str 23 | Shell command to run. 24 | 25 | Returns 26 | ------- 27 | bytes 28 | The captured standard output. 29 | bytes 30 | The captured standard error. 31 | int 32 | The returncode. 33 | 34 | Raises 35 | ------ 36 | OSError 37 | If returncode is not 0, i.e., command failed. 38 | 39 | Example 40 | ------- 41 | >>> out, err, code = run_shell("echo 'hello world'") 42 | >>> out.decode(errors="ignore") 43 | 'hello world\n' 44 | """ 45 | 46 | # Executing the command 47 | p = subprocess.Popen( 48 | cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True 49 | ) 50 | 51 | # Capturing standard output and error 52 | (output, err) = p.communicate() 53 | 54 | if p.returncode != 0: 55 | raise OSError(err.decode(errors="replace")) 56 | 57 | # Adding information in the logger 58 | msg = output.decode(errors="replace") + "\n" + err.decode(errors="replace") 59 | logger.debug(msg) 60 | 61 | return output, err, p.returncode 62 | -------------------------------------------------------------------------------- /speechbrain/utils/torch_audio_backend.py: -------------------------------------------------------------------------------- 1 | import platform 2 | 3 | 4 | def get_torchaudio_backend(): 5 | """Get the backend for torchaudio between soundfile and sox_io according to the OS. 6 | 7 | Allow users to use soundfile or sox_io according to their OS. 8 | 9 | Returns 10 | ------- 11 | str 12 | The torchaudio backend to use. 13 | """ 14 | current_system = platform.system() 15 | if current_system == "Windows": 16 | return "soundfile" 17 | else: 18 | return "sox_io" 19 | -------------------------------------------------------------------------------- /speechbrain/version.txt: -------------------------------------------------------------------------------- 1 | 0.5.10 2 | -------------------------------------------------------------------------------- /templates/README.md: -------------------------------------------------------------------------------- 1 | Templates 2 | --------- 3 | 4 | These templates should serve as a good starting point for developing new 5 | recipes with the SpeechBrain toolkit. They are simple, well-documented, 6 | and contain all the parts necessary for a working recipe.
They cover 7 | a broad spectrum of types of tasks that are encountered in speech 8 | research, such as sequence regression (enhancement), sequence 9 | to sequence (speech_recognition), and sequence classification (speaker ID). 10 | -------------------------------------------------------------------------------- /templates/enhancement/README.md: -------------------------------------------------------------------------------- 1 | # Template for Speech Enhancement 2 | 3 | This folder provides a working, well-documented example for training 4 | a speech enhancement model from scratch, based on a few hours of 5 | data. The data we use is from Mini Librispeech + OpenRIR. 6 | 7 | There are four files here: 8 | 9 | * `train.py`: the main code file, outlines entire training process. 10 | * `train.yaml`: the hyperparameters file, sets all parameters of execution. 11 | * `custom_model.py`: A file containing the definition of a PyTorch module. 12 | * `mini_librispeech_prepare.py`: If necessary, downloads and prepares data 13 | manifests. 14 | 15 | To train an enhancement model, just execute the following on the command-line: 16 | 17 | ```bash 18 | python train.py train.yaml --data_folder /path/to/save/mini_librispeech 19 | ``` 20 | 21 | This will automatically download and prepare the data manifest for mini 22 | librispeech, and then train a model with dynamically generated noisy 23 | samples, using noise, reverberation, and babble. 24 | 25 | More details about what each file does and how to make modifications 26 | are found within each file. The whole folder can be copied and used 27 | as a starting point for developing recipes doing regression tasks 28 | similar to speech enhancement. Please reach out to the SpeechBrain 29 | team if any errors are found or clarification is needed about how 30 | parts of the template work. Good Luck! 31 | 32 | [For more information, please take a look into the "Speech Enhancement from scratch" tutorial](https://colab.research.google.com/drive/18RyiuKupAhwWX7fh3LCatwQGU5eIS3TR?usp=sharing) 33 | -------------------------------------------------------------------------------- /templates/enhancement/custom_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains a very simple PyTorch module to use for enhancement. 3 | 4 | To replace this model, change the `!new:` tag in the hyperparameter file 5 | to refer to a built-in SpeechBrain model or another file containing 6 | a custom PyTorch module. 7 | 8 | Authors 9 | * Peter Plantinga 2021 10 | """ 11 | import torch 12 | 13 | 14 | class CustomModel(torch.nn.Module): 15 | """Basic RNN model with projection layers between RNN layers. 16 | 17 | Arguments 18 | --------- 19 | input_size : int 20 | Size of the expected input in the 3rd dimension. 21 | rnn_size : int 22 | Number of neurons to use in rnn (for each direction -> and <-). 23 | projection : int 24 | Number of neurons in projection layer. 25 | layers : int 26 | Number of RNN layers to use. 27 | """ 28 | 29 | def __init__(self, input_size, rnn_size=256, projection=128, layers=2): 30 | super().__init__() 31 | self.layers = torch.nn.ModuleList() 32 | 33 | # Alternate RNN and projection layers. 
34 | for i in range(layers): 35 | self.layers.append( 36 | torch.nn.LSTM( 37 | input_size=input_size if i == 0 else projection, 38 | hidden_size=rnn_size, 39 | bidirectional=True, 40 | ) 41 | ) 42 | 43 | # Projection layer reduces size, except last layer, which 44 | # goes back to input size to create the mask 45 | linear_size = input_size if i == layers - 1 else projection 46 | self.layers.append( 47 | torch.nn.Linear( 48 | in_features=rnn_size * 2, out_features=linear_size, 49 | ) 50 | ) 51 | 52 | # Use ReLU to make sure outputs aren't negative (unhelpful for masking) 53 | self.layers.append(torch.nn.ReLU()) 54 | 55 | def forward(self, x): 56 | """Shift to time-first, pass layers, then back to batch-first.""" 57 | x = x.transpose(0, 1) 58 | for layer in self.layers: 59 | x = layer(x) 60 | if isinstance(x, tuple): 61 | x = x[0] 62 | x = x.transpose(0, 1) 63 | return x 64 | -------------------------------------------------------------------------------- /templates/speaker_id/README.md: -------------------------------------------------------------------------------- 1 | # Template for Speaker Identification 2 | 3 | This folder provides a working, well-documented example for training 4 | a speaker identification model from scratch, based on a few hours of 5 | data. The data we use is from Mini Librispeech + OpenRIR. 6 | 7 | There are four files here: 8 | 9 | * `train.py`: the main code file, outlines the entire training process. 10 | * `train.yaml`: the hyperparameters file, sets all parameters of execution. 11 | * `custom_model.py`: A file containing the definition of a PyTorch module. 12 | * `mini_librispeech_prepare.py`: If necessary, downloads and prepares data manifests. 13 | 14 | To train the speaker-id model, just execute the following on the command-line: 15 | 16 | ```bash 17 | python train.py train.yaml 18 | ``` 19 | 20 | This will automatically download and prepare the data manifest for mini 21 | librispeech, and then train a model with dynamically augmented samples. 22 | 23 | More details about what each file does and how to make modifications 24 | are found within each file. The whole folder can be copied and used 25 | as a starting point for developing recipes doing classification tasks 26 | similar to speech speaker-id (e.g, language-id, emotion classification, ..). 27 | Please reach out to the SpeechBrain 28 | team if any errors are found or clarification is needed about how 29 | parts of the template work. Good Luck! 30 | 31 | [For more information, please take a look into the "speaker-id from scratch" tutorial](https://colab.research.google.com/drive/1UwisnAjr8nQF3UnrkIJ4abBMAWzVwBMh?usp=sharing) 32 | -------------------------------------------------------------------------------- /templates/speech_recognition/ASR/README.md: -------------------------------------------------------------------------------- 1 | # Template for Speech Recognition 2 | This folder provides a working, well-documented example for training 3 | a seq2seq (+ CTC) speech recognizer model from scratch, based on a few hours of data. 4 | 5 | There are three files here: 6 | 7 | * `train.py`: the main code file, outlines the entire training process. 8 | * `train.yaml`: the hyperparameters file, sets all parameters of execution. 9 | * `mini_librispeech_prepare.py`: If necessary, downloads and prepares data manifests. 
10 | 11 | To train the speech recognition model, just execute the following on the command-line: 12 | 13 | ```bash 14 | python train.py train.yaml 15 | ``` 16 | 17 | We assume you already trained the tokenizer (see ../Tokenizer) and the language model (../LM). 18 | Training is done with the mini-librispeech dataset using a CRDNN model for encoding and a GRU for decoding. 19 | We pre-train with a larger model to ensure convergence (mini-librispeech is too small for training an e2e model from scratch). 20 | In a real case, you can skip pre-training and train from scratch on a larger dataset. 21 | 22 | -------------------------------------------------------------------------------- /templates/speech_recognition/ASR/mini_librispeech_prepare.py: -------------------------------------------------------------------------------- 1 | ../mini_librispeech_prepare.py -------------------------------------------------------------------------------- /templates/speech_recognition/LM/README.md: -------------------------------------------------------------------------------- 1 | # Language Model 2 | This folder contains a recipe for training language models. 3 | It supports both an RNN-based LM and a Transformer-based LM. 4 | The scripts rely on the HuggingFace `datasets` library, which manages data reading and loading from large text corpora. 5 | Training an LM on large text corpora might take weeks (or months) even on modern GPUs. In this template, for simplicity, we only use the training transcriptions of the mini-librispeech dataset. In the recipes, we assume you 6 | already ran the tokenizer training (see ../Tokenizer). 7 | 8 | # Extra Dependency: 9 | Make sure you have the HuggingFace `datasets` library installed. If not, type: 10 | pip install datasets 11 | 12 | # How to run: 13 | python train.py RNNLM.yaml 14 | -------------------------------------------------------------------------------- /templates/speech_recognition/LM/extra_requirements.txt: -------------------------------------------------------------------------------- 1 | # huggingface dataset 2 | datasets 3 | -------------------------------------------------------------------------------- /templates/speech_recognition/README.md: -------------------------------------------------------------------------------- 1 | # Training a Speech Recognizer 2 | 3 | This template implements a simple speech recognizer trained on mini-librispeech. In particular, it implements an offline end-to-end attention-based speech recognizer. A tokenizer is used to determine the word tokens to estimate. Search relies on beam search coupled with an RNN language model. 4 | 5 | Training such a system requires the following steps: 6 | 7 | 1. Train a tokenizer. 8 | Given the training transcriptions, the tokenizer decides which word pieces to allocate for training. The most atomic units are characters; the least atomic units are words. Most of the time, it is convenient to use tokens that are something in between characters and full words. 9 | SpeechBrain relies on the popular [SentencePiece](https://github.com/google/sentencepiece) for tokenization. To train the tokenizer: 10 | 11 | ``` 12 | cd Tokenizer 13 | python train.py tokenizer.yaml 14 | ``` 15 | 16 | 2. Train an LM 17 | Once the target tokens are defined, we can train a language model on top of them. To do it, we need a large text corpus (better if its language domain matches that of your target application).
In this example, we simply train the LM on top of the training transcriptions: 18 | 19 | ``` 20 | cd ../LM 21 | python train.py RNNLM.yaml 22 | ``` 23 | 24 | In a real case, training an LM is extremely computationally demanding. It is thus good practice to re-use existing LMs or fine-tune them. 25 | 26 | 3. Train the speech recognizer 27 | At this point, we can train our speech recognizer. In this case, we are using a simple CRDNN model with an autoregressive GRU decoder. An attention mechanism is employed between the encoder and the decoder. The final sequence of words is retrieved with beam search coupled with the RNN LM trained in the previous step. To train the ASR: 28 | 29 | ``` 30 | cd ../ASR 31 | python train.py train.yaml 32 | ``` 33 | 34 | This template can help you figure out how to set up SpeechBrain for implementing an e2e speech recognizer. However, in a real case, the system must be trained with much more data to provide acceptable performance. For a competitive recipe with more data, see for instance our recipes on LibriSpeech (https://github.com/speechbrain/speechbrain/tree/develop/recipes/LibriSpeech/ASR). 35 | 36 | [For more information, please take a look into the "ASR from scratch" tutorial](https://colab.research.google.com/drive/1aFgzrUv3udM_gNJNUoLaHIm78QHtxdIz?usp=sharing) 37 | -------------------------------------------------------------------------------- /templates/speech_recognition/Tokenizer/README.md: -------------------------------------------------------------------------------- 1 | # Tokenizer 2 | This folder contains the scripts to train a tokenizer using SentencePiece (https://github.com/google/sentencepiece). 3 | The tokenizer is trained on top of the training transcriptions. 4 | 5 | # How to run 6 | python train.py tokenizer.yaml 7 | -------------------------------------------------------------------------------- /templates/speech_recognition/Tokenizer/mini_librispeech_prepare.py: -------------------------------------------------------------------------------- 1 | ../mini_librispeech_prepare.py -------------------------------------------------------------------------------- /templates/speech_recognition/Tokenizer/tokenizer.yaml: -------------------------------------------------------------------------------- 1 | # ############################################################################ 2 | # Tokenizer: subword BPE tokenizer with unigram 1K 3 | # Training: Mini-LibriSpeech 4 | # Authors: Abdel Heba 2021 5 | # Mirco Ravanelli 2021 6 | # ############################################################################ 7 | 8 | 9 | # Set up folders for reading from and writing to 10 | data_folder: ../data 11 | output_folder: ./save 12 | 13 | # Path where data-specification files are stored 14 | train_annotation: ../train.json 15 | valid_annotation: ../valid.json 16 | test_annotation: ../test.json 17 | 18 | # Tokenizer parameters 19 | token_type: unigram # ["unigram", "bpe", "char"] 20 | token_output: 1000 # index(blank/eos/bos/unk) = 0 21 | character_coverage: 1.0 22 | annotation_read: words # field to read 23 | 24 | # Tokenizer object 25 | tokenizer: !name:speechbrain.tokenizers.SentencePiece.SentencePiece 26 | model_dir: !ref <output_folder> 27 | vocab_size: !ref <token_output> 28 | annotation_train: !ref <train_annotation> 29 | annotation_read: !ref <annotation_read> 30 | model_type: !ref <token_type> # ["unigram", "bpe", "char"] 31 | character_coverage: !ref <character_coverage> 32 | annotation_list_to_check: [!ref <train_annotation>, !ref <valid_annotation>] 33 | annotation_format: json 34 | --------------------------------------------------------------------------------
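A note on the HyperPyYAML tags used throughout these recipe files: `!ref <key>` copies the value of another node, `!new:` builds an object immediately at load time, and `!name:` stores a partially applied constructor to be called later, which is exactly what `train.py` below does with `hparams["tokenizer"]()`. A minimal sketch of how such a file resolves (not part of the recipe; it assumes `hyperpyyaml` and SpeechBrain with its SentencePiece dependency are installed, and the inline YAML string is illustrative only):

```python
from hyperpyyaml import load_hyperpyyaml

yaml_string = """
token_output: 1000
token_type: unigram
tokenizer: !name:speechbrain.tokenizers.SentencePiece.SentencePiece
    vocab_size: !ref <token_output>
    model_type: !ref <token_type>
"""

hparams = load_hyperpyyaml(yaml_string)
print(hparams["token_output"])  # 1000, a plain YAML scalar
# hparams["tokenizer"] is a partially applied constructor; supplying the remaining
# arguments (model_dir, annotation_train, ...) and calling it would start training.
```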
/templates/speech_recognition/Tokenizer/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python3 2 | """Script for training a BPE tokenizer on the top of CSV or JSON annotation files. 3 | The tokenizer converts words into sub-word units that can be used to train a 4 | language (LM) or an acoustic model (AM). 5 | When doing a speech recognition experiment you have to make 6 | sure that the acoustic and language models are trained with 7 | the same tokenizer. Otherwise, a token mismatch is introduced 8 | and beamsearch will produce bad results when combining AM and LM. 9 | 10 | To run this recipe, do the following: 11 | > python train.py tokenizer.yaml 12 | 13 | 14 | Authors 15 | * Abdel Heba 2021 16 | * Mirco Ravanelli 2021 17 | """ 18 | 19 | import sys 20 | import speechbrain as sb 21 | from hyperpyyaml import load_hyperpyyaml 22 | from mini_librispeech_prepare import prepare_mini_librispeech 23 | 24 | if __name__ == "__main__": 25 | 26 | # Load hyperparameters file with command-line overrides 27 | hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:]) 28 | with open(hparams_file) as fin: 29 | hparams = load_hyperpyyaml(fin, overrides) 30 | 31 | # Create experiment directory 32 | sb.create_experiment_directory( 33 | experiment_directory=hparams["output_folder"], 34 | hyperparams_to_save=hparams_file, 35 | overrides=overrides, 36 | ) 37 | 38 | # Data preparation, to be run on only one process. 39 | prepare_mini_librispeech( 40 | data_folder=hparams["data_folder"], 41 | save_json_train=hparams["train_annotation"], 42 | save_json_valid=hparams["valid_annotation"], 43 | save_json_test=hparams["test_annotation"], 44 | ) 45 | 46 | # Train tokenizer 47 | hparams["tokenizer"]() 48 | -------------------------------------------------------------------------------- /tests/.run-doctests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e -u -o pipefail 3 | 4 | # To run doctests locally, the easiest approach is to do: 5 | # > pytest --doctest-modules speechbrain/ 6 | # However, we take this more complex approach to avoid testing files not 7 | # tracked by git. We filter out tests that require optional dependencies. 
8 | avoid="transducer_loss.py\|fairseq_wav2vec.py\|huggingface_wav2vec.py\|bleu.py" 9 | git ls-files speechbrain | grep -e "\.py$" | grep -v $avoid | xargs pytest --doctest-modules 10 | -------------------------------------------------------------------------------- /tests/.run-linters.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e -u -o pipefail 3 | 4 | echo "===Black===" 5 | git ls-files | grep -E "\.py$" | xargs black --check --diff 6 | echo "===Flake8===" 7 | git ls-files | grep -E "\.py$" | xargs flake8 --count --statistics 8 | echo "===Yamllint===" 9 | git ls-files | grep -E "\.yaml$|\.yml$" | xargs yamllint --no-warnings 10 | -------------------------------------------------------------------------------- /tests/.run-unittests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e -u -o pipefail 3 | 4 | git ls-files tests/unittests | grep -e "\.py$" | xargs pytest 5 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/ASR_CTC/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # Seed needs to be set at top of yaml, before objects with parameters are made 2 | # NOTE: Seed does not guarantee replicability with CTC 3 | seed: 1234 4 | __set_seed: !apply:torch.manual_seed [!ref ] 5 | 6 | # Training params 7 | N_epochs: 15 8 | lr: 0.002 9 | dataloader_options: 10 | batch_size: 1 11 | 12 | # Special tokens and labels 13 | blank_index: 0 14 | num_labels: 44 15 | 16 | 17 | # Model parameters 18 | activation: !name:torch.nn.LeakyReLU [] 19 | dropout: 0.15 20 | cnn_blocks: 1 21 | cnn_channels: (16,) 22 | cnn_kernelsize: (3, 3) 23 | rnn_layers: 1 24 | rnn_neurons: 128 25 | rnn_bidirectional: True 26 | dnn_blocks: 1 27 | dnn_neurons: 128 28 | 29 | compute_features: !new:speechbrain.lobes.features.MFCC 30 | 31 | mean_var_norm: !new:speechbrain.processing.features.InputNormalization 32 | norm_type: global 33 | 34 | model: !new:speechbrain.lobes.models.CRDNN.CRDNN 35 | input_shape: [null, null, 660] 36 | activation: !ref 37 | dropout: !ref 38 | cnn_blocks: !ref 39 | cnn_channels: !ref 40 | cnn_kernelsize: !ref 41 | time_pooling: True 42 | rnn_layers: !ref 43 | rnn_neurons: !ref 44 | rnn_bidirectional: !ref 45 | dnn_blocks: !ref 46 | dnn_neurons: !ref 47 | 48 | lin: !new:speechbrain.nnet.linear.Linear 49 | input_size: !ref 50 | n_neurons: !ref 51 | bias: False 52 | 53 | softmax: !new:speechbrain.nnet.activations.Softmax 54 | apply_log: True 55 | 56 | compute_cost: !name:speechbrain.nnet.losses.ctc_loss 57 | blank_index: !ref 58 | 59 | modules: 60 | model: !ref 61 | lin: !ref 62 | mean_var_norm: !ref 63 | 64 | opt_class: !name:torch.optim.Adam 65 | lr: !ref 66 | 67 | per_stats: !name:speechbrain.utils.metric_stats.ErrorRateStats 68 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/ASR_CTC/hyperparams_complex_net.yaml: -------------------------------------------------------------------------------- 1 | # Seed needs to be set at top of yaml, before objects with parameters are made 2 | # NOTE: Seed does not guarantee replicability with CTC 3 | seed: 1234 4 | __set_seed: !apply:torch.manual_seed [!ref ] 5 | 6 | # Training params 7 | N_epochs: 25 8 | lr: 0.002 9 | dataloader_options: 10 | batch_size: 1 11 | 12 | # Special tokens and labels 13 | blank_index: 0 14 | num_labels: 44 # 43 phonemes + 1 blank 15 
16 | # Model parameters 17 | activation: !new:torch.nn.LeakyReLU 18 | 19 | compute_features: !new:speechbrain.lobes.features.MFCC 20 | 21 | mean_var_norm: !new:speechbrain.processing.features.InputNormalization 22 | norm_type: global 23 | 24 | 25 | model: !new:speechbrain.nnet.containers.Sequential 26 | input_shape: [null, null, 660] # input_size 27 | conv1: !name:speechbrain.nnet.complex_networks.c_CNN.CConv1d 28 | out_channels: 16 29 | kernel_size: 3 30 | nrm1: !name:speechbrain.nnet.complex_networks.c_normalization.CLayerNorm 31 | act1: !ref <activation> 32 | conv2: !name:speechbrain.nnet.complex_networks.c_CNN.CConv1d 33 | out_channels: 32 34 | kernel_size: 3 35 | nrm2: !name:speechbrain.nnet.complex_networks.c_normalization.CLayerNorm 36 | act2: !ref <activation> 37 | pooling: !new:speechbrain.nnet.pooling.Pooling1d 38 | pool_type: "avg" 39 | kernel_size: 3 40 | RNN: !name:speechbrain.nnet.complex_networks.c_RNN.CLiGRU 41 | hidden_size: 64 42 | bidirectional: True 43 | 44 | lin: !new:speechbrain.nnet.linear.Linear 45 | input_size: 256 46 | n_neurons: !ref <num_labels> 47 | bias: False 48 | 49 | softmax: !new:speechbrain.nnet.activations.Softmax 50 | apply_log: True 51 | 52 | modules: 53 | model: !ref <model> 54 | lin: !ref <lin> 55 | mean_var_norm: !ref <mean_var_norm> 56 | 57 | 58 | opt_class: !name:torch.optim.Adam 59 | lr: !ref <lr> 60 | 61 | compute_cost: !name:speechbrain.nnet.losses.ctc_loss 62 | blank_index: !ref <blank_index> 63 | 64 | per_stats: !name:speechbrain.utils.metric_stats.ErrorRateStats 65 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/ASR_CTC/hyperparams_quaternion_net.yaml: -------------------------------------------------------------------------------- 1 | # Seed needs to be set at top of yaml, before objects with parameters are made 2 | # NOTE: Seed does not guarantee replicability with CTC 3 | seed: 1234 4 | __set_seed: !apply:torch.manual_seed [!ref <seed>] 5 | 6 | # Training params 7 | N_epochs: 25 8 | lr: 0.002 9 | dataloader_options: 10 | batch_size: 1 11 | 12 | # Special tokens and labels 13 | blank_index: 0 14 | num_labels: 44 # 43 phonemes + 1 blank 15 | 16 | # Model parameters 17 | activation: !new:torch.nn.LeakyReLU 18 | 19 | compute_features: !new:speechbrain.lobes.features.MFCC 20 | 21 | mean_var_norm: !new:speechbrain.processing.features.InputNormalization 22 | norm_type: global 23 | 24 | 25 | model: !new:speechbrain.nnet.containers.Sequential 26 | input_shape: [null, null, 660] # input_size 27 | conv1: !name:speechbrain.nnet.quaternion_networks.q_CNN.QConv1d 28 | out_channels: 16 29 | kernel_size: 3 30 | act1: !ref <activation> 31 | conv2: !name:speechbrain.nnet.quaternion_networks.q_CNN.QConv1d 32 | out_channels: 32 33 | kernel_size: 3 34 | act2: !ref <activation> 35 | pooling: !new:speechbrain.nnet.pooling.Pooling1d 36 | pool_type: "avg" 37 | kernel_size: 3 38 | RNN: !name:speechbrain.nnet.quaternion_networks.q_RNN.QLiGRU 39 | hidden_size: 64 40 | bidirectional: True 41 | 42 | lin: !new:speechbrain.nnet.linear.Linear 43 | input_size: 512 # 64 * 2 (bidirectional) * 4 (quaternion) 44 | n_neurons: !ref <num_labels> 45 | bias: False 46 | 47 | softmax: !new:speechbrain.nnet.activations.Softmax 48 | apply_log: True 49 | 50 | modules: 51 | model: !ref <model> 52 | lin: !ref <lin> 53 | mean_var_norm: !ref <mean_var_norm> 54 | 55 | 56 | opt_class: !name:torch.optim.Adam 57 | lr: !ref <lr> 58 | 59 | compute_cost: !name:speechbrain.nnet.losses.ctc_loss 60 | blank_index: !ref <blank_index> 61 | 62 | per_stats: !name:speechbrain.utils.metric_stats.ErrorRateStats 63 | --------------------------------------------------------------------------------
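For reference (this note and sketch are not files in the repository): the ASR_CTC hyperparameter files above are plain hyperpyyaml documents, so every `!ref <name>` resolves to a value defined earlier in the same file, and every `!new:`/`!name:` entry is instantiated (or wrapped as a partial) when the YAML is loaded. A minimal sketch of how the integration tests consume such a file; the path and the override value here are assumptions.

from hyperpyyaml import load_hyperpyyaml

# Loading resolves all !ref <...> placeholders and builds the declared objects.
with open("tests/integration/neural_networks/ASR_CTC/hyperparams.yaml") as fin:
    hparams = load_hyperpyyaml(fin, overrides={"N_epochs": 1})  # overrides are optional

model = hparams["model"]          # CRDNN (or the complex/quaternion net) built with the referenced params
opt_class = hparams["opt_class"]  # partial of torch.optim.Adam with lr already bound
modules = hparams["modules"]      # dict of modules, ready to hand to a SpeechBrain Brain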
/tests/integration/neural_networks/ASR_DNN_HMM/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # Seed needs to be set at top of yaml, before objects with parameters are made 2 | seed: 1234 3 | __set_seed: !apply:torch.manual_seed [!ref ] 4 | 5 | # Training params 6 | N_epochs: 15 7 | lr: 0.002 8 | dataloader_options: 9 | batch_size: 1 10 | 11 | # Model parameters 12 | compute_features: !new:speechbrain.lobes.features.MFCC 13 | 14 | mean_var_norm: !new:speechbrain.processing.features.InputNormalization 15 | norm_type: global 16 | 17 | 18 | linear1: !new:speechbrain.nnet.linear.Linear 19 | input_size: 660 20 | n_neurons: 1024 21 | bias: False 22 | 23 | activation: !new:torch.nn.LeakyReLU 24 | 25 | linear2: !new:speechbrain.nnet.linear.Linear 26 | input_size: 1024 27 | n_neurons: 43 28 | bias: False 29 | 30 | softmax: !new:speechbrain.nnet.activations.Softmax 31 | apply_log: True 32 | 33 | modules: 34 | linear1: !ref 35 | linear2: !ref 36 | mean_var_norm: !ref 37 | 38 | opt_class: !name:torch.optim.Adam 39 | lr: !ref 40 | 41 | compute_cost: !name:speechbrain.nnet.losses.nll_loss 42 | 43 | error_stats: !name:speechbrain.utils.metric_stats.MetricStats 44 | metric: !name:speechbrain.nnet.losses.classification_error 45 | reduction: batch 46 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/ASR_alignment_forward/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # Seed needs to be set at top of yaml, before objects with parameters are made 2 | seed: 1234 3 | __set_seed: !apply:torch.manual_seed [!ref ] 4 | 5 | # Training params 6 | N_epochs: 15 7 | lr: 0.004 8 | dataloader_options: 9 | batch_size: 1 10 | 11 | # Model parameters 12 | activation: !name:torch.nn.LeakyReLU 13 | dropout: 0.15 14 | cnn_blocks: 1 15 | cnn_channels: (16,) 16 | cnn_kernelsize: (3, 3) 17 | rnn_layers: 1 18 | rnn_neurons: 128 19 | rnn_bidirectional: True 20 | dnn_blocks: 1 21 | dnn_neurons: 128 22 | 23 | 24 | compute_features: !new:speechbrain.lobes.features.MFCC 25 | 26 | mean_var_norm: !new:speechbrain.processing.features.InputNormalization 27 | norm_type: global 28 | 29 | model: !new:speechbrain.lobes.models.CRDNN.CRDNN 30 | input_shape: [null, null, 660] 31 | activation: !ref 32 | dropout: !ref 33 | cnn_blocks: !ref 34 | cnn_channels: !ref 35 | cnn_kernelsize: !ref 36 | time_pooling: False 37 | rnn_layers: !ref 38 | rnn_neurons: !ref 39 | rnn_bidirectional: !ref 40 | dnn_blocks: !ref 41 | dnn_neurons: !ref 42 | 43 | lin: !new:speechbrain.nnet.linear.Linear 44 | input_size: !ref 45 | n_neurons: 43 # 43 phonemes, no blank 46 | bias: False 47 | 48 | modules: 49 | model: !ref 50 | lin: !ref 51 | mean_var_norm: !ref 52 | 53 | opt_class: !name:torch.optim.Adam 54 | lr: !ref 55 | 56 | softmax: !new:speechbrain.nnet.activations.Softmax 57 | apply_log: True 58 | 59 | aligner: !new:speechbrain.alignment.aligner.HMMAligner 60 | 61 | compute_cost: !name:speechbrain.nnet.losses.nll_loss 62 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/ASR_alignment_viterbi/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # Seed needs to be set at top of yaml, before objects with parameters are made 2 | seed: 1234 3 | __set_seed: !apply:torch.manual_seed [!ref ] 4 | 5 | # Training params 6 | N_epochs: 10 7 | lr: 0.004 8 | dataloader_options: 9 | 
batch_size: 1 10 | 11 | # Model parameters 12 | activation: !name:torch.nn.LeakyReLU 13 | dropout: 0.15 14 | cnn_blocks: 1 15 | cnn_channels: (16,) 16 | cnn_kernelsize: (3, 3) 17 | rnn_layers: 1 18 | rnn_neurons: 128 19 | rnn_bidirectional: True 20 | dnn_blocks: 1 21 | dnn_neurons: 128 22 | 23 | 24 | compute_features: !new:speechbrain.lobes.features.MFCC 25 | 26 | mean_var_norm: !new:speechbrain.processing.features.InputNormalization 27 | norm_type: global 28 | 29 | model: !new:speechbrain.lobes.models.CRDNN.CRDNN 30 | input_shape: [null, null, 660] 31 | activation: !ref 32 | dropout: !ref 33 | cnn_blocks: !ref 34 | cnn_channels: !ref 35 | cnn_kernelsize: !ref 36 | time_pooling: False 37 | rnn_layers: !ref 38 | rnn_neurons: !ref 39 | rnn_bidirectional: !ref 40 | dnn_blocks: !ref 41 | dnn_neurons: !ref 42 | 43 | lin: !new:speechbrain.nnet.linear.Linear 44 | input_size: !ref 45 | n_neurons: 43 # 43 phonemes, no blank 46 | bias: False 47 | 48 | modules: 49 | model: !ref 50 | lin: !ref 51 | mean_var_norm: !ref 52 | 53 | opt_class: !name:torch.optim.Adam 54 | lr: !ref 55 | 56 | softmax: !new:speechbrain.nnet.activations.Softmax 57 | apply_log: True 58 | 59 | aligner: !new:speechbrain.alignment.aligner.HMMAligner 60 | 61 | compute_cost: !name:speechbrain.nnet.losses.nll_loss 62 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/ASR_seq2seq/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # Seed needs to be set at top of yaml, before objects with parameters are made 2 | seed: 1234 3 | __set_seed: !apply:torch.manual_seed [!ref ] 4 | 5 | # Training Parameters 6 | N_epochs: 10 7 | lr: 0.002 8 | dataloader_options: 9 | batch_size: 1 10 | 11 | # token information 12 | bos_index: 0 # eos_index = bos_index + 1 13 | num_labels: 45 14 | 15 | # Model parameters 16 | activation: !name:torch.nn.LeakyReLU [] 17 | dropout: 0.15 18 | cnn_blocks: 1 19 | cnn_channels: (16,) 20 | cnn_kernelsize: (3, 3) 21 | rnn_layers: 1 22 | rnn_neurons: 128 23 | rnn_bidirectional: True 24 | dnn_blocks: 1 25 | dnn_neurons: 128 26 | 27 | compute_features: !new:speechbrain.lobes.features.MFCC 28 | 29 | mean_var_norm: !new:speechbrain.processing.features.InputNormalization 30 | norm_type: global 31 | 32 | enc: !new:speechbrain.lobes.models.CRDNN.CRDNN 33 | input_shape: [null, null, 660] 34 | activation: !ref 35 | dropout: !ref 36 | cnn_blocks: !ref 37 | cnn_channels: !ref 38 | cnn_kernelsize: !ref 39 | time_pooling: True 40 | rnn_layers: !ref 41 | rnn_neurons: !ref 42 | rnn_bidirectional: !ref 43 | dnn_blocks: !ref 44 | dnn_neurons: !ref 45 | 46 | lin: !new:speechbrain.nnet.linear.Linear 47 | input_size: !ref 48 | n_neurons: !ref 49 | bias: False 50 | 51 | emb: !new:speechbrain.nnet.embedding.Embedding 52 | num_embeddings: !ref 53 | embedding_dim: 128 54 | 55 | dec: !new:speechbrain.nnet.RNN.AttentionalRNNDecoder 56 | enc_dim: 128 57 | input_size: 128 58 | rnn_type: gru 59 | attn_type: content 60 | hidden_size: !ref 61 | attn_dim: !ref 62 | num_layers: 1 63 | 64 | softmax: !new:speechbrain.nnet.activations.Softmax 65 | apply_log: True 66 | 67 | modules: 68 | enc: !ref 69 | emb: !ref 70 | dec: !ref 71 | lin: !ref 72 | mean_var_norm: !ref 73 | 74 | opt_class: !name:torch.optim.Adam 75 | lr: !ref 76 | 77 | searcher: !new:speechbrain.decoders.seq2seq.S2SRNNGreedySearcher 78 | embedding: !ref 79 | decoder: !ref 80 | linear: !ref 81 | bos_index: !ref 82 | eos_index: !ref + 1 83 | min_decode_ratio: 0 84 | 
max_decode_ratio: 0.1 85 | 86 | compute_cost: !name:speechbrain.nnet.losses.nll_loss 87 | 88 | per_stats: !name:speechbrain.utils.metric_stats.ErrorRateStats 89 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/G2P/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # Seed needs to be set at top of yaml, before objects with parameters are made 2 | seed: 1234 3 | __set_seed: !apply:torch.manual_seed [!ref <seed>] 4 | 5 | # Training Parameters 6 | N_epochs: 10 7 | lr: 0.002 8 | dataloader_options: 9 | batch_size: 1 10 | 11 | # token information 12 | bos_index: 0 # eos_index = bos_index + 1 13 | num_phns: 45 # 43 phonemes + 1 bos + 1 eos 14 | num_chars: 26 # 24 chars + 1 bos + 1 eos 15 | 16 | 17 | # Model parameters 18 | activation: !name:torch.nn.LeakyReLU 19 | rnn_layers: 1 20 | rnn_neurons: 128 21 | rnn_bidirectional: True 22 | 23 | enc: !new:speechbrain.nnet.RNN.LSTM 24 | input_shape: [null, null, 128] 25 | bidirectional: True 26 | hidden_size: 64 27 | num_layers: 1 28 | dropout: 0.0 29 | 30 | lin: !new:speechbrain.nnet.linear.Linear 31 | input_size: !ref <rnn_neurons> 32 | n_neurons: !ref <num_phns> 33 | bias: False 34 | 35 | encoder_emb: !new:speechbrain.nnet.embedding.Embedding 36 | num_embeddings: !ref <num_chars> 37 | embedding_dim: 128 38 | 39 | emb: !new:speechbrain.nnet.embedding.Embedding 40 | num_embeddings: !ref <num_phns> 41 | embedding_dim: 128 42 | 43 | dec: !new:speechbrain.nnet.RNN.AttentionalRNNDecoder 44 | enc_dim: 128 45 | input_size: 128 46 | rnn_type: gru 47 | attn_type: content 48 | hidden_size: !ref <rnn_neurons> 49 | attn_dim: !ref <rnn_neurons> 50 | num_layers: 1 51 | 52 | softmax: !new:speechbrain.nnet.activations.Softmax 53 | apply_log: True 54 | 55 | modules: 56 | enc: !ref <enc> 57 | emb: !ref <emb> 58 | dec: !ref <dec> 59 | lin: !ref <lin> 60 | 61 | opt_class: !name:torch.optim.Adam 62 | lr: !ref <lr> 63 | 64 | searcher: !new:speechbrain.decoders.seq2seq.S2SRNNGreedySearcher 65 | embedding: !ref <emb> 66 | decoder: !ref <dec> 67 | linear: !ref <lin> 68 | bos_index: !ref <bos_index> 69 | eos_index: !ref <bos_index> + 1 70 | min_decode_ratio: 0 71 | max_decode_ratio: 0.1 72 | 73 | compute_cost: !name:speechbrain.nnet.losses.nll_loss 74 | 75 | per_stats: !name:speechbrain.utils.metric_stats.ErrorRateStats 76 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/LM_RNN/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # Seed needs to be set at top of yaml, before objects with parameters are made 2 | seed: 1234 3 | __set_seed: !apply:torch.manual_seed [!ref <seed>] 4 | 5 | # Training Parameters 6 | N_epochs: 30 7 | lr: 0.01 8 | dataloader_options: 9 | batch_size: 8 10 | 11 | # token information 12 | bos_index: 0 # eos_index = bos_index + 1 13 | num_chars: 26 # 24 chars + 1 bos + 1 eos 14 | 15 | # Model parameters 16 | rnn_layers: 1 17 | rnn_neurons: 256 18 | emb_size: 128 19 | dropout: 0.0 20 | 21 | model: !new:speechbrain.lobes.models.RNNLM.RNNLM 22 | output_neurons: !ref <num_chars> 23 | embedding_dim: !ref <emb_size> 24 | rnn_neurons: !ref <rnn_neurons> 25 | rnn_layers: !ref <rnn_layers> 26 | dropout: !ref <dropout> 27 | 28 | modules: {model: !ref <model>} 29 | 30 | opt_class: !name:torch.optim.Adam 31 | lr: !ref <lr> 32 | 33 | epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter 34 | limit: !ref <N_epochs> 35 | 36 | log_softmax: !new:speechbrain.nnet.activations.Softmax 37 | apply_log: True 38 | 39 | compute_cost: !name:speechbrain.nnet.losses.nll_loss 40 | --------------------------------------------------------------------------------
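For reference (not a repository file): these integration-test YAMLs all end with a modules: dict and an opt_class: partial because that is what speechbrain.core.Brain consumes. A minimal, hypothetical wiring sketch, mirroring the pattern of tests/unittests/test_core.py further down in this dump; the hyperparams path and the L1 loss are assumptions made only for illustration.

import torch
import speechbrain as sb
from hyperpyyaml import load_hyperpyyaml


class MinimalBrain(sb.Brain):
    def compute_forward(self, batch, stage):
        # Uses only the "model" entry of the modules dict taken from the YAML.
        return self.modules.model(batch[0])

    def compute_objectives(self, predictions, batch, stage):
        return torch.nn.functional.l1_loss(predictions, batch[1])


with open("hyperparams.yaml") as fin:  # e.g. the LM_RNN file above
    hparams = load_hyperpyyaml(fin)

brain = MinimalBrain(
    modules=hparams["modules"],      # e.g. {"model": RNNLM(...)} for LM_RNN
    opt_class=hparams["opt_class"],  # torch.optim.Adam partial with lr already bound
    hparams=hparams,
)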
/tests/integration/neural_networks/VAD/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # Seed needs to be set at top of yaml, before objects with parameters are made 2 | seed: 1234 3 | __set_seed: !apply:torch.manual_seed [!ref <seed>] 4 | sample_rate: 16000 5 | example_length: 2 6 | 7 | 8 | # Training params 9 | N_epochs: 15 10 | lr: 0.01 11 | dataloader_options: 12 | batch_size: 1 13 | 14 | # Feature parameters 15 | n_mfcc: 20 16 | 17 | # Model parameters 18 | rnn_layers: 2 19 | rnn_neurons: 256 20 | emb_size: 23 21 | dropout: 0.1 22 | output_neurons: 1 23 | 24 | compute_features: !new:speechbrain.lobes.features.MFCC 25 | n_mfcc: !ref <n_mfcc> 26 | 27 | mean_var_norm: !new:speechbrain.processing.features.InputNormalization 28 | norm_type: global 29 | 30 | rnn: !new:speechbrain.nnet.RNN.LSTM 31 | input_size: !ref <n_mfcc> * 33 # d & dd = *3, 5 left & 5 right = *11 32 | hidden_size: !ref <rnn_neurons> 33 | num_layers: !ref <rnn_layers> 34 | dropout: !ref <dropout> 35 | bidirectional: False 36 | re_init: True 37 | 38 | lin: !new:speechbrain.nnet.linear.Linear 39 | input_size: !ref <rnn_neurons> 40 | n_neurons: !ref <output_neurons> 41 | bias: False 42 | 43 | modules: 44 | rnn: !ref <rnn> 45 | lin: !ref <lin> 46 | mean_var_norm: !ref <mean_var_norm> 47 | 48 | opt_class: !name:torch.optim.Adam 49 | lr: !ref <lr> 50 | 51 | compute_BCE_cost: !name:speechbrain.nnet.losses.compute_masked_loss 52 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/autoencoder/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # Basic parameters 2 | # Seed needs to be set at top of yaml, before objects with parameters are made 3 | seed: 1234 4 | __set_seed: !apply:torch.manual_seed [!ref <seed>] 5 | use_tensorboard: False 6 | tensorboard_logs: runs 7 | 8 | # Training params 9 | N_epochs: 100 10 | lr: 0.004 11 | dataloader_options: 12 | batch_size: 2 13 | 14 | 15 | compute_features: !new:speechbrain.lobes.features.MFCC 16 | left_frames: 1 17 | right_frames: 1 18 | 19 | mean_var_norm: !new:speechbrain.processing.features.InputNormalization 20 | norm_type: global 21 | 22 | linear1: !new:speechbrain.nnet.linear.Linear 23 | input_size: 180 24 | n_neurons: 128 25 | bias: False 26 | 27 | activation: !new:torch.nn.LeakyReLU 28 | 29 | linear2: !new:speechbrain.nnet.linear.Linear 30 | input_size: 128 31 | n_neurons: 180 32 | bias: False 33 | 34 | modules: 35 | linear1: !ref <linear1> 36 | linear2: !ref <linear2> 37 | mean_var_norm: !ref <mean_var_norm> 38 | 39 | opt_class: !name:torch.optim.Adam 40 | lr: !ref <lr> 41 | 42 | compute_cost: !name:speechbrain.nnet.losses.mse_loss 43 | 44 | loss_tracker: !name:speechbrain.utils.metric_stats.MetricStats 45 | metric: !name:speechbrain.nnet.losses.mse_loss 46 | reduction: batch 47 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/enhance_GAN/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # Basic parameters 2 | # Seed needs to be set at top of yaml, before objects with parameters are made 3 | seed: 1234 4 | __set_seed: !apply:torch.manual_seed [!ref <seed>] 5 | 6 | # Training params 7 | N_epochs: 5 8 | lr: 0.004 9 | dataloader_options: 10 | batch_size: 2 11 | 12 | models: !include:models.yaml 13 | 14 | add_noise: !new:speechbrain.processing.speech_augmentation.AddNoise 15 | 16 | modules: 17 | generator: !ref <models[generator]> 18 | discriminator: !ref <models[discriminator]> 19 | 20 | g_opt_class: !name:torch.optim.Adam 21 | lr: !ref <lr> 22 | d_opt_class: !name:torch.optim.Adam 23 | lr: !ref <lr> / 4 24 | 25 |
compute_cost: !name:speechbrain.nnet.losses.mse_loss 26 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/enhance_GAN/models.yaml: -------------------------------------------------------------------------------- 1 | generator: !new:speechbrain.nnet.containers.Sequential 2 | input_shape: [null, null, 1] 3 | conv1: !name:speechbrain.nnet.CNN.Conv1d 4 | out_channels: 32 5 | kernel_size: 11 6 | activation: !new:torch.nn.LeakyReLU 7 | conv2: !name:speechbrain.nnet.CNN.Conv1d 8 | out_channels: 1 9 | kernel_size: 11 10 | tanh: !new:torch.nn.Tanh 11 | 12 | discriminator: !new:speechbrain.nnet.containers.Sequential 13 | input_shape: [null, null, 1] 14 | conv1: !name:speechbrain.nnet.CNN.Conv1d 15 | out_channels: 32 16 | kernel_size: 11 17 | stride: 8 18 | activation: !new:torch.nn.LeakyReLU 19 | conv2: !name:speechbrain.nnet.CNN.Conv1d 20 | out_channels: 1 21 | kernel_size: 11 22 | stride: 8 23 | sigmoid: !new:torch.nn.Sigmoid 24 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/separation/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # ################################ 2 | # Model: ConvTasnet for source separation 3 | # Data : Minimal Example 4 | # Author: Cem Subakan 5 | # ################################ 6 | 7 | 8 | # Basic parameters 9 | # Seed needs to be set at top of yaml, before objects with parameters are made 10 | seed: 1234 11 | __set_seed: !apply:torch.manual_seed [!ref ] 12 | output_folder: !ref results/conv_tasnet/ 13 | save_folder: !ref /save 14 | train_log: !ref /train_log.txt 15 | 16 | # Training params 17 | N_epochs: 150 18 | lr: 0.002 19 | dataloader_options: 20 | batch_size: 1 21 | 22 | mask_net: !new:speechbrain.lobes.models.conv_tasnet.MaskNet 23 | N: 32 24 | B: 32 25 | H: 32 26 | P: 3 27 | X: 1 28 | R: 2 29 | C: 2 30 | norm_type: 'gLN' 31 | causal: False 32 | mask_nonlinear: 'relu' 33 | 34 | encoder: !new:speechbrain.lobes.models.dual_path.Encoder 35 | kernel_size: 16 36 | out_channels: 32 37 | 38 | decoder: !new:speechbrain.lobes.models.dual_path.Decoder 39 | in_channels: 32 40 | out_channels: 1 41 | kernel_size: 16 42 | stride: 8 43 | bias: False 44 | 45 | modules: 46 | mask_net: !ref 47 | encoder: !ref 48 | decoder: !ref 49 | 50 | opt_class: !name:torch.optim.Adam 51 | lr: !ref 52 | 53 | epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter 54 | limit: !ref 55 | 56 | train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger 57 | save_file: !ref 58 | -------------------------------------------------------------------------------- /tests/integration/neural_networks/speaker_id/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # Basic parameters 2 | seed: 1234 3 | __set_seed: !apply:torch.manual_seed [!ref ] 4 | 5 | # Training params 6 | N_epochs: 10 7 | lr: 0.001 8 | dataloader_options: 9 | batch_size: 8 10 | 11 | # Feature parameters 12 | n_mels: 24 13 | left_frames: 0 14 | right_frames: 0 15 | deltas: False 16 | 17 | # Number of speakers 18 | num_spks: 2 19 | 20 | 21 | compute_features: !new:speechbrain.lobes.features.Fbank 22 | n_mels: !ref 23 | left_frames: !ref 24 | right_frames: !ref 25 | deltas: !ref 26 | 27 | xvector_model: !new:speechbrain.lobes.models.Xvector.Xvector 28 | in_channels: !ref 29 | activation: !name:torch.nn.LeakyReLU 30 | tdnn_blocks: 5 31 | tdnn_channels: [512, 512, 512, 512, 1500] 32 | 
tdnn_kernel_sizes: [5, 3, 3, 1, 1] 33 | tdnn_dilations: [1, 2, 3, 1, 1] 34 | lin_neurons: 512 35 | 36 | classifier: !new:speechbrain.lobes.models.Xvector.Classifier 37 | input_shape: [null, null, 512] 38 | activation: !name:torch.nn.LeakyReLU 39 | lin_blocks: 1 40 | lin_neurons: 512 41 | out_neurons: !ref 42 | 43 | mean_var_norm: !new:speechbrain.processing.features.InputNormalization 44 | norm_type: global 45 | 46 | modules: 47 | xvector_model: !ref 48 | classifier: !ref 49 | mean_var_norm: !ref 50 | 51 | opt_class: !name:torch.optim.Adam 52 | lr: !ref 53 | 54 | compute_cost: !name:speechbrain.nnet.losses.nll_loss 55 | 56 | error_stats: !name:speechbrain.utils.metric_stats.MetricStats 57 | metric: !name:speechbrain.nnet.losses.classification_error 58 | reduction: batch 59 | -------------------------------------------------------------------------------- /tests/integration/signal_processing/PLDA_xvector/example_plda_experiment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import pickle 4 | import numpy 5 | from numpy import linalg as LA 6 | from speechbrain.processing.PLDA_LDA import StatObject_SB # noqa F401 7 | from speechbrain.processing.PLDA_LDA import PLDA 8 | from speechbrain.processing.PLDA_LDA import Ndx 9 | from speechbrain.processing.PLDA_LDA import fast_PLDA_scoring 10 | 11 | 12 | # Load params file 13 | experiment_dir = os.path.dirname(os.path.abspath(__file__)) 14 | data_folder = "../../../../../samples/plda_xvect_samples/" 15 | data_folder = os.path.abspath(experiment_dir + data_folder) 16 | 17 | # Xvectors stored as StatObject_SB 18 | train_file = data_folder + "/train_stat_xvect.pkl" 19 | enrol_file = data_folder + "/enrol_stat_xvect.pkl" 20 | test_file = data_folder + "/test_stat_xvect.pkl" 21 | scores_file = data_folder + "/expected_plda_scores.pkl" 22 | 23 | # Load Train 24 | with open(train_file, "rb") as input: 25 | train_obj = pickle.load(input) 26 | 27 | # Load Enrol 28 | with open(enrol_file, "rb") as input: 29 | enrol_obj = pickle.load(input) 30 | 31 | # Load Test 32 | with open(test_file, "rb") as input: 33 | test_obj = pickle.load(input) 34 | 35 | print("Training PLDA...") 36 | plda = PLDA() 37 | plda.plda(train_obj) 38 | 39 | # Preparing Ndx map 40 | models = enrol_obj.modelset 41 | testsegs = test_obj.modelset 42 | ndx_obj = Ndx(models=models, testsegs=testsegs) 43 | 44 | # PLDA scoring between enrol and test 45 | scores_plda = fast_PLDA_scoring( 46 | enrol_obj, test_obj, ndx_obj, plda.mean, plda.F, plda.Sigma 47 | ) 48 | print("PLDA score matrix: (Rows: Enrol, Columns: Test)") 49 | print(scores_plda.scoremat) 50 | 51 | with open(scores_file, "rb") as input: 52 | expected_score_matrix = pickle.load(input) 53 | 54 | print("Expected scores:\n", expected_score_matrix) 55 | 56 | # Ensuring the scores are proper (for integration test) 57 | dif = numpy.subtract(expected_score_matrix, scores_plda.scoremat) 58 | f_norm = LA.norm(dif, ord="fro") 59 | 60 | 61 | # Integration test: Ensure we get same score matrix 62 | def test_error(): 63 | assert f_norm < 0.1 64 | -------------------------------------------------------------------------------- /tests/integration/signal_processing/example_add_babble.py: -------------------------------------------------------------------------------- 1 | import os 2 | import speechbrain as sb 3 | from hyperpyyaml import load_hyperpyyaml 4 | from speechbrain.dataio.dataio import read_audio, write_audio 5 | 6 | output_folder = os.path.join("results", "add_babble") 
7 | experiment_dir = os.path.dirname(os.path.abspath(__file__)) 8 | hyperparams_file = os.path.join(experiment_dir, "hyperparams.yaml") 9 | 10 | 11 | def main(): 12 | overrides = { 13 | "output_folder": output_folder, 14 | "data_folder": os.path.join( 15 | experiment_dir, "..", "..", "..", "samples" 16 | ), 17 | "batch_size": 5, 18 | } 19 | with open(hyperparams_file) as fin: 20 | hyperparams = load_hyperpyyaml(fin, overrides) 21 | 22 | sb.create_experiment_directory( 23 | experiment_directory=output_folder, 24 | hyperparams_to_save=hyperparams_file, 25 | overrides=overrides, 26 | ) 27 | 28 | dataloader = sb.dataio.dataloader.make_dataloader( 29 | dataset=hyperparams["sample_data"], batch_size=hyperparams["batch_size"] 30 | ) 31 | for (id, (wav, wav_len),) in iter(dataloader): 32 | wav_babble = hyperparams["add_babble"](wav, wav_len) 33 | # save results on file 34 | for i, snt_id in enumerate(id): 35 | filepath = ( 36 | hyperparams["output_folder"] + "/save/" + snt_id + ".flac" 37 | ) 38 | write_audio(filepath, wav_babble[i], 16000) 39 | 40 | 41 | def test_bubble(): 42 | from glob import glob 43 | 44 | for filename in glob(os.path.join(output_folder, "save", "*.flac")): 45 | expected_file = filename.replace("results", "expected") 46 | actual = read_audio(filename) 47 | expected = read_audio(expected_file) 48 | assert actual.allclose(expected) 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /tests/integration/signal_processing/example_add_noise.py: -------------------------------------------------------------------------------- 1 | import os 2 | import speechbrain as sb 3 | from hyperpyyaml import load_hyperpyyaml 4 | from speechbrain.dataio.dataio import read_audio, write_audio 5 | 6 | output_folder = os.path.join("results", "add_noise") 7 | experiment_dir = os.path.dirname(os.path.abspath(__file__)) 8 | hyperparams_file = os.path.join(experiment_dir, "hyperparams.yaml") 9 | 10 | 11 | def main(): 12 | overrides = { 13 | "output_folder": output_folder, 14 | "data_folder": os.path.join( 15 | experiment_dir, "..", "..", "..", "samples" 16 | ), 17 | } 18 | with open(hyperparams_file) as fin: 19 | hyperparams = load_hyperpyyaml(fin, overrides) 20 | 21 | sb.create_experiment_directory( 22 | experiment_directory=output_folder, 23 | hyperparams_to_save=hyperparams_file, 24 | overrides=overrides, 25 | ) 26 | 27 | dataloader = sb.dataio.dataloader.make_dataloader( 28 | dataset=hyperparams["sample_data"], batch_size=hyperparams["batch_size"] 29 | ) 30 | for (id, (wav, wav_len),) in iter(dataloader): 31 | wav_noise = hyperparams["add_noise"](wav, wav_len) 32 | # save results on file 33 | for i, snt_id in enumerate(id): 34 | filepath = ( 35 | hyperparams["output_folder"] + "/save/" + snt_id + ".flac" 36 | ) 37 | write_audio(filepath, wav_noise[i], 16000) 38 | 39 | 40 | def test_noise(): 41 | from glob import glob 42 | 43 | for filename in glob(os.path.join(output_folder, "save", "*.flac")): 44 | expected_file = filename.replace("results", "expected") 45 | actual = read_audio(filename) 46 | expected = read_audio(expected_file) 47 | assert actual.allclose(expected) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /tests/integration/signal_processing/example_add_reverb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import speechbrain as sb 3 | from hyperpyyaml import 
load_hyperpyyaml 4 | from speechbrain.dataio.dataio import read_audio, write_audio 5 | 6 | output_folder = os.path.join("results", "add_reverb") 7 | experiment_dir = os.path.dirname(os.path.abspath(__file__)) 8 | hyperparams_file = os.path.join(experiment_dir, "hyperparams.yaml") 9 | 10 | 11 | def main(): 12 | overrides = { 13 | "output_folder": output_folder, 14 | "data_folder": os.path.join( 15 | experiment_dir, "..", "..", "..", "samples" 16 | ), 17 | } 18 | with open(hyperparams_file) as fin: 19 | hyperparams = load_hyperpyyaml(fin, overrides) 20 | 21 | sb.create_experiment_directory( 22 | experiment_directory=output_folder, 23 | hyperparams_to_save=hyperparams_file, 24 | overrides=overrides, 25 | ) 26 | 27 | dataloader = sb.dataio.dataloader.make_dataloader( 28 | dataset=hyperparams["sample_data"], batch_size=hyperparams["batch_size"] 29 | ) 30 | for (id, (wav, wav_len),) in iter(dataloader): 31 | wav_reverb = hyperparams["add_reverb"](wav, wav_len) 32 | # save results on file 33 | for i, snt_id in enumerate(id): 34 | filepath = ( 35 | hyperparams["output_folder"] + "/save/" + snt_id + ".flac" 36 | ) 37 | write_audio(filepath, wav_reverb[i], 16000) 38 | 39 | 40 | def test_reverb(): 41 | from glob import glob 42 | 43 | for filename in glob(os.path.join(output_folder, "save", "*.flac")): 44 | expected_file = filename.replace("results", "expected") 45 | actual = read_audio(filename) 46 | expected = read_audio(expected_file) 47 | assert actual.allclose(expected) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /tests/integration/signal_processing/example_do_clip.py: -------------------------------------------------------------------------------- 1 | import os 2 | import speechbrain as sb 3 | from hyperpyyaml import load_hyperpyyaml 4 | from speechbrain.dataio.dataio import read_audio, write_audio 5 | 6 | output_folder = os.path.join("results", "do_clip") 7 | experiment_dir = os.path.dirname(os.path.abspath(__file__)) 8 | hyperparams_file = os.path.join(experiment_dir, "hyperparams.yaml") 9 | 10 | 11 | def main(): 12 | overrides = { 13 | "output_folder": output_folder, 14 | "data_folder": os.path.join( 15 | experiment_dir, "..", "..", "..", "samples" 16 | ), 17 | } 18 | with open(hyperparams_file) as fin: 19 | hyperparams = load_hyperpyyaml(fin, overrides) 20 | 21 | sb.create_experiment_directory( 22 | experiment_directory=output_folder, 23 | hyperparams_to_save=hyperparams_file, 24 | overrides=overrides, 25 | ) 26 | 27 | dataloader = sb.dataio.dataloader.make_dataloader( 28 | dataset=hyperparams["sample_data"], batch_size=hyperparams["batch_size"] 29 | ) 30 | for (id, (wav, wav_len),) in iter(dataloader): 31 | wav_clip = hyperparams["do_clip"](wav) 32 | # save results on file 33 | for i, snt_id in enumerate(id): 34 | filepath = ( 35 | hyperparams["output_folder"] + "/save/" + snt_id + ".flac" 36 | ) 37 | write_audio(filepath, wav_clip[i], 16000) 38 | 39 | 40 | def test_do_clip(): 41 | from glob import glob 42 | 43 | for filename in glob(os.path.join(output_folder, "save", "*.flac")): 44 | expected_file = filename.replace("results", "expected") 45 | actual = read_audio(filename) 46 | expected = read_audio(expected_file) 47 | assert actual.allclose(expected) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /tests/integration/signal_processing/example_drop_chunk.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import speechbrain as sb 3 | from hyperpyyaml import load_hyperpyyaml 4 | from speechbrain.dataio.dataio import read_audio, write_audio 5 | 6 | output_folder = os.path.join("results", "drop_chunk") 7 | experiment_dir = os.path.dirname(os.path.abspath(__file__)) 8 | hyperparams_file = os.path.join(experiment_dir, "hyperparams.yaml") 9 | 10 | 11 | def main(): 12 | overrides = { 13 | "output_folder": output_folder, 14 | "data_folder": os.path.join( 15 | experiment_dir, "..", "..", "..", "samples" 16 | ), 17 | } 18 | with open(hyperparams_file) as fin: 19 | hyperparams = load_hyperpyyaml(fin, overrides) 20 | 21 | sb.create_experiment_directory( 22 | experiment_directory=output_folder, 23 | hyperparams_to_save=hyperparams_file, 24 | overrides=overrides, 25 | ) 26 | 27 | dataloader = sb.dataio.dataloader.make_dataloader( 28 | dataset=hyperparams["sample_data"], batch_size=hyperparams["batch_size"] 29 | ) 30 | for (id, (wav, wav_len),) in iter(dataloader): 31 | wav_drop = hyperparams["drop_chunk"](wav, wav_len) 32 | # save results on file 33 | for i, snt_id in enumerate(id): 34 | filepath = ( 35 | hyperparams["output_folder"] + "/save/" + snt_id + ".flac" 36 | ) 37 | write_audio(filepath, wav_drop[i], 16000) 38 | 39 | 40 | def test_chunk(): 41 | from glob import glob 42 | 43 | for filename in glob(os.path.join(output_folder, "save", "*.flac")): 44 | expected_file = filename.replace("results", "expected") 45 | actual = read_audio(filename) 46 | expected = read_audio(expected_file) 47 | assert actual.allclose(expected) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /tests/integration/signal_processing/example_drop_freq.py: -------------------------------------------------------------------------------- 1 | import os 2 | import speechbrain as sb 3 | from hyperpyyaml import load_hyperpyyaml 4 | from speechbrain.dataio.dataio import read_audio, write_audio 5 | 6 | output_folder = os.path.join("results", "drop_freq") 7 | experiment_dir = os.path.dirname(os.path.abspath(__file__)) 8 | hyperparams_file = os.path.join(experiment_dir, "hyperparams.yaml") 9 | 10 | 11 | def main(): 12 | overrides = { 13 | "output_folder": output_folder, 14 | "data_folder": os.path.join( 15 | experiment_dir, "..", "..", "..", "samples" 16 | ), 17 | } 18 | with open(hyperparams_file) as fin: 19 | hyperparams = load_hyperpyyaml(fin, overrides) 20 | 21 | sb.create_experiment_directory( 22 | experiment_directory=output_folder, 23 | hyperparams_to_save=hyperparams_file, 24 | overrides=overrides, 25 | ) 26 | 27 | dataloader = sb.dataio.dataloader.make_dataloader( 28 | dataset=hyperparams["sample_data"], batch_size=hyperparams["batch_size"] 29 | ) 30 | for (id, (wav, wav_len),) in iter(dataloader): 31 | wav_drop = hyperparams["drop_freq"](wav) 32 | # save results on file 33 | for i, snt_id in enumerate(id): 34 | filepath = ( 35 | hyperparams["output_folder"] + "/save/" + snt_id + ".flac" 36 | ) 37 | write_audio(filepath, wav_drop[i], 16000) 38 | 39 | 40 | def test_drop_freq(): 41 | from glob import glob 42 | 43 | for filename in glob(os.path.join(output_folder, "save", "*.flac")): 44 | expected_file = filename.replace("results", "expected") 45 | actual = read_audio(filename) 46 | expected = read_audio(expected_file) 47 | assert actual.allclose(expected) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | 
-------------------------------------------------------------------------------- /tests/integration/signal_processing/example_speed_perturb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import speechbrain as sb 3 | from hyperpyyaml import load_hyperpyyaml 4 | from speechbrain.dataio.dataio import read_audio, write_audio 5 | 6 | output_folder = os.path.join("results", "speed_perturb") 7 | experiment_dir = os.path.dirname(os.path.abspath(__file__)) 8 | hyperparams_file = os.path.join(experiment_dir, "hyperparams.yaml") 9 | 10 | 11 | def main(): 12 | overrides = { 13 | "output_folder": output_folder, 14 | "data_folder": os.path.join( 15 | experiment_dir, "..", "..", "..", "samples" 16 | ), 17 | } 18 | with open(hyperparams_file) as fin: 19 | hyperparams = load_hyperpyyaml(fin, overrides) 20 | 21 | sb.create_experiment_directory( 22 | experiment_directory=output_folder, 23 | hyperparams_to_save=hyperparams_file, 24 | overrides=overrides, 25 | ) 26 | 27 | dataloader = sb.dataio.dataloader.make_dataloader( 28 | dataset=hyperparams["sample_data"], batch_size=hyperparams["batch_size"] 29 | ) 30 | for (id, (wav, wav_len),) in iter(dataloader): 31 | wav_perturb = hyperparams["speed_perturb"](wav) 32 | # save results on file 33 | for i, snt_id in enumerate(id): 34 | filepath = ( 35 | hyperparams["output_folder"] + "/save/" + snt_id + ".flac" 36 | ) 37 | write_audio(filepath, wav_perturb[i], 16000) 38 | 39 | 40 | def test_peturb(): 41 | from glob import glob 42 | 43 | for filename in glob(os.path.join(output_folder, "save", "*.flac")): 44 | expected_file = filename.replace("results", "expected") 45 | actual = read_audio(filename) 46 | expected = read_audio(expected_file) 47 | assert actual.allclose(expected) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /tests/integration/signal_processing/expected/add_babble/save/example1.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/tests/integration/signal_processing/expected/add_babble/save/example1.flac -------------------------------------------------------------------------------- /tests/integration/signal_processing/expected/add_noise/save/example1.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/tests/integration/signal_processing/expected/add_noise/save/example1.flac -------------------------------------------------------------------------------- /tests/integration/signal_processing/expected/add_reverb/save/example1.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/tests/integration/signal_processing/expected/add_reverb/save/example1.flac -------------------------------------------------------------------------------- /tests/integration/signal_processing/expected/do_clip/save/example1.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/tests/integration/signal_processing/expected/do_clip/save/example1.flac 
-------------------------------------------------------------------------------- /tests/integration/signal_processing/expected/drop_chunk/save/example1.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/tests/integration/signal_processing/expected/drop_chunk/save/example1.flac -------------------------------------------------------------------------------- /tests/integration/signal_processing/expected/drop_freq/save/example1.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/tests/integration/signal_processing/expected/drop_freq/save/example1.flac -------------------------------------------------------------------------------- /tests/integration/signal_processing/expected/speed_perturb/save/example1.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YUCHEN005/Unified-Enhance-Separation/1925be9e75835391ad2aed89a7e63a5b9b40e757/tests/integration/signal_processing/expected/speed_perturb/save/example1.flac -------------------------------------------------------------------------------- /tests/integration/signal_processing/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | output_folder: !PLACEHOLDER 2 | data_folder: !PLACEHOLDER 3 | csv_file: !ref /audio_samples/csv_example2.csv 4 | sample_rate: 16000 5 | batch_size: 1 6 | 7 | sample_data: !new:speechbrain.dataio.legacy.ExtendedCSVDataset 8 | csvpath: !ref 9 | sorting: descending 10 | output_keys: [id, wav] 11 | replacements: 12 | data_folder: !ref /audio_samples 13 | 14 | add_babble: !new:speechbrain.processing.speech_augmentation.AddBabble 15 | speaker_count: 4 # Must set batch size to 5 or more 16 | snr_low: 0 17 | snr_high: 0 18 | 19 | add_reverb: !new:speechbrain.processing.speech_augmentation.AddReverb 20 | csv_file: !ref /rir_samples/rirs_rel.csv 21 | sorting: descending 22 | replacements: 23 | rir_folder: !ref /rir_samples 24 | 25 | add_noise: !new:speechbrain.processing.speech_augmentation.AddNoise 26 | csv_file: !ref /noise_samples/noise_rel.csv 27 | sorting: descending 28 | snr_low: 0 29 | snr_high: 0 30 | pad_noise: False 31 | start_index: 0 32 | replacements: 33 | noise_folder: !ref /noise_samples 34 | 35 | drop_freq: !new:speechbrain.processing.speech_augmentation.DropFreq 36 | drop_freq_low: 0.5 37 | drop_freq_high: 0.5 38 | drop_count_low: 1 39 | drop_count_high: 1 40 | drop_width: 0.05 41 | 42 | drop_chunk: !new:speechbrain.processing.speech_augmentation.DropChunk 43 | drop_length_low: 1000 44 | drop_length_high: 1000 45 | drop_count_low: 1 46 | drop_count_high: 1 47 | drop_start: 1000 48 | drop_end: 2000 49 | 50 | do_clip: !new:speechbrain.processing.speech_augmentation.DoClip 51 | clip_low: 0.01 52 | clip_high: 0.01 53 | 54 | speed_perturb: !new:speechbrain.processing.speech_augmentation.SpeedPerturb 55 | orig_freq: !ref 56 | speeds: [90] 57 | -------------------------------------------------------------------------------- /tests/integration/signal_processing/nmf_sourcesep/hyperparams.yaml: -------------------------------------------------------------------------------- 1 | # Basic parameters 2 | output_folder: results/minimal/nmf_sourcesep 3 | sample_rate: 16000 4 | 5 | # Data files 6 | data_folder: 
../../../../samples/audio_samples/sourcesep_samples 7 | csv_train: !ref /csv_example_sourcesep_source1.csv 8 | csv_train2: !ref /csv_example_sourcesep_source2.csv 9 | csv_test: !ref /csv_example_sourcesep_mixture.csv 10 | 11 | # NMF parameters 12 | N_epochs: 50 13 | K: 20 # this specifies the number of template vectors to use in NMF. 14 | N_batch: 200 15 | m: 513 # length of stft vectors 16 | win_length: 40 # window length (in ms) for stft 17 | hop_length: 10 # hop length (in ms) for stft 18 | 19 | # Experiment flags: 20 | save_reconstructed: False # saves the results 21 | copy_original_files: False # copies the original files 22 | 23 | train_data: !new:speechbrain.dataio.legacy.ExtendedCSVDataset 24 | csvpath: !ref 25 | output_keys: [wav] 26 | sorting: 'original' 27 | replacements: 28 | data_folder: !ref 29 | 30 | train_data2: !new:speechbrain.dataio.legacy.ExtendedCSVDataset 31 | csvpath: !ref 32 | output_keys: [wav] 33 | sorting: 'original' 34 | replacements: 35 | data_folder: !ref 36 | 37 | test_data: !new:speechbrain.dataio.legacy.ExtendedCSVDataset 38 | csvpath: !ref 39 | output_keys: [wav] 40 | sorting: 'original' 41 | replacements: 42 | data_folder: !ref 43 | 44 | loader_kwargs: 45 | batch_size: !ref 46 | 47 | compute_features: !new:speechbrain.processing.features.STFT 48 | sample_rate: !ref 49 | n_fft: 1024 50 | win_length: !ref 51 | hop_length: !ref 52 | -------------------------------------------------------------------------------- /tests/unittests/test_CNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn 3 | 4 | 5 | def test_SincConv(): 6 | 7 | from speechbrain.nnet.CNN import SincConv 8 | 9 | input = torch.rand([4, 16000]) 10 | convolve = SincConv( 11 | input_shape=input.shape, out_channels=8, kernel_size=65, padding="same" 12 | ) 13 | output = convolve(input) 14 | assert output.shape[-1] == 8 15 | 16 | assert torch.jit.trace(convolve, input) 17 | 18 | 19 | def test_Conv1d(): 20 | 21 | from speechbrain.nnet.CNN import Conv1d 22 | 23 | input = torch.tensor([-1, -1, -1, -1]).unsqueeze(0).unsqueeze(2).float() 24 | convolve = Conv1d( 25 | out_channels=1, kernel_size=1, input_shape=input.shape, padding="same" 26 | ) 27 | output = convolve(input) 28 | assert input.shape == output.shape 29 | 30 | convolve.conv.weight = torch.nn.Parameter( 31 | torch.tensor([-1]).float().unsqueeze(0).unsqueeze(1) 32 | ) 33 | convolve.conv.bias = torch.nn.Parameter(torch.tensor([0]).float()) 34 | output = convolve(input) 35 | assert torch.all(torch.eq(torch.ones(input.shape), output)) 36 | 37 | assert torch.jit.trace(convolve, input) 38 | 39 | 40 | def test_Conv2d(): 41 | 42 | from speechbrain.nnet.CNN import Conv2d 43 | 44 | input = torch.rand([4, 11, 32, 1]) 45 | convolve = Conv2d( 46 | out_channels=1, 47 | input_shape=input.shape, 48 | kernel_size=(1, 1), 49 | padding="same", 50 | ) 51 | output = convolve(input) 52 | assert output.shape[-1] == 1 53 | 54 | convolve.conv.weight = torch.nn.Parameter( 55 | torch.zeros(convolve.conv.weight.shape) 56 | ) 57 | convolve.conv.bias = torch.nn.Parameter(torch.tensor([0]).float()) 58 | output = convolve(input) 59 | assert torch.all(torch.eq(torch.zeros(input.shape), output)) 60 | 61 | convolve.conv.weight = torch.nn.Parameter( 62 | torch.ones(convolve.conv.weight.shape) 63 | ) 64 | convolve.conv.bias = torch.nn.Parameter(torch.tensor([0]).float()) 65 | output = convolve(input) 66 | assert torch.all(torch.eq(input, output)) 67 | 68 | assert torch.jit.trace(convolve, input) 69 | 
-------------------------------------------------------------------------------- /tests/unittests/test_activations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn 3 | 4 | 5 | def test_softmax(): 6 | 7 | from speechbrain.nnet.activations import Softmax 8 | 9 | inputs = torch.tensor([1, 2, 3]).float() 10 | act = Softmax(apply_log=False) 11 | outputs = act(inputs) 12 | assert torch.argmax(outputs) == 2 13 | 14 | assert torch.jit.trace(act, inputs) 15 | -------------------------------------------------------------------------------- /tests/unittests/test_attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def test_rel_pos_MHA(): 5 | 6 | from speechbrain.nnet.attention import RelPosMHAXL 7 | 8 | bsz = 2 9 | emb_dim = 4 10 | k_len = [12, 10] 11 | q_len = [10, 12] 12 | bias = [True, False] 13 | head_dim = [4, None] 14 | 15 | for kl in k_len: 16 | for ql in q_len: 17 | for b in bias: 18 | for h in head_dim: 19 | relpos = RelPosMHAXL(emb_dim, num_heads=2, vbias=b, vdim=h) 20 | q = torch.rand((bsz, ql, emb_dim)) 21 | k = torch.rand((bsz, kl, emb_dim)) 22 | pos_embs = torch.rand((1, 2 * kl - 1, emb_dim)) 23 | relpos(q, k, k, pos_embs=pos_embs) 24 | -------------------------------------------------------------------------------- /tests/unittests/test_batching.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | 5 | def test_batch_pad_right_to(): 6 | from speechbrain.utils.data_utils import batch_pad_right 7 | import random 8 | 9 | n_channels = 40 10 | batch_lens = [1, 5] 11 | 12 | for b in batch_lens: 13 | tensors = [ 14 | torch.ones(n_channels, random.randint(10, 53),) for x in range(b) 15 | ] 16 | batched, lens = batch_pad_right(tensors) 17 | assert batched.shape[0] == b 18 | 19 | for b in batch_lens: 20 | tensors = [torch.ones(random.randint(10, 53),) for x in range(b)] 21 | batched, lens = batch_pad_right(tensors) 22 | assert batched.shape[0] == b 23 | 24 | 25 | def test_paddedbatch(): 26 | from speechbrain.dataio.batch import PaddedBatch 27 | 28 | batch = PaddedBatch( 29 | [ 30 | { 31 | "id": "ex1", 32 | "foo": torch.Tensor([1.0]), 33 | "bar": torch.Tensor([1.0, 2.0, 3.0]), 34 | }, 35 | { 36 | "id": "ex2", 37 | "foo": torch.Tensor([2.0, 1.0]), 38 | "bar": torch.Tensor([2.0]), 39 | }, 40 | ] 41 | ) 42 | batch.to(dtype=torch.half) 43 | assert batch.foo.data.dtype == torch.half 44 | assert batch["foo"][1].dtype == torch.half 45 | assert batch.bar.lengths.dtype == torch.half 46 | assert batch.foo.data.shape == torch.Size([2, 2]) 47 | assert batch.bar.data.shape == torch.Size([2, 3]) 48 | ids, foos, bars = batch 49 | assert ids == ["ex1", "ex2"] 50 | 51 | 52 | @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires CUDA") 53 | def test_pin_memory(): 54 | from speechbrain.dataio.batch import PaddedBatch 55 | 56 | batch = PaddedBatch( 57 | [ 58 | { 59 | "id": "ex1", 60 | "foo": torch.Tensor([1.0]), 61 | "bar": torch.Tensor([1.0, 2.0, 3.0]), 62 | }, 63 | { 64 | "id": "ex2", 65 | "foo": torch.Tensor([2.0, 1.0]), 66 | "bar": torch.Tensor([2.0]), 67 | }, 68 | ] 69 | ) 70 | batch.pin_memory() 71 | assert batch.foo.data.is_pinned() 72 | -------------------------------------------------------------------------------- /tests/unittests/test_callchains.py: -------------------------------------------------------------------------------- 1 | def test_lengths_arg_exists(): 2 | from 
speechbrain.utils.callchains import lengths_arg_exists 3 | 4 | def non_len_func(x): 5 | return x + 1 6 | 7 | def len_func(x, lengths): 8 | return x + lengths 9 | 10 | assert not lengths_arg_exists(non_len_func) 11 | assert lengths_arg_exists(len_func) 12 | 13 | 14 | def test_lengths_capable_chain(): 15 | from speechbrain.utils.callchains import LengthsCapableChain 16 | 17 | def non_len_func(x): 18 | return x + 1 19 | 20 | def len_func(x, lengths): 21 | return x + lengths 22 | 23 | def tuple_func(x): 24 | return x, x + 1 25 | 26 | chain = LengthsCapableChain(non_len_func, len_func) 27 | assert chain(1, 2) == 4 28 | assert chain(lengths=2, x=1) == 4 29 | chain.append(non_len_func) 30 | assert chain(1, 2) == 5 31 | chain.append(tuple_func) 32 | assert chain(1, 2) == 5 33 | -------------------------------------------------------------------------------- /tests/unittests/test_core.py: -------------------------------------------------------------------------------- 1 | def test_parse_arguments(): 2 | from speechbrain.core import parse_arguments 3 | 4 | filename, run_opts, overrides = parse_arguments( 5 | ["params.yaml", "--device=cpu", "--seed=3", "--data_folder", "TIMIT"] 6 | ) 7 | assert filename == "params.yaml" 8 | assert run_opts["device"] == "cpu" 9 | assert overrides == "seed: 3\ndata_folder: TIMIT" 10 | 11 | 12 | def test_brain(): 13 | import torch 14 | from speechbrain.core import Brain, Stage 15 | from torch.optim import SGD 16 | 17 | model = torch.nn.Linear(in_features=10, out_features=10) 18 | 19 | class SimpleBrain(Brain): 20 | def compute_forward(self, batch, stage): 21 | return self.modules.model(batch[0]) 22 | 23 | def compute_objectives(self, predictions, batch, stage): 24 | return torch.nn.functional.l1_loss(predictions, batch[1]) 25 | 26 | brain = SimpleBrain({"model": model}, lambda x: SGD(x, 0.1)) 27 | 28 | inputs = torch.rand(10, 10) 29 | targets = torch.rand(10, 10) 30 | train_set = ([inputs, targets],) 31 | valid_set = ([inputs, targets],) 32 | 33 | start_output = brain.compute_forward(inputs, Stage.VALID) 34 | start_loss = brain.compute_objectives(start_output, targets, Stage.VALID) 35 | brain.fit(epoch_counter=range(10), train_set=train_set, valid_set=valid_set) 36 | end_output = brain.compute_forward(inputs, Stage.VALID) 37 | end_loss = brain.compute_objectives(end_output, targets, Stage.VALID) 38 | assert end_loss < start_loss 39 | -------------------------------------------------------------------------------- /tests/unittests/test_counting.py: -------------------------------------------------------------------------------- 1 | def test_pad_ends(): 2 | from speechbrain.lm.counting import pad_ends 3 | 4 | assert next(pad_ends(["a", "b", "c"])) == "" 5 | assert next(pad_ends(["a", "b", "c"], pad_left=False)) == "a" 6 | assert list(pad_ends(["a", "b", "c"], pad_left=False))[-1] == "" 7 | assert list(pad_ends([], pad_left=False)) 8 | assert list(pad_ends([], pad_left=True)) 9 | 10 | 11 | def test_ngrams(): 12 | from speechbrain.lm.counting import ngrams 13 | 14 | assert next(ngrams(["a", "b", "c"], n=3)) == ("a", "b", "c") 15 | assert next(ngrams(["a", "b", "c"], n=1)) == ("a",) 16 | assert not list(ngrams(["a", "b", "c"], n=4)) 17 | assert list(ngrams(["a", "b", "c"], n=2)) == [("a", "b"), ("b", "c")] 18 | 19 | 20 | def test_ngrams_for_evaluation(): 21 | from speechbrain.lm.counting import ngrams_for_evaluation 22 | 23 | assert list(ngrams_for_evaluation(["a", "b", "c"], max_n=3)) == [ 24 | ("b", ("a",)), 25 | ("c", ("a", "b")), 26 | ] 27 | assert list( 28 | 
ngrams_for_evaluation(["a", "b", "c"], max_n=3, predict_first=True) 29 | ) == [("a", ()), ("b", ("a",)), ("c", ("a", "b"))] 30 | -------------------------------------------------------------------------------- /tests/unittests/test_dependency_graph.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | def test_dependency_graph(): 5 | from speechbrain.utils.depgraph import ( 6 | DependencyGraph, 7 | CircularDependencyError, 8 | ) 9 | 10 | dg = DependencyGraph() 11 | # a->b->c 12 | dg.add_edge("b", "c") 13 | dg.add_edge("a", "b") 14 | assert dg.is_valid() 15 | eval_order = [node.key for node in dg.get_evaluation_order()] 16 | assert eval_order == ["c", "b", "a"] 17 | dg = DependencyGraph() 18 | # a->b->c, a->c 19 | dg.add_edge("b", "c") 20 | dg.add_edge("a", "b") 21 | dg.add_edge("a", "c") 22 | eval_order = [node.key for node in dg.get_evaluation_order()] 23 | assert eval_order == ["c", "b", "a"] 24 | dg = DependencyGraph() 25 | # a->b, a->c 26 | dg.add_edge("a", "b") 27 | dg.add_edge("a", "c") 28 | eval_order = [node.key for node in dg.get_evaluation_order()] 29 | assert eval_order == ["c", "b", "a"] or eval_order == ["b", "c", "a"] 30 | dg = DependencyGraph() 31 | # a->b, c->d 32 | dg.add_edge("a", "b") 33 | dg.add_edge("c", "d") 34 | eval_order = [node.key for node in dg.get_evaluation_order()] 35 | valid_orders = [ 36 | ["d", "c", "b", "a"], 37 | ["d", "b", "c", "a"], 38 | ["d", "b", "a", "c"], 39 | ["b", "a", "d", "c"], 40 | ["b", "d", "a", "c"], 41 | ["b", "d", "c", "a"], 42 | ] 43 | assert eval_order in valid_orders 44 | dg = DependencyGraph() 45 | # a->b 46 | dg.add_node("a") 47 | dg.add_node("b") 48 | dg.add_edge("a", "b") 49 | eval_order = [node.key for node in dg.get_evaluation_order()] 50 | assert eval_order == ["b", "a"] 51 | dg = DependencyGraph() 52 | # a->b->a Impossible! 
53 | dg.add_edge("a", "b") 54 | dg.add_edge("b", "a") 55 | assert not dg.is_valid() 56 | with pytest.raises(CircularDependencyError): 57 | list(dg.get_evaluation_order()) 58 | dg = DependencyGraph() 59 | # a->b with data 60 | # should use uuids 61 | a_key = dg.add_node(data="a") 62 | assert a_key != "a" 63 | b_key = dg.add_node(data="b") 64 | dg.add_edge(a_key, b_key) 65 | eval_order_data = [node.data for node in dg.get_evaluation_order()] 66 | assert eval_order_data == ["b", "a"] 67 | # Adding same key in edge (implicitly) and then explicitly is ok: 68 | dg = DependencyGraph() 69 | dg.add_edge("a", "b") 70 | dg.add_node("a") 71 | eval_order = [node.key for node in dg.get_evaluation_order()] 72 | assert eval_order == ["b", "a"] 73 | # But adding same key twice explicitly will not work: 74 | with pytest.raises(ValueError): 75 | dg.add_node("a") 76 | -------------------------------------------------------------------------------- /tests/unittests/test_dropout.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn 3 | 4 | 5 | def test_dropout(): 6 | 7 | from speechbrain.nnet.dropout import Dropout2d 8 | 9 | inputs = torch.rand([4, 10, 32]) 10 | drop = Dropout2d(drop_rate=0.0) 11 | outputs = drop(inputs) 12 | assert torch.all(torch.eq(inputs, outputs)) 13 | 14 | drop = Dropout2d(drop_rate=1.0) 15 | outputs = drop(inputs) 16 | assert torch.all(torch.eq(torch.zeros(inputs.shape), outputs)) 17 | 18 | assert torch.jit.trace(drop, inputs) 19 | -------------------------------------------------------------------------------- /tests/unittests/test_edit_distance.py: -------------------------------------------------------------------------------- 1 | def test_accumulatable_wer_stats(): 2 | from speechbrain.utils.edit_distance import accumulatable_wer_stats 3 | 4 | refs = [[[1, 2, 3], [4, 5, 6]], [[7, 8], [9]]] 5 | hyps = [[[1, 2, 4], [5, 6]], [[7, 8], [10]]] 6 | # Test basic functionality: 7 | stats = accumulatable_wer_stats(refs[0], hyps[0]) 8 | assert stats["WER"] == 100.0 * 2 / 6 9 | stats = accumulatable_wer_stats(refs[1], hyps[1], stats) 10 | assert stats["WER"] == 100.0 * 3 / 9 11 | # Test edge cases: 12 | import math 13 | 14 | # No batches: 15 | stats = accumulatable_wer_stats([], []) 16 | assert stats["num_ref_tokens"] == 0 17 | assert math.isnan(stats["WER"]) 18 | # Empty hyp sequence: 19 | stats = accumulatable_wer_stats([[1, 2, 3]], [[]]) 20 | assert stats["num_ref_tokens"] == 3 21 | assert stats["WER"] == 100.0 22 | # Empty ref sequence: 23 | stats = accumulatable_wer_stats([[]], [[1, 2, 3]]) 24 | assert stats["num_ref_tokens"] == 0 25 | assert stats["insertions"] == 3 26 | assert math.isnan(stats["WER"]) 27 | 28 | 29 | def test_op_table(): 30 | from speechbrain.utils.edit_distance import op_table, EDIT_SYMBOLS 31 | 32 | assert len(op_table([1, 2, 3], [1, 2, 4])) == 4 33 | assert len(op_table([1, 2, 3], [1, 2, 4])[0]) == 4 34 | assert len(op_table([1, 2, 3], [])) == 4 35 | assert len(op_table([1, 2, 3], [])[0]) == 1 36 | assert op_table([1, 2, 3], [1, 2, 4])[3][3] == EDIT_SYMBOLS["sub"] 37 | assert op_table([1, 2, 3], [1, 2, 4])[2][2] == EDIT_SYMBOLS["eq"] 38 | assert op_table([1, 2, 3], [1, 2, 4])[0][0] == EDIT_SYMBOLS["eq"] 39 | 40 | 41 | def test_alignment(): 42 | from speechbrain.utils.edit_distance import alignment, EDIT_SYMBOLS 43 | 44 | I = EDIT_SYMBOLS["ins"] # noqa: E741, here I is a good var name 45 | D = EDIT_SYMBOLS["del"] 46 | S = EDIT_SYMBOLS["sub"] 47 | E = EDIT_SYMBOLS["eq"] 48 | table = [[I, I, I, I], [D, E, 
49 |     assert alignment(table) == [(E, 0, 0), (E, 1, 1), (S, 2, 2)]
50 |
51 |
52 | def test_count_ops():
53 |     from speechbrain.utils.edit_distance import count_ops, EDIT_SYMBOLS
54 |
55 |     I = EDIT_SYMBOLS["ins"]  # noqa: E741, here I is a good var name
56 |     D = EDIT_SYMBOLS["del"]
57 |     S = EDIT_SYMBOLS["sub"]
58 |     E = EDIT_SYMBOLS["eq"]
59 |     table = [[I, I, I, I], [D, E, I, I], [D, D, E, I], [D, D, D, S]]
60 |     assert count_ops(table)["insertions"] == 0
61 |     assert count_ops(table)["deletions"] == 0
62 |     assert count_ops(table)["substitutions"] == 1
63 |
--------------------------------------------------------------------------------
/tests/unittests/test_embedding.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def test_embedding():
5 |
6 |     from speechbrain.nnet.embedding import Embedding
7 |
8 |     # create one hot vector and consider blank as zero vector
9 |     embedding_dim = 39
10 |     blank_id = 39
11 |     size_dict = 40
12 |     emb = Embedding(
13 |         num_embeddings=size_dict, consider_as_one_hot=True, blank_id=blank_id,
14 |     )
15 |     inputs = torch.Tensor([10, 5, 2, 0, 39]).long()
16 |     output = emb(inputs)
17 |     assert output.shape == (5, 39)
18 |
19 |     # use standard embedding layer
20 |     embedding_dim = 128
21 |     emb = Embedding(num_embeddings=size_dict, embedding_dim=embedding_dim)
22 |     inputs = torch.randint(0, 40, (5, 10))
23 |     output = emb(inputs)
24 |     assert output.shape == (5, 10, 128)
25 |
26 |     assert torch.jit.trace(emb, inputs)
27 |
--------------------------------------------------------------------------------
/tests/unittests/test_epoch_loop.py:
--------------------------------------------------------------------------------
1 | def test_epoch_loop_recovery(tmpdir):
2 |     from speechbrain.utils.checkpoints import Checkpointer
3 |     from speechbrain.utils.epoch_loop import EpochCounter
4 |
5 |     epoch_counter = EpochCounter(2)
6 |     recoverer = Checkpointer(tmpdir, {"epoch": epoch_counter})
7 |     for epoch in epoch_counter:
8 |         assert epoch == 1
9 |         # Save a mid-epoch checkpoint:
10 |         recoverer.save_checkpoint(end_of_epoch=False)
11 |         # Simulate interruption
12 |         break
13 |     # Now after recovery still at epoch 1:
14 |     recoverer.recover_if_possible()
15 |     second_epoch = False  # Will manually update this
16 |     for epoch in epoch_counter:
17 |         if not second_epoch:
18 |             assert epoch == 1
19 |             recoverer.save_checkpoint(end_of_epoch=True)
20 |             second_epoch = True
21 |         else:
22 |             assert epoch == 2
23 |             # Again simulate interruption
24 |             break
25 |     # Now after recovery we are in epoch 2:
26 |     recoverer.recover_if_possible()
27 |     loop_runs = 0
28 |     for epoch in epoch_counter:
29 |         assert epoch == 2
30 |         loop_runs += 1
31 |         recoverer.save_checkpoint(end_of_epoch=True)
32 |     # And that is that:
33 |     assert loop_runs == 1
34 |     # And now after recovery, no more epochs:
35 |     recoverer.recover_if_possible()
36 |     for epoch in epoch_counter:
37 |         # Will not get here:
38 |         assert False
39 |
--------------------------------------------------------------------------------
/tests/unittests/test_linear.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn
3 |
4 |
5 | def test_linear():
6 |
7 |     from speechbrain.nnet.linear import Linear
8 |
9 |     inputs = torch.rand(1, 2, 4)
10 |     lin_t = Linear(n_neurons=4, input_size=inputs.shape[-1], bias=False)
11 |     lin_t.w.weight = torch.nn.Parameter(torch.eye(inputs.shape[-1]))
12 |     outputs = lin_t(inputs)
13 |     assert torch.all(torch.eq(inputs, outputs))
14 |
15 |     assert torch.jit.trace(lin_t, inputs)
16 |
--------------------------------------------------------------------------------
/tests/unittests/test_multi_mic.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def test_gccphat():
5 |
6 |     from speechbrain.processing.features import STFT
7 |     from speechbrain.processing.multi_mic import Covariance, GccPhat
8 |
9 |     # Creating the test signal
10 |     fs = 16000
11 |
12 |     delay = 60
13 |
14 |     sig = torch.randn([10, fs])
15 |     sig_delayed = torch.cat((torch.zeros([10, delay]), sig[:, 0:-delay]), 1)
16 |
17 |     xs = torch.stack((sig_delayed, sig), -1)
18 |
19 |     stft = STFT(sample_rate=fs)
20 |     Xs = stft(xs)
21 |
22 |     # Computing the covariance matrix for GCC-PHAT
23 |     cov = Covariance()
24 |     gccphat = GccPhat()
25 |
26 |     XXs = cov(Xs)
27 |     tdoas = torch.abs(gccphat(XXs))
28 |
29 |     n_valid_tdoas = torch.sum(torch.abs(tdoas[..., 1] - delay) < 1e-3)
30 |     assert n_valid_tdoas == Xs.shape[0] * Xs.shape[1]
31 |     assert torch.jit.trace(stft, xs)
32 |     assert torch.jit.trace(cov, Xs)
33 |     assert torch.jit.trace(gccphat, XXs)
34 |
--------------------------------------------------------------------------------
/tests/unittests/test_ngram_lm.py:
--------------------------------------------------------------------------------
1 | def test_backoff_ngram_lm():
2 |     from speechbrain.lm.ngram import BackoffNgramLM
3 |     import math
4 |
5 |     HALF = math.log(0.5)
6 |     ngrams = {
7 |         1: {tuple(): {"a": HALF, "b": HALF}},
8 |         2: {("a",): {"a": HALF, "b": HALF}, ("b",): {"a": HALF}},
9 |     }
10 |     backoffs = {1: {("b",): 0.0}}
11 |     lm = BackoffNgramLM(ngrams, backoffs)
12 |     # The basic cases covered by the ngrams and backoffs:
13 |     assert lm.logprob("a", ()) == HALF
14 |     assert lm.logprob("b", ()) == HALF
15 |     assert lm.logprob("a", ("a",)) == HALF
16 |     assert lm.logprob("a", ("b",)) == HALF
17 |     assert lm.logprob("b", ("a",)) == HALF
18 |     assert lm.logprob("b", ("b",)) == HALF
19 |     # Edge cases
20 |     # Too large context:
21 |     assert lm.logprob("a", ("a", "a")) == HALF
22 |     assert lm.logprob("b", ("a", "b")) == HALF
23 |     # OOV:
24 |     assert lm.logprob("c", ()) == float("-inf")
25 |     # OOV in context:
26 |     assert lm.logprob("a", ("c",)) == HALF
27 |
--------------------------------------------------------------------------------
/tests/unittests/test_pooling.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn
3 |
4 |
5 | def test_pooling1d():
6 |
7 |     from speechbrain.nnet.pooling import Pooling1d
8 |
9 |     input = torch.tensor([1, 3, 2]).unsqueeze(0).unsqueeze(-1).float()
10 |     pool = Pooling1d("max", 3)
11 |     output = pool(input)
12 |     assert output == 3
13 |
14 |     pool = Pooling1d("avg", 3)
15 |     output = pool(input)
16 |     assert output == 2
17 |
18 |     assert torch.jit.trace(pool, input)
19 |
20 |
21 | def test_pooling2d():
22 |
23 |     from speechbrain.nnet.pooling import Pooling2d
24 |
25 |     input = torch.tensor([[1, 3, 2], [4, 6, 5]]).float().unsqueeze(0)
26 |     pool = Pooling2d("max", (2, 3))
27 |     output = pool(input)
28 |     assert output == 6
29 |
30 |     input = torch.tensor([[1, 3, 2], [4, 6, 5]]).float().unsqueeze(0)
31 |     pool = Pooling2d("max", (1, 3))
32 |     output = pool(input)
33 |     assert output[0][0] == 3
34 |     assert output[0][1] == 6
35 |
36 |     input = torch.tensor([[1, 3, 2], [4, 6, 5]]).float().unsqueeze(0)
37 |     pool = Pooling2d("avg", (2, 3))
38 |     output = pool(input)
39 |     assert output == 3.5
40 |
41 |     input = torch.tensor([[1, 3, 2], [4, 6, 5]]).float().unsqueeze(0)
42 |     pool = Pooling2d("avg", (1, 3))
43 |     output = pool(input)
44 |     assert output[0][0] == 2
45 |     assert output[0][1] == 5
46 |
47 |     assert torch.jit.trace(pool, input)
48 |
--------------------------------------------------------------------------------
/tests/unittests/test_pretrainer.py:
--------------------------------------------------------------------------------
1 | def test_pretrainer(tmpdir):
2 |     import torch
3 |     from torch.nn import Linear
4 |
5 |     # save a model in tmpdir/original/model.ckpt
6 |     first_model = Linear(32, 32)
7 |     pretrained_dir = tmpdir / "original"
8 |     pretrained_dir.mkdir()
9 |     with open(pretrained_dir / "model.ckpt", "wb") as fo:
10 |         torch.save(first_model.state_dict(), fo)
11 |
12 |     # Make a new model and Pretrainer
13 |     pretrained_model = Linear(32, 32)
14 |     assert not torch.all(torch.eq(pretrained_model.weight, first_model.weight))
15 |     from speechbrain.utils.parameter_transfer import Pretrainer
16 |
17 |     pt = Pretrainer(
18 |         collect_in=tmpdir / "reused", loadables={"model": pretrained_model}
19 |     )
20 |     pt.collect_files(default_source=pretrained_dir)
21 |     pt.load_collected()
22 |     assert torch.all(torch.eq(pretrained_model.weight, first_model.weight))
23 |
--------------------------------------------------------------------------------
/tests/unittests/test_samplers.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def test_ConcatDatasetBatchSampler():
5 |     from torch.utils.data import TensorDataset, ConcatDataset, DataLoader
6 |     from speechbrain.dataio.sampler import (
7 |         ReproducibleRandomSampler,
8 |         ConcatDatasetBatchSampler,
9 |     )
10 |     import numpy as np
11 |
12 |     datasets = []
13 |     for i in range(3):
14 |         if i == 0:
15 |             datasets.append(TensorDataset(torch.arange(i * 10, (i + 1) * 10)))
16 |         else:
17 |             datasets.append(TensorDataset(torch.arange(i * 6, (i + 1) * 6)))
18 |
19 |     samplers = [ReproducibleRandomSampler(x) for x in datasets]
20 |     dataset = ConcatDataset(datasets)
21 |     loader = DataLoader(
22 |         dataset, batch_sampler=ConcatDatasetBatchSampler(samplers, [1, 1, 1]),
23 |     )
24 |
25 |     concat_data = []
26 |
27 |     for data in loader:
28 |         concat_data.append([x.item() for x in data[0]])
29 |     concat_data = np.array(concat_data)
30 |
31 |     non_cat_data = []
32 |     for i in range(len(samplers)):
33 |         c_data = []
34 |         loader = DataLoader(dataset.datasets[i], sampler=samplers[i],)
35 |
36 |         for data in loader:
37 |             c_data.append(data[0].item())
38 |
39 |         non_cat_data.append(c_data)
40 |
41 |     minlen = min([len(x) for x in non_cat_data])
42 |     non_cat_data = [x[:minlen] for x in non_cat_data]
43 |     non_cat_data = np.array(non_cat_data)
44 |     np.testing.assert_array_equal(non_cat_data.T, concat_data)
45 |
--------------------------------------------------------------------------------
/tests/unittests/test_schedulers.py:
--------------------------------------------------------------------------------
1 | def test_NewBobScheduler():
2 |
3 |     from speechbrain.nnet.schedulers import NewBobScheduler
4 |
5 |     scheduler = NewBobScheduler(initial_value=0.8)
6 |
7 |     prev_lr, next_lr = scheduler(1.0)
8 |     assert prev_lr == 0.8
9 |     assert next_lr == 0.8
10 |
11 |     prev_lr, next_lr = scheduler(1.1)
12 |     assert next_lr == 0.4
13 |
14 |     prev_lr, next_lr = scheduler(0.5)
15 |     assert next_lr == 0.4
16 |
17 |     scheduler = NewBobScheduler(initial_value=0.8, patient=3)
18 |     prev_lr, next_lr = scheduler(1.0)
19 |     assert next_lr == 0.8
20 |
21 |     prev_lr, next_lr = scheduler(1.1)
22 |     prev_lr, next_lr = scheduler(1.1)
23 |     prev_lr, next_lr = scheduler(1.1)
24 |     assert next_lr == 0.8
25 |
26 |     prev_lr, next_lr = scheduler(1.1)
27 |     assert next_lr == 0.4
28 |     assert scheduler.current_patient == 3
29 |
--------------------------------------------------------------------------------
/tests/unittests/test_signal_processing.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def test_normalize():
5 |
6 |     from speechbrain.processing.signal_processing import compute_amplitude
7 |     from speechbrain.processing.signal_processing import rescale
8 |     import random
9 |     import numpy as np
10 |
11 |     for scale in ["dB", "linear"]:
12 |         for amp_type in ["peak", "avg"]:
13 |             for test_vec in [
14 |                 torch.zeros((100)),
15 |                 torch.rand((10, 100)),
16 |                 torch.rand((10, 100, 5)),
17 |             ]:
18 |
19 |                 lengths = (
20 |                     test_vec.size(1)
21 |                     if len(test_vec.shape) > 1
22 |                     else test_vec.size(0)
23 |                 )
24 |                 amp = compute_amplitude(test_vec, lengths, amp_type, scale)
25 |                 scaled_back = rescale(
26 |                     random.random() * test_vec, lengths, amp, amp_type, scale,
27 |                 )
28 |                 np.testing.assert_array_almost_equal(
29 |                     scaled_back.numpy(), test_vec.numpy()
30 |                 )
31 |
--------------------------------------------------------------------------------
/tests/unittests/test_superpowers.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pytest
3 |
4 |
5 | @pytest.mark.skipif(
6 |     sys.platform.startswith("win"),
7 |     reason="shell tools not necessarily available on Windows",
8 | )
9 | def test_run_shell():
10 |     from speechbrain.utils.superpowers import run_shell
11 |
12 |     out, err, code = run_shell("echo -n hello")
13 |     assert out.decode() == "hello"
14 |     assert err.decode() == ""
15 |     assert code == 0
16 |
17 |     with pytest.raises(OSError):
18 |         run_shell("false")
19 |
20 |     # This last run is just to check that a bytes
21 |     # sequence that is returned in an incompatible encoding (not UTF-8)
22 |     # does not cause an error.
23 |     output, _, _ = run_shell("echo -n pöö | iconv -t LATIN1")
24 |     assert output.decode("latin1") == "pöö"
25 |
--------------------------------------------------------------------------------